├── .github └── workflows │ └── build.yml ├── .gitignore ├── LICENSE ├── README.md ├── examples ├── fetch_abstracts.py └── pubmedflow.ipynb ├── pubmedflow ├── __init__.py ├── pubmedflow.py └── utils.py └── setup.py /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Build 5 | 6 | on: 7 | push: 8 | branches: [ "main" ] 9 | pull_request: 10 | branches: [ "main" ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | fail-fast: true 18 | matrix: 19 | python-version: ["3.8", "3.9"] 20 | os: [ubuntu-latest, macos-latest] 21 | 22 | steps: 23 | - uses: actions/checkout@v3 24 | - name: Set up Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v3 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | - name: Install dependencies 29 | run: | 30 | python -m pip install --upgrade pip 31 | python -m pip install flake8 pytest 32 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 33 | - name: Lint with flake8 34 | run: | 35 | # stop the build if there are Python syntax errors or undefined names 36 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 37 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 38 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 39 | - name: Test with pytest 40 | run: | 41 | pip install --upgrade pip 42 | python setup.py install 43 | 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | .DS_Store 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | pip-wheel-metadata/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 92 | # install all needed dependencies. 93 | #Pipfile.lock 94 | 95 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 96 | __pypackages__/ 97 | 98 | # Celery stuff 99 | celerybeat-schedule 100 | celerybeat.pid 101 | 102 | # SageMath parsed files 103 | *.sage.py 104 | 105 | # Environments 106 | .env 107 | .venv 108 | env/ 109 | venv/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ 131 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 NFFLOW 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

# PUBMED-FLOW
2 | ### Open source data collection tool to fetch data from pubmed
3 | ### Contribute and Support
4 | 
5 | 
6 | [![License:MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
7 | [![GitHub commit](https://img.shields.io/github/last-commit/nfflow/pubmedflow)](https://github.com/nfflow/pubmedflow/commits/main)
8 | [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg?style=flat-square)](http://makeapullrequest.com)
9 | [![Open All Collab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1mjlnHAb7aqwfDEylo05z3RdIyyaNRoQ5?usp=sharing)
10 | 
11 | 
12 | ## 🎮 Features
13 | 
14 | - Fetch PubMed IDs (PMIDs) for a keyword query (multi-keyword queries are supported)
15 | - Fetch the abstracts of research papers from PubMed for those PMIDs
16 | - Download the full PDF for each PMID when it is available on PubMed Central (PMC)
17 | - If a PDF is not available on PMC, it can be fetched via Sci-Hub (when `scihub=True`)
18 | 
19 | 
20 | ## How to obtain an NCBI API key?
21 | 
22 | - Follow this [tutorial](https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/#:~:text=To%20create%20the%20key%2C%20go,and%20copy%20the%20resulting%20key)
23 | 
24 | ## Installation
25 | ### From PyPI
26 | 
27 | ```
28 | pip install pubmedflow
29 | ```
30 | 
31 | The NLP helpers (`pubmed_train`, `pubmed_qa`, `pubmed_summarize`, `pubmed_entity_extraction`) also need the optional `nfmodelapis` dependency: `pip install pubmedflow[qa]`.
32 | 
33 | ### From source
34 | ```
35 | python setup.py install
36 | ```
37 | OR
38 | ```
39 | pip install git+https://github.com/nfflow/pubmedflow
40 | ```
41 | 
42 | ## How to use the API?
43 | 
44 | `LazyPubmed` arguments:
45 | Name | Input | Description
46 | ----------- | ----------- | -----------
47 | title_query | Required, str | keyword query to search PubMed with
48 | folder_name | Optional, str | path to store output data (default: `pubmed_data`)
49 | api_key | Optional, str | NCBI E-utilities API key
50 | max_documents | Optional, int | maximum number of documents to fetch
51 | download_pdf | Optional, bool | also download full-text PDFs (default: `True`)
52 | scihub | Optional, bool | fall back to Sci-Hub when a PDF is not freely available (default: `False`)
53 | 
54 | 
55 | ## Quick Start:
56 | 
57 | ### Download PubMed articles as PDFs and a DataFrame -
58 | 
59 | ```python
60 | from pubmedflow import LazyPubmed
61 | 
62 | 
63 | pb = LazyPubmed('chronic disease',
64 |                 folder_name='pubmed_data',
65 |                 api_key='',
66 |                 max_documents=None,
67 |                 download_pdf=True,
68 |                 scihub=False)
69 | 
70 | df_result = pb.final_df  # metadata, abstracts and extracted PDF text
71 | ```
72 | 
73 | ### Perform unsupervised learning to make a pre-trained model from the collected data:
74 | 
75 | ```python
76 | pb.pubmed_train(model_name='sentence-transformers/all-mpnet-base-v2',
77 |                 model_output_path='pubmedflow_model',
78 |                 model_architecture='ct')
79 | ```
80 | 
81 | ### Do question answering on the downloaded text to get answer spans from each article:
82 | 
83 | ```python
84 | qa_results = pb.pubmed_qa(qa_query='What are the chronic diseases')
85 | print(qa_results)
86 | ```
87 | 
88 | ### Summarize each of them
89 | 
90 | ```python
91 | summ_results = pb.pubmed_summarize()
92 | print(summ_results)
93 | ```
94 | 
95 | ### Perform entity extraction on each of them
96 | 
97 | ```python
98 | ents = pb.pubmed_entity_extraction()
99 | print(ents)
100 | ```
101 | 
-------------------------------------------------------------------------------- /examples/fetch_abstracts.py: --------------------------------------------------------------------------------
1 | from pubmedflow import LazyPubmed
2 | 
3 | # fetch abstracts only (PDF download disabled) for a keyword query
4 | pb = LazyPubmed('lncRNA',
5 |                 api_key='your_api_key',
6 |                 max_documents=5,
7 |                 download_pdf=False)
8 | 
9 | result = pb.final_df
-------------------------------------------------------------------------------- /examples/pubmedflow.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 5, 6 | "id": "d7aa088f", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "from pubmedflow import LazyPubmed\n", 11 | "\n", 12 | "pb = LazyPubmed()\n", 13 | "df_result = 
pb.pubmed_search(query = 'Chronic',\n", 14 | " key = \"your_api_key\",\n", 15 | " max_documents = 10,\n", 16 | " download_pdf = True, \n", 17 | " scihub = False)" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "id": "e14c8487", 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "text/html": [ 29 | "
\n", 30 | "\n", 43 | "\n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | "
titleissuepagesabstractjournalauthorspubdatepmidmesh_termspublication_types...referencesdeleteaffiliationspmcother_idmedline_tanlm_unique_idissn_linkingcountrypdf_content
0Assessing the experience of person-centred coo...25(3)1069-1080BACKGROUND\\nCountries are adapting their healt...Health expectations : an international journal...Rijken|Mieke|M|https://orcid.org/0000-0001-607...202235318778D000328:Adult; D000368:Aged; D000369:Aged, 80 ...D016428:Journal Article...29444767;29166917;8870135;15804318;22778146;18...FalseNivel (Netherlands Institute for Health Servic...NaNNaNHealth Expect98159261369-6513EnglandReceived 11 August 2021 Revised 28 November 20...
1Association Between Systolic Blood Pressure Va...11(11)e025513Background Whether visit-to-visit systolic blo...Journal of the American Heart AssociationPark|Cheol Ho|CH|0000-0003-4636-5745;Kim|Hyung...202235656977D001794:Blood Pressure; D002318:Cardiovascular...D016428:Journal Article...NaNFalseDepartment of Internal Medicine College of Med...NaNNaNJ Am Heart Assoc1015805242047-9980EnglandNaN
2Comorbidity progression patterns of major chro...NaN17423953221087647OBJECTIVE\\nThe presence of one chronic disease...Chronic illnessUddin|Shahadat|S|https://orcid.org/0000-0003-0...202235306857NaND016428:Journal Article...NaNFalseFaculty of Engineering, 4334The University of ...NaNNaNChronic Illn1012530191742-3953United StatesNaN
3A Review of Laser Therapy and Low-Intensity Ul...26(1)57-63PURPOSE OF REVIEW\\nChronic pain management the...Current pain and headache reportsChen|Frank R|FR|;Manzi|Joseph E|JE|;Mehta|Neel...202235133560D059350:Chronic Pain; D006801:Humans; D053685:...D016428:Journal Article; D016454:Review...32880358;25824429;31726927;30443883;12605432;2...FalseDepartment of Anesthesiology, Hospital of the ...NaNNaNCurr Pain Headache Rep1009706661534-3081United StatesNaN
4Patients' and healthcare providers' perception...22(1)9BACKGROUND\\nTelehealth and online health infor...BMC geriatricsJiang|Yuyu|Y|;Sun|Pingping|P|;Chen|Zhongyi|Z|;...202234979967D000368:Aged; D019468:Disease Management; D006...D016428:Journal Article; D013485:Research Supp......32512462;16867972;32314971;12020305;33687342;2...FalseResearch office of chronic disease management ...NaNNaNBMC Geriatr1009685481471-2318EnglandJiang et al BMC Geriatrics 2022 22 9 https doi...
5A Preliminary Study of Provider Burden in the ...22(11)1408-1417This study compared perceptions of the burden ...The journal of painTait|Raymond C|RC|;Chibnall|John T|JT|;Kalauok...202133989786D000328:Adult; D001291:Attitude of Health Pers...D016428:Journal Article; D013485:Research Supp......NaNFalseDepartment of Psychiatry and Behavioral Neuros...NaNNaNJ Pain1008986571526-5900United StatesNaN
6\"A little bit of a guidance and a little bit o...43(23)3347-3356PURPOSE\\nTo understand preferences, barriers, ...Disability and rehabilitationDnes|Natalie|N|;Coley|Bridget|B|;Frisby|Kaitly...202132223460D000293:Adolescent; D000328:Adult; D059350:Chr...D016428:Journal Article; D013485:Research Supp......NaNFalseDepartment of Physical Therapy, University of ...NaNNaNDisabil Rehabil92071790963-8288EnglandNaN
7Chronic disease health literacy in First Natio...30(17-18)2683-2695AIM\\nTo explore chronic disease education, sel...Journal of clinical nursingRheault|Haunnah|H|https://orcid.org/0000-0001-...202134180097D000328:Adult; D001315:Australia; D002908:Chro...D016428:Journal Article...NaNFalseSchool of Nursing, Queensland University of Te...NaNNaNJ Clin Nurs92073020962-1067EnglandNaN
8Patient Perceptions of Physician Burden in the...22(9)1060-1071While patient perceptions of burden to caregiv...The journal of painTait|Raymond C|RC|;Chibnall|John T|JT|;Kalauok...202133727158D000328:Adult; D059350:Chronic Pain; D002983:C...D016428:Journal Article...NaNFalseDepartment of Psychiatry and Behavioral Neuros...NaNNaNJ Pain1008986571526-5900United StatesNaN
9The relationship between the perception of chr...NaN17423953211039792OBJECTIVES\\nIn this study, it was aimed to det...Chronic illnessAkca|Nesrin|N|;Saygili|Meltem|M|;Ture|Aysun Ka...202134569319NaND016428:Journal Article...NaNFalse52977Kirikkale University, Faculty of Health S...NaNNaNChronic Illn1012530191742-3953United StatesNaN
\n", 313 | "

10 rows × 23 columns

\n", 314 | "
" 315 | ], 316 | "text/plain": [ 317 | " title issue \\\n", 318 | "0 Assessing the experience of person-centred coo... 25(3) \n", 319 | "1 Association Between Systolic Blood Pressure Va... 11(11) \n", 320 | "2 Comorbidity progression patterns of major chro... NaN \n", 321 | "3 A Review of Laser Therapy and Low-Intensity Ul... 26(1) \n", 322 | "4 Patients' and healthcare providers' perception... 22(1) \n", 323 | "5 A Preliminary Study of Provider Burden in the ... 22(11) \n", 324 | "6 \"A little bit of a guidance and a little bit o... 43(23) \n", 325 | "7 Chronic disease health literacy in First Natio... 30(17-18) \n", 326 | "8 Patient Perceptions of Physician Burden in the... 22(9) \n", 327 | "9 The relationship between the perception of chr... NaN \n", 328 | "\n", 329 | " pages abstract \\\n", 330 | "0 1069-1080 BACKGROUND\\nCountries are adapting their healt... \n", 331 | "1 e025513 Background Whether visit-to-visit systolic blo... \n", 332 | "2 17423953221087647 OBJECTIVE\\nThe presence of one chronic disease... \n", 333 | "3 57-63 PURPOSE OF REVIEW\\nChronic pain management the... \n", 334 | "4 9 BACKGROUND\\nTelehealth and online health infor... \n", 335 | "5 1408-1417 This study compared perceptions of the burden ... \n", 336 | "6 3347-3356 PURPOSE\\nTo understand preferences, barriers, ... \n", 337 | "7 2683-2695 AIM\\nTo explore chronic disease education, sel... \n", 338 | "8 1060-1071 While patient perceptions of burden to caregiv... \n", 339 | "9 17423953211039792 OBJECTIVES\\nIn this study, it was aimed to det... \n", 340 | "\n", 341 | " journal \\\n", 342 | "0 Health expectations : an international journal... \n", 343 | "1 Journal of the American Heart Association \n", 344 | "2 Chronic illness \n", 345 | "3 Current pain and headache reports \n", 346 | "4 BMC geriatrics \n", 347 | "5 The journal of pain \n", 348 | "6 Disability and rehabilitation \n", 349 | "7 Journal of clinical nursing \n", 350 | "8 The journal of pain \n", 351 | "9 Chronic illness \n", 352 | "\n", 353 | " authors pubdate pmid \\\n", 354 | "0 Rijken|Mieke|M|https://orcid.org/0000-0001-607... 2022 35318778 \n", 355 | "1 Park|Cheol Ho|CH|0000-0003-4636-5745;Kim|Hyung... 2022 35656977 \n", 356 | "2 Uddin|Shahadat|S|https://orcid.org/0000-0003-0... 2022 35306857 \n", 357 | "3 Chen|Frank R|FR|;Manzi|Joseph E|JE|;Mehta|Neel... 2022 35133560 \n", 358 | "4 Jiang|Yuyu|Y|;Sun|Pingping|P|;Chen|Zhongyi|Z|;... 2022 34979967 \n", 359 | "5 Tait|Raymond C|RC|;Chibnall|John T|JT|;Kalauok... 2021 33989786 \n", 360 | "6 Dnes|Natalie|N|;Coley|Bridget|B|;Frisby|Kaitly... 2021 32223460 \n", 361 | "7 Rheault|Haunnah|H|https://orcid.org/0000-0001-... 2021 34180097 \n", 362 | "8 Tait|Raymond C|RC|;Chibnall|John T|JT|;Kalauok... 2021 33727158 \n", 363 | "9 Akca|Nesrin|N|;Saygili|Meltem|M|;Ture|Aysun Ka... 2021 34569319 \n", 364 | "\n", 365 | " mesh_terms \\\n", 366 | "0 D000328:Adult; D000368:Aged; D000369:Aged, 80 ... \n", 367 | "1 D001794:Blood Pressure; D002318:Cardiovascular... \n", 368 | "2 NaN \n", 369 | "3 D059350:Chronic Pain; D006801:Humans; D053685:... \n", 370 | "4 D000368:Aged; D019468:Disease Management; D006... \n", 371 | "5 D000328:Adult; D001291:Attitude of Health Pers... \n", 372 | "6 D000293:Adolescent; D000328:Adult; D059350:Chr... \n", 373 | "7 D000328:Adult; D001315:Australia; D002908:Chro... \n", 374 | "8 D000328:Adult; D059350:Chronic Pain; D002983:C... \n", 375 | "9 NaN \n", 376 | "\n", 377 | " publication_types ... \\\n", 378 | "0 D016428:Journal Article ... \n", 379 | "1 D016428:Journal Article ... 
\n", 380 | "2 D016428:Journal Article ... \n", 381 | "3 D016428:Journal Article; D016454:Review ... \n", 382 | "4 D016428:Journal Article; D013485:Research Supp... ... \n", 383 | "5 D016428:Journal Article; D013485:Research Supp... ... \n", 384 | "6 D016428:Journal Article; D013485:Research Supp... ... \n", 385 | "7 D016428:Journal Article ... \n", 386 | "8 D016428:Journal Article ... \n", 387 | "9 D016428:Journal Article ... \n", 388 | "\n", 389 | " references delete \\\n", 390 | "0 29444767;29166917;8870135;15804318;22778146;18... False \n", 391 | "1 NaN False \n", 392 | "2 NaN False \n", 393 | "3 32880358;25824429;31726927;30443883;12605432;2... False \n", 394 | "4 32512462;16867972;32314971;12020305;33687342;2... False \n", 395 | "5 NaN False \n", 396 | "6 NaN False \n", 397 | "7 NaN False \n", 398 | "8 NaN False \n", 399 | "9 NaN False \n", 400 | "\n", 401 | " affiliations pmc other_id \\\n", 402 | "0 Nivel (Netherlands Institute for Health Servic... NaN NaN \n", 403 | "1 Department of Internal Medicine College of Med... NaN NaN \n", 404 | "2 Faculty of Engineering, 4334The University of ... NaN NaN \n", 405 | "3 Department of Anesthesiology, Hospital of the ... NaN NaN \n", 406 | "4 Research office of chronic disease management ... NaN NaN \n", 407 | "5 Department of Psychiatry and Behavioral Neuros... NaN NaN \n", 408 | "6 Department of Physical Therapy, University of ... NaN NaN \n", 409 | "7 School of Nursing, Queensland University of Te... NaN NaN \n", 410 | "8 Department of Psychiatry and Behavioral Neuros... NaN NaN \n", 411 | "9 52977Kirikkale University, Faculty of Health S... NaN NaN \n", 412 | "\n", 413 | " medline_ta nlm_unique_id issn_linking country \\\n", 414 | "0 Health Expect 9815926 1369-6513 England \n", 415 | "1 J Am Heart Assoc 101580524 2047-9980 England \n", 416 | "2 Chronic Illn 101253019 1742-3953 United States \n", 417 | "3 Curr Pain Headache Rep 100970666 1534-3081 United States \n", 418 | "4 BMC Geriatr 100968548 1471-2318 England \n", 419 | "5 J Pain 100898657 1526-5900 United States \n", 420 | "6 Disabil Rehabil 9207179 0963-8288 England \n", 421 | "7 J Clin Nurs 9207302 0962-1067 England \n", 422 | "8 J Pain 100898657 1526-5900 United States \n", 423 | "9 Chronic Illn 101253019 1742-3953 United States \n", 424 | "\n", 425 | " pdf_content \n", 426 | "0 Received 11 August 2021 Revised 28 November 20... \n", 427 | "1 NaN \n", 428 | "2 NaN \n", 429 | "3 NaN \n", 430 | "4 Jiang et al BMC Geriatrics 2022 22 9 https doi... 
\n", 431 | "5 NaN \n", 432 | "6 NaN \n", 433 | "7 NaN \n", 434 | "8 NaN \n", 435 | "9 NaN \n", 436 | "\n", 437 | "[10 rows x 23 columns]" 438 | ] 439 | }, 440 | "execution_count": 2, 441 | "metadata": {}, 442 | "output_type": "execute_result" 443 | } 444 | ], 445 | "source": [ 446 | "df_result" 447 | ] 448 | } 449 | ], 450 | "metadata": { 451 | "kernelspec": { 452 | "display_name": "Python 3 (ipykernel)", 453 | "language": "python", 454 | "name": "python3" 455 | }, 456 | "language_info": { 457 | "codemirror_mode": { 458 | "name": "ipython", 459 | "version": 3 460 | }, 461 | "file_extension": ".py", 462 | "mimetype": "text/x-python", 463 | "name": "python", 464 | "nbconvert_exporter": "python", 465 | "pygments_lexer": "ipython3", 466 | "version": "3.8.9" 467 | } 468 | }, 469 | "nbformat": 4, 470 | "nbformat_minor": 5 471 | } 472 | -------------------------------------------------------------------------------- /pubmedflow/__init__.py: -------------------------------------------------------------------------------- 1 | from .pubmedflow import * -------------------------------------------------------------------------------- /pubmedflow/pubmedflow.py: -------------------------------------------------------------------------------- 1 | """ 2 | This class is to implement the core pubmed functions, 3 | download the articles and query based on keywords 4 | @author: Aaditya(Ankit) 5 | @date created: 27/06/2022 6 | @date last modified: 02/08/2022 7 | """ 8 | 9 | from pathlib import Path 10 | import pandas as pd 11 | import uuid 12 | from .utils import fetch, xml2df, get_pdf, get_final_data 13 | 14 | 15 | class LazyPubmed(object): 16 | 17 | def __init__(self, title_query, 18 | folder_name='pubmed_data', 19 | api_key='', 20 | max_documents=None, 21 | download_pdf=True, 22 | scihub=False): 23 | 24 | # creating folders for storing data 25 | # --------------------------------------------------------- 26 | 27 | self.folder_uuid = str(uuid.uuid4()) 28 | self.folder_name = folder_name 29 | self.raw_pdf_path = f'{self.folder_name}/{self.folder_uuid}/raw_pdfs/' 30 | self.final_df = f'{self.folder_name}/{self.folder_uuid}/final_df/' 31 | self.raw_abs_path = f'{self.folder_name}/{self.folder_uuid}/raw_abstracts/' 32 | self.meta_data_path = f'{self.folder_name}/{self.folder_uuid}/meta_data/' 33 | self.xml2pdf_path = f'{self.folder_name}/{self.folder_uuid}/xml2df/' 34 | self.key = api_key 35 | 36 | Path(self.raw_pdf_path).mkdir(parents=True, 37 | exist_ok=True) 38 | Path(self.raw_abs_path).mkdir(parents=True, 39 | exist_ok=True) 40 | Path(self.meta_data_path).mkdir(parents=True, 41 | exist_ok=True) 42 | Path(self.xml2pdf_path).mkdir(parents=True, 43 | exist_ok=True) 44 | Path(self.final_df).mkdir(parents=True, 45 | exist_ok=True) 46 | # --------------------------------------------------------- 47 | self.user_agent_list = [ 48 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15', 49 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0', 50 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36', 51 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:77.0) Gecko/20100101 Firefox/77.0', 52 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36', 53 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36', 54 | 'Mozilla/5.0 
(Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.4 Safari/605.1.15', 55 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:101.0) Gecko/20100101 Firefox/101.0', 56 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.61 Safari/537.36 OPR/88.0.4412.27', 57 | ] 58 | 59 | final_df = self.download_articles(title_query, 60 | max_documents=max_documents, 61 | download_pdf=download_pdf, 62 | scihub=scihub) 63 | self.final_df = final_df 64 | 65 | 66 | def download_articles(self, query, 67 | max_documents=None, 68 | download_pdf=True, 69 | scihub=False, 70 | ): 71 | """function to fetch ids, fetch abstracts and fetch respective pdf files""" 72 | 73 | fetch_ids = fetch(self,query, max_documents=max_documents) 74 | final_df = xml2df(self.raw_abs_path, self.xml2pdf_path) 75 | final_df = final_df[final_df['pmid'].notna()] 76 | final_df['pmid'] = final_df['pmid'].apply(lambda x: int(x)) 77 | 78 | pids = list(set(list(final_df['pmid']))) 79 | 80 | if download_pdf: 81 | get_pdf(self, pids, save=True, scihub=scihub) 82 | dart = get_final_data(self.raw_pdf_path) 83 | dart = dart[dart['pmid'].notna()] 84 | dart['pmid'] = dart['pmid'].apply(lambda x: int(x)) 85 | final_df = pd.merge(final_df, dart, on='pmid', how='left') 86 | 87 | final_df.to_csv(f'{self.final_df}final_df.csv') 88 | return final_df 89 | 90 | def pubmed_train(self, 91 | model_name='sentence-transformers/all-mpnet-base-v2', 92 | model_output_path='pubmedflow_model', 93 | model_architecture='ct'): 94 | 95 | final_df = self.final_df 96 | pdf_content = [i for i in final_df['pdf_content'] if isinstance(i, 97 | str)] 98 | import nltk 99 | nltk.download('punkt') 100 | from nltk.tokenize import sent_tokenize 101 | train_sentences = [] 102 | for text in pdf_content: 103 | train_sentences += sent_tokenize(text) 104 | 105 | if len(train_sentences) > 0: 106 | print(train_sentences) 107 | train_df = pd.DataFrame({'text': train_sentences}) 108 | 109 | from nfmodelapis.text.SentenceEmbedder import ModelSelect 110 | trainer = ModelSelect(model_name, 111 | model_output_path, 112 | model_architecture=model_architecture 113 | ).return_trainer() 114 | trainer.train(data=train_df) 115 | else: 116 | raise Exception('''No data collected to train. 
117 | Check the search parameters to 118 | collect more data''') 119 | 120 | def pubmed_entity_extraction(self): 121 | final_df = self.final_df 122 | from nfmodelapis.text.ner import NERPipeline 123 | ner = NERPipeline(final_df) 124 | ents = ner.batch_ner('pdf_content') 125 | return ents 126 | 127 | def pubmed_qa(self, 128 | qa_query): 129 | 130 | final_df = self.final_df 131 | 132 | from nfmodelapis.text.question_answering import QAPipeline 133 | pipe = QAPipeline(final_df) 134 | res = pipe.batch_qa(qa_query, 'pdf_content') 135 | return res 136 | 137 | def pubmed_summarize(self): 138 | final_df = self.final_df 139 | 140 | from nfmodelapis.text.summarization import SummarizationPipeline 141 | pipe = SummarizationPipeline(final_df) 142 | res = pipe.batch_summarize('pdf_content') 143 | return res 144 | 145 | 146 | -------------------------------------------------------------------------------- /pubmedflow/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | This class is to implement the util functions for pubmed main class 3 | @author: Aaditya(Ankit) 4 | @date created: 27/06/2022 5 | @date last modified: 02/08/2022 6 | """ 7 | import re 8 | import pubmed_parser as pp 9 | import pandas as pd 10 | from tqdm import tqdm 11 | import io 12 | import uuid 13 | import glob 14 | 15 | from pdfminer3.layout import LAParams 16 | from pdfminer3.pdfpage import PDFPage 17 | from pdfminer3.pdfinterp import PDFResourceManager 18 | from pdfminer3.pdfinterp import PDFPageInterpreter 19 | from pdfminer3.converter import TextConverter 20 | 21 | import random 22 | import requests 23 | 24 | import json 25 | 26 | from datetime import date 27 | from metapub import FindIt 28 | from bs4 import BeautifulSoup 29 | from scidownl import scihub_download 30 | 31 | 32 | def preprocess_text(sentence): 33 | """Remove punctuations and extra spaces""" 34 | 35 | sentence = re.sub('[^a-zA-Z0-9]', ' ', sentence) 36 | sentence = re.sub(r'\s+', ' ', sentence) 37 | 38 | return sentence 39 | 40 | 41 | def pdf_in(x): 42 | """Read the text content of a pdf""" 43 | 44 | resource_manager = PDFResourceManager() 45 | fake_file_handle = io.StringIO() 46 | converter = TextConverter( 47 | resource_manager, fake_file_handle, laparams=LAParams()) 48 | page_interpreter = PDFPageInterpreter(resource_manager, converter) 49 | 50 | with open(x, 'rb') as fh: 51 | 52 | for page in PDFPage.get_pages(fh, 53 | caching=True, 54 | check_extractable=True): 55 | page_interpreter.process_page(page) 56 | 57 | text = fake_file_handle.getvalue() 58 | 59 | # close open handles 60 | converter.close() 61 | fake_file_handle.close() 62 | 63 | return text 64 | 65 | 66 | def get_pdftext_content(pdf_name): 67 | """read multiple pdfs from a folder""" 68 | 69 | result_data = pdf_in(pdf_name) 70 | t = [k for k in result_data.split('\n') if k != ''] 71 | t_join = " ".join(t) 72 | if 'References' in t_join: 73 | t_join = "".join(t_join.split('References')[:-1]) 74 | 75 | return t_join 76 | 77 | 78 | def get_final_data(folder_name): 79 | """convert pdf text content into a pd.dataframe""" 80 | 81 | df_data = {'pmid': [], 'pdf_content': []} 82 | 83 | pdfs = [pdf_file 84 | for pdf_file in glob.glob(f'{folder_name}*')] 85 | 86 | for single_pdf in tqdm(range(len(pdfs))): 87 | 88 | fname = pdfs[single_pdf] 89 | pdf_name = fname.split('/')[3].split('.pdf')[0] 90 | df_data['pmid'].append(pdf_name) 91 | 92 | try: 93 | pdf_content = get_pdftext_content(fname) 94 | 95 | if pdf_content != ' ': 96 | 
df_data['pdf_content'].append(pdf_content) 97 | else: 98 | df_data['pdf_content'].append('') 99 | 100 | except Exception as e: 101 | pass 102 | 103 | df_data = pd.DataFrame(df_data) 104 | return df_data 105 | 106 | 107 | def parse_xml(file): 108 | """parse the xml file""" 109 | 110 | dicts_out = pp.parse_medline_xml(file) 111 | return dicts_out 112 | 113 | 114 | def xml2df(folder_name, save_path): 115 | """xml data to pd.dataframe""" 116 | 117 | all_files = glob.glob(f"./{folder_name}*xml") 118 | u_id = str(uuid.uuid4()) 119 | 120 | df_list = [] 121 | 122 | try: 123 | for i in tqdm(all_files): 124 | raw_df = parse_xml(i) 125 | df = pd.DataFrame(raw_df) 126 | df_list.append(df) 127 | 128 | final_df = pd.concat(df_list) 129 | 130 | final_df.to_csv(f'./{save_path}{u_id}.csv', index=False) 131 | final_df = pd.read_csv(f'./{save_path}{u_id}.csv') 132 | 133 | # it will replace blank will NaN now load again and select without NaN 134 | final_df = final_df[final_df['abstract'].notna() 135 | ].reset_index(drop=True) 136 | final_df.to_csv(f'{save_path}{u_id}.csv', index=False) 137 | return final_df 138 | 139 | except Exception as e: 140 | print(e) 141 | pass 142 | 143 | 144 | def request_head(self, url): 145 | """Request function for urls""" 146 | 147 | headers = requests.utils.default_headers() 148 | headers['User-Agent'] = random.choice(self.user_agent_list) 149 | r = requests.get(url, headers=headers, 150 | allow_redirects=True, 151 | verify=False) 152 | return r 153 | 154 | 155 | def pdf_links(self, pmid): 156 | """Get pdf links from Pubmed website""" 157 | 158 | data = {} 159 | url = f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/" 160 | url_request = request_head(self, url) 161 | soup = BeautifulSoup(url_request.text, 'html.parser') 162 | 163 | try: 164 | full_text_class = soup.find_all( 165 | "div", {"class": "full-text-links"}) 166 | full_content = full_text_class[0].find_all( 167 | "div", {"class": "full-text-links-list"}) 168 | all_links = full_content[0].findAll("a", href=True) 169 | 170 | for single_link in all_links: 171 | data[single_link.text.strip()] = single_link['href'] 172 | return data 173 | except Exception as e: 174 | print(e) 175 | return {'data': None} 176 | 177 | 178 | def get_date(self): 179 | currentDate = date.today() 180 | today = currentDate.strftime('%Y/%m/%d') 181 | return today 182 | 183 | 184 | def pmc(self, url): 185 | """Download pdf from pmc website""" 186 | 187 | url_request = request_head(self, url) 188 | soup = BeautifulSoup(url_request.text, 'html.parser') 189 | full_text_class = soup.find_all( 190 | "ul", {"class": "pmc-sidebar__formats"}) 191 | link_ = full_text_class[0].find_all( 192 | "li", {"class": "pdf-link other_item"})[0].findAll("a", href=True)[0] 193 | link_url = f"https://www.ncbi.nlm.nih.gov{link_['href']}" 194 | 195 | return link_url 196 | 197 | 198 | def save_pdf(self, pmid, pdf_url): 199 | """save pdf in local folder""" 200 | 201 | pdf_request = request_head(self, pdf_url) 202 | with open(f'{self.raw_pdf_path}{pmid}.pdf', 'wb') as f: 203 | f.write(pdf_request.content) 204 | return 0 205 | 206 | 207 | def scihub_mode(self, pmid): 208 | """scihub search""" 209 | 210 | paper_type = "pmid" 211 | out = f'{self.raw_pdf_path}{pmid}.pdf' 212 | scihub_download(pmid, paper_type=paper_type, out=out) 213 | 214 | 215 | def get_pdf(self, pmids, save=False, scihub=True): 216 | """Main function to download and search pdfs -> save in local folder""" 217 | 218 | downloadble_url = {} 219 | not_downloaded = {} 220 | 221 | pdf_count = len(pmids) 222 | print(f"Total pdf 
downloading : {pdf_count}.. \n") 223 | 224 | for pmid in tqdm(pmids): 225 | 226 | try: 227 | pdf_source = pdf_links(self, pmid) 228 | if 'Free PMC article' in pdf_source.keys(): 229 | pdf_url = pmc(self, pdf_source['Free PMC article']) 230 | downloadble_url[pmid] = pdf_url 231 | if save: 232 | save_pdf(self, pmid, pdf_url) 233 | 234 | else: 235 | if FindIt(pmid).url: 236 | print("saving from findit") 237 | findit_url = FindIt(pmid).url 238 | save_pdf(self, pmid, findit_url) 239 | downloadble_url[pmid] = findit_url 240 | 241 | elif scihub: 242 | print("saving from scihub") 243 | scihub_mode(self, pmid) 244 | downloadble_url[pmid] = 'sci_hub' 245 | else: 246 | not_downloaded[pmid] = pdf_source 247 | 248 | except Exception as e: 249 | pass 250 | 251 | return json.dumps({ 252 | 'downloaded': downloadble_url, 253 | 'not_downloaded': not_downloaded 254 | }, indent=3) 255 | 256 | 257 | def write_json(self, path_name, data, name): 258 | """Write json data""" 259 | 260 | with open(f'{path_name}{name}.json', 'w', encoding='utf-8') as f: 261 | json.dump(data, f, ensure_ascii=False, indent=4) 262 | return 0 263 | 264 | 265 | def get_records(self, query=None): 266 | """get fetch result and ids from ncbi website using api""" 267 | 268 | if query: 269 | search_url = f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?sort=relevance&db=pubmed&term={query}&mindate=1800/01/01&maxdate={get_date(self)}&usehistory=y&retmode=json" 270 | else: 271 | search_url = f"https://eutils.ncbi.nlm.nih.gov/entrdefez/eutils/esearch.fcgi?db=pubmed&mindate=1800/01/01&maxdate={self.get_date()}&usehistory=y&retmode=json" 272 | 273 | search_r = requests.post(search_url, verify=False) 274 | search_data = search_r.json() 275 | 276 | webenv = search_data["esearchresult"]['webenv'] 277 | total_records = int(search_data["esearchresult"]['count']) 278 | 279 | return {'total_records': total_records, 280 | 'webenv': webenv, 281 | 'search_data': search_data} 282 | 283 | 284 | def fetch(self, query, 285 | max_documents=None): 286 | """function to do multi task -> fetch ids, based on ids fetch abstracts""" 287 | 288 | all_records = get_records(self, query) 289 | webenv = all_records['webenv'] 290 | all_rec = all_records['total_records'] 291 | 292 | if max_documents: 293 | all_rec = max_documents 294 | fetch_url = f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&api_key={self.key}&retmax={max_documents}&retmode=xml&query_key=1&webenv="+webenv 295 | else: 296 | fetch_url = f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&api_key={self.key}&retmax=9999&retmode=xml&query_key=1&webenv="+webenv 297 | 298 | print("-------------------------------------------\n") 299 | print(f" Fetching total documents -> {all_rec}..\n") 300 | print("-------------------------------------------\n") 301 | 302 | for i in tqdm(range(0, all_rec, 10000)): 303 | try: 304 | meta_data = {} 305 | u_id = str(uuid.uuid4()) 306 | payload = fetch_url+"&retstart="+str(i) 307 | 308 | print(f"Getting this URL: {payload} \n") 309 | 310 | fetch_r = requests.post(payload, verify=False) 311 | pre_name = f'{self.raw_abs_path}/pubmed_batch_{u_id}_{str(i)}_to_{str(i+all_rec)}.xml' 312 | 313 | f = open(pre_name, 'wb') 314 | f.write(fetch_r.content) 315 | f.close() 316 | 317 | meta_data['uid'] = u_id 318 | meta_data['query'] = query 319 | meta_data['url'] = payload 320 | meta_data['total'] = all_rec 321 | meta_data['iter'] = i 322 | 323 | write_json(self, self.meta_data_path, meta_data, u_id) 324 | 325 | except Exception as e: 326 | with 
open('exceptions', 'a') as f:
327 |                 f.write(f" fetch_exception {e} number {i} \n")
328 |             pass
329 | 
330 |     return 0
331 | 
-------------------------------------------------------------------------------- /setup.py: --------------------------------------------------------------------------------
1 | import setuptools
2 | 
3 | with open("README.md", "r") as fh:
4 |     long_description = fh.read()
5 | 
6 | install_requires = ["shutup",
7 |                     "numpy",
8 |                     "pandas",
9 |                     "requests",
10 |                     "tqdm",
11 |                     "bs4",
12 |                     "metapub",
13 |                     "scidownl",
14 |                     "pdfminer3",
15 |                     "pubmed_parser"]
16 | setuptools.setup(
17 |     name="pubmedflow",
18 |     version="0.0.2",
19 |     author="Aditya Ura",
20 |     author_email="aadityaura@gmail.com",
21 |     description="Data collection from PubMed made easy",
22 |     long_description=long_description,
23 |     long_description_content_type="text/markdown",
24 |     license='MIT License',
25 |     url="https://github.com/nfflow/pubmedflow",
26 |     install_requires=install_requires,
27 |     packages=setuptools.find_packages(),
28 |     python_requires='>=3.6',
29 |     include_package_data=True,
30 |     extras_require={"qa": ["nfmodelapis"]},
31 | )
32 | 
--------------------------------------------------------------------------------