├── src ├── __init__.py ├── fetch │ ├── __init__.py │ └── fetch.py ├── parse │ ├── __init__.py │ └── parse.py ├── version.py ├── providers │ ├── __init__.py │ ├── arxiv.py │ ├── scidb.py │ └── scihub.py └── papers_dl.py ├── test.sh ├── tests ├── documents │ ├── ids.txt │ ├── reyes-rendering.html │ ├── real-time-rendering.html │ ├── b-tree-techniques.html │ ├── bsp-tree.html │ ├── arxiv.html │ ├── superscalar-cisc.html │ ├── scihub.html │ └── scidb.html ├── __init__.py ├── test_fetch.py ├── test_cli.py └── test_parse.py ├── LICENSE ├── requirements.txt ├── pyproject.toml ├── README.md └── .gitignore /src/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/fetch/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/parse/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.0.25" 2 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | python -m unittest discover 2 | -------------------------------------------------------------------------------- /tests/documents/ids.txt: -------------------------------------------------------------------------------- 1 | https://www.cell.com/current-biology/fulltext/S0960-9822(19)31469-1 2 | 10.1016/j.cub.2019.11.030 3 | 10.1107/s0907444905036693 4 | -------------------------------------------------------------------------------- /tests/__init__.py: 
-------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | sys.path.insert( 5 | 0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src")) 6 | ) 7 | -------------------------------------------------------------------------------- /src/providers/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | sys.path.insert( 5 | 0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "fetch")) 6 | ) 7 | -------------------------------------------------------------------------------- /tests/documents/reyes-rendering.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |
# src/providers/arxiv.py
from parse.parse import parse_ids_from_text


async def get_url(identifier):
    """Return the arXiv PDF URL for *identifier*, or ``None``.

    The identifier is passed to ``parse_ids_from_text`` restricted to the
    ``"arxiv"`` id type. A falsy result means the text was not recognised
    as an arXiv id and ``None`` is returned.

    NOTE(review): the URL is built from *identifier* exactly as given, not
    from whatever ``parse_ids_from_text`` matched -- confirm callers always
    pass a bare arXiv id.
    """
    # Guard clause: anything the parser does not recognise has no arXiv URL.
    if not parse_ids_from_text(identifier, ["arxiv"]):
        return None

    return f"https://arxiv.org/pdf/{identifier}.pdf"