├── .github ├── dependabot.yml └── workflows │ ├── ci.yaml │ └── code-quality.yml ├── .gitignore ├── .gitkeep ├── .logs ├── sciscraper.log ├── sciscraper.log.1 ├── sciscraper.log.2 └── sciscraper.log.3 ├── CHANGELOG.md ├── LICENSE ├── README.md ├── config_setup.json ├── logging_config.json ├── main.py ├── poetry.lock ├── poetry.toml ├── pyproject.toml ├── sciscraper.code-workspace ├── src ├── __init__.py ├── argsbuilder.py ├── change_dir.py ├── config.py ├── docscraper.py ├── doi_regex.py ├── doifrompdf.py ├── downloaders.py ├── factories.py ├── fetch.py ├── log.py ├── profilers.py ├── py.typed ├── sciscraper.code-workspace ├── scraperesults.py ├── serials.py ├── stagers.py └── webscrapers.py ├── tests ├── __init__.py ├── conftest.py ├── test_change_dir.py ├── test_cli.py ├── test_dirs │ ├── test_example_file_1.csv │ ├── test_file_blank.txt │ ├── test_file_multiline.txt │ └── test_pdf_1.pdf ├── test_doifrompdf.py ├── test_doiregex.py ├── test_downloaders.py ├── test_fetch.py ├── test_serializers.py ├── test_stagers.py ├── test_webscrapers.py └── test_wordscore.py └── words ├── bycatch_words.txt ├── google_keywords.txt └── target_words.txt /.github/dependabot.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/.github/dependabot.yml -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/.github/workflows/ci.yaml -------------------------------------------------------------------------------- /.github/workflows/code-quality.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/.github/workflows/code-quality.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.logs/sciscraper.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/.logs/sciscraper.log -------------------------------------------------------------------------------- /.logs/sciscraper.log.1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/.logs/sciscraper.log.1 -------------------------------------------------------------------------------- /.logs/sciscraper.log.2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/.logs/sciscraper.log.2 -------------------------------------------------------------------------------- /.logs/sciscraper.log.3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/.logs/sciscraper.log.3 -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/README.md -------------------------------------------------------------------------------- /config_setup.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/config_setup.json -------------------------------------------------------------------------------- /logging_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/logging_config.json -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/main.py -------------------------------------------------------------------------------- /poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/poetry.lock -------------------------------------------------------------------------------- /poetry.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/poetry.toml -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/pyproject.toml -------------------------------------------------------------------------------- /sciscraper.code-workspace: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/sciscraper.code-workspace -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/argsbuilder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/src/argsbuilder.py -------------------------------------------------------------------------------- /src/change_dir.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/src/change_dir.py -------------------------------------------------------------------------------- /src/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/src/config.py -------------------------------------------------------------------------------- /src/docscraper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/src/docscraper.py -------------------------------------------------------------------------------- /src/doi_regex.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/src/doi_regex.py -------------------------------------------------------------------------------- /src/doifrompdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/src/doifrompdf.py -------------------------------------------------------------------------------- /src/downloaders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/src/downloaders.py -------------------------------------------------------------------------------- /src/factories.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/src/factories.py -------------------------------------------------------------------------------- /src/fetch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/src/fetch.py -------------------------------------------------------------------------------- /src/log.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/src/log.py -------------------------------------------------------------------------------- /src/profilers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/src/profilers.py -------------------------------------------------------------------------------- /src/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/sciscraper.code-workspace: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/src/sciscraper.code-workspace -------------------------------------------------------------------------------- /src/scraperesults.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/src/scraperesults.py -------------------------------------------------------------------------------- /src/serials.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/src/serials.py -------------------------------------------------------------------------------- /src/stagers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/src/stagers.py -------------------------------------------------------------------------------- /src/webscrapers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/src/webscrapers.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/tests/conftest.py -------------------------------------------------------------------------------- /tests/test_change_dir.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/tests/test_change_dir.py -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/tests/test_cli.py -------------------------------------------------------------------------------- /tests/test_dirs/test_example_file_1.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/tests/test_dirs/test_example_file_1.csv -------------------------------------------------------------------------------- /tests/test_dirs/test_file_blank.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_dirs/test_file_multiline.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/tests/test_dirs/test_file_multiline.txt -------------------------------------------------------------------------------- /tests/test_dirs/test_pdf_1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/tests/test_dirs/test_pdf_1.pdf -------------------------------------------------------------------------------- /tests/test_doifrompdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/tests/test_doifrompdf.py -------------------------------------------------------------------------------- /tests/test_doiregex.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/tests/test_doiregex.py -------------------------------------------------------------------------------- /tests/test_downloaders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/tests/test_downloaders.py -------------------------------------------------------------------------------- /tests/test_fetch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/tests/test_fetch.py -------------------------------------------------------------------------------- /tests/test_serializers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/tests/test_serializers.py -------------------------------------------------------------------------------- /tests/test_stagers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/tests/test_stagers.py -------------------------------------------------------------------------------- /tests/test_webscrapers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/tests/test_webscrapers.py -------------------------------------------------------------------------------- /tests/test_wordscore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/tests/test_wordscore.py -------------------------------------------------------------------------------- /words/bycatch_words.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/words/bycatch_words.txt -------------------------------------------------------------------------------- /words/google_keywords.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/words/google_keywords.txt -------------------------------------------------------------------------------- /words/target_words.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pathos315/sciscraper/HEAD/words/target_words.txt --------------------------------------------------------------------------------