├── requirements.txt ├── tests ├── __init__.py └── test_magzdb.py ├── magzdb ├── version.py ├── __init__.py ├── downloader.py ├── cli.py └── magzdb.py ├── requirements_dev.txt ├── Dockerfile ├── MANIFEST.in ├── setup.cfg ├── .editorconfig ├── .github ├── ISSUE_TEMPLATE.md └── workflows │ ├── continuous-integration-pip.yml │ └── continuous-integration-publish.yml ├── .coveragerc ├── .pre-commit-config.yaml ├── LICENSE ├── setup.py ├── .gitignore ├── Makefile ├── CONTRIBUTING.md └── README.md /requirements.txt: -------------------------------------------------------------------------------- 1 | loguru>=0.5.3 2 | requests>=2.24.0 3 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Unit test package for magzdb.""" 2 | -------------------------------------------------------------------------------- /magzdb/version.py: -------------------------------------------------------------------------------- 1 | __author__ = """Aakash Gajjar""" 2 | __email__ = "skyqutip@gmail.com" 3 | __version__ = "1.2.0" 4 | -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | black==23.11.0 2 | blacken-docs==1.16.0 3 | coverage==7.3.2 4 | pre-commit==3.5.0 5 | pydocstyle==6.3.0 6 | pytest==7.4.3 7 | twine==4.0.2 8 | watchdog==3.0.0 9 | wheel==0.42.0 10 | -------------------------------------------------------------------------------- /magzdb/__init__.py: -------------------------------------------------------------------------------- 1 | """Top-level package for magzdb.""" 2 | # For relative imports to work in Python 3.6 3 | import os 4 | import sys 5 | 6 | sys.path.append(os.path.dirname(os.path.realpath(__file__))) 7 | 8 | from magzdb.magzdb import Magzdb 9 | 10 | __all__ = ["Magzdb"] 11 | 
-------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10-slim-bullseye 2 | 3 | RUN pip install -U magzdb 4 | 5 | RUN apt update && \ 6 | apt install wget --yes && \ 7 | apt-get clean autoclean && \ 8 | apt-get autoremove --yes 9 | 10 | WORKDIR /tmp 11 | 12 | ENTRYPOINT [ "magzdb", "--downloader", "wget" ] -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include CONTRIBUTING.md 2 | include LICENSE 3 | include README.md 4 | include requirements.txt 5 | 6 | recursive-include tests * 7 | recursive-exclude * __pycache__ 8 | recursive-exclude * *.py[co] 9 | 10 | recursive-include docs *.md conf.py Makefile make.bat *.jpg *.png *.gif 11 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.1.0 3 | commit = True 4 | tag = True 5 | 6 | [bumpversion:file:magzdb/version.py] 7 | search = __version__ = "{current_version}" 8 | replace = __version__ = "{new_version}" 9 | 10 | [bdist_wheel] 11 | universal = 1 12 | 13 | [flake8] 14 | exclude = docs 15 | 16 | [aliases] 17 | # Define setup.py command aliases here 18 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | indent_style = space 7 | indent_size = 4 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | charset = utf-8 11 | end_of_line = lf 12 | 13 | [*.bat] 14 | indent_style = tab 15 | end_of_line = crlf 16 | 17 | [LICENSE] 18 | insert_final_newline = false 19 | 20 | [Makefile] 21 | 
indent_style = tab 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | * magzdb version: 2 | * Python version: 3 | * Operating System: 4 | 5 | ### Description 6 | 7 | Describe what you were trying to get done. 8 | Tell us what happened, what went wrong, and what you expected to happen. 9 | 10 | ### What I Did 11 | 12 | ``` 13 | Paste the command(s) you ran and the output. 14 | If there was a crash, please include the traceback here. 15 | ``` 16 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [report] 2 | exclude_lines = 3 | pragma: no cover 4 | except re.error as e: 5 | except requests.ConnectionError as e: 6 | except requests.HTTPError as e: 7 | except FileExistsError: 8 | except requests.exceptions.RequestException: 9 | except FileNotFoundError: 10 | except AttributeError: 11 | os.remove(dest) 12 | continue 13 | return 14 | 15 | [run] 16 | omit = 17 | .eggs/* 18 | venv/* 19 | setup.py 20 | magzdb/cli.py 21 | magzdb/version.py 22 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v2.4.0 6 | hooks: 7 | - id: check-added-large-files 8 | - id: check-yaml 9 | - id: detect-private-key 10 | - id: end-of-file-fixer 11 | - id: requirements-txt-fixer 12 | - id: trailing-whitespace 13 | - repo: https://github.com/asottile/reorder_python_imports 14 | rev: v2.3.0 15 | hooks: 16 | - id: reorder-python-imports 17 | - repo: https://github.com/psf/black 18 | rev: 19.10b0 19 | 
hooks: 20 | - id: black 21 | - repo: https://github.com/asottile/blacken-docs 22 | rev: v1.7.0 23 | hooks: 24 | - id: blacken-docs 25 | additional_dependencies: [black==19.10b0] 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022, Aakash Gajjar 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /.github/workflows/continuous-integration-pip.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | 8 | runs-on: ubuntu-latest 9 | strategy: 10 | matrix: 11 | python-version: ["3.10"] 12 | 13 | steps: 14 | - uses: actions/cache@v2 15 | with: 16 | path: ~/.cache/pip 17 | key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} 18 | restore-keys: | 19 | ${{ runner.os }}-pip- 20 | - uses: actions/checkout@v2 21 | - name: Set up Python ${{ matrix.python-version }} 22 | uses: actions/setup-python@v2 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | - name: Install downloaders 26 | run: | 27 | sudo apt-get install wget curl 28 | sudo snap install aria2c 29 | - name: Install dependencies 30 | run: | 31 | python -m pip install --upgrade pip 32 | pip install pytest 33 | pip install pytest-cov 34 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 35 | - name: Test with pytest 36 | run: | 37 | pytest --cov=./ --cov-report=xml 38 | - name: Upload coverage to Codecov 39 | uses: codecov/codecov-action@v1 40 | -------------------------------------------------------------------------------- /.github/workflows/continuous-integration-publish.yml: -------------------------------------------------------------------------------- 1 | name: publish 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*.*.*' 7 | 8 | jobs: 9 | build: 10 | 11 | runs-on: ubuntu-latest 12 | strategy: 13 | matrix: 14 | python-version: [3.8] 15 | 16 | steps: 17 | - uses: actions/cache@v2 18 | with: 19 | path: ~/.cache/pip 20 | key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} 21 | restore-keys: | 22 | ${{ runner.os }}-pip- 23 | - uses: actions/checkout@v2 24 | - name: Set up Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: ${{ 
matrix.python-version }} 28 | - name: Install dependencies 29 | run: | 30 | python -m pip install --upgrade pip 31 | pip install setuptools 32 | pip install wheel 33 | pip install twine 34 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 35 | - name: Build and publish 36 | env: 37 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 38 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 39 | run: | 40 | python setup.py sdist bdist_wheel 41 | twine upload dist/* 42 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """The setup script.""" 3 | from setuptools import find_packages 4 | from setuptools import setup 5 | 6 | from magzdb.version import __version__ 7 | 8 | with open("README.md") as readme_file: 9 | readme = readme_file.read() 10 | 11 | with open("requirements.txt", "r") as file: 12 | requirements = [r for r in file.readlines() if len(r) > 0] 13 | 14 | setup_requirements = [] 15 | 16 | test_requirements = ["pytest"].extend(requirements) 17 | 18 | setup( 19 | author="Aakash Gajjar", 20 | author_email="skyqutip@gmail.com", 21 | python_requires=">=3.5", 22 | classifiers=[ 23 | "Development Status :: 5 - Production/Stable", 24 | "Intended Audience :: End Users/Desktop ", 25 | "License :: OSI Approved :: MIT License", 26 | "Natural Language :: English", 27 | "Programming Language :: Python :: 3", 28 | "Programming Language :: Python :: 3.6", 29 | "Programming Language :: Python :: 3.7", 30 | "Programming Language :: Python :: 3.8", 31 | "Programming Language :: Python :: 3.9", 32 | "Programming Language :: Python :: 3.10", 33 | ], 34 | description="Magzdb.org Downloader", 35 | entry_points={"console_scripts": ["magzdb=magzdb.cli:main",],}, 36 | include_package_data=True, 37 | install_requires=requirements, 38 | keywords="magzdb", 39 | license="MIT license", 40 | long_description=readme, 41 | 
long_description_content_type="text/markdown", 42 | name="magzdb", 43 | packages=find_packages(include=["magzdb", "magzdb.*"]), 44 | setup_requires=setup_requirements, 45 | test_suite="tests", 46 | tests_require=test_requirements, 47 | url="https://github.com/skyme5/magzdb", 48 | version=__version__, 49 | zip_safe=False, 50 | ) 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # dotenv 84 | .env 85 | 86 | # virtualenv 87 | .venv 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | .spyproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | 98 | # mkdocs documentation 99 | /site 100 | 101 | # mypy 102 | .mypy_cache/ 103 | 104 | # IDE settings 105 | .vscode/ 106 | 107 | *.pdf 108 | 109 | .test-data/ 110 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean clean-test clean-pyc clean-build docs help 2 | .DEFAULT_GOAL := help 3 | 4 | define BROWSER_PYSCRIPT 5 | import os, webbrowser, sys 6 | 7 | from urllib.request import pathname2url 8 | 9 | webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) 10 | endef 11 | export BROWSER_PYSCRIPT 12 | 13 | define PRINT_HELP_PYSCRIPT 14 | import re, sys 15 | 16 | for line in sys.stdin: 17 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) 18 | if match: 19 | target, help = match.groups() 20 | print("%-20s %s" % (target, help)) 21 | endef 22 | export PRINT_HELP_PYSCRIPT 23 | 24 | BROWSER := python -c 
"$$BROWSER_PYSCRIPT" 25 | 26 | help: 27 | @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) 28 | 29 | clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts 30 | 31 | clean-build: ## remove build artifacts 32 | rm -fr build/ 33 | rm -fr dist/ 34 | rm -fr .eggs/ 35 | find . -name '*.egg-info' -exec rm -fr {} + 36 | find . -name '*.egg' -exec rm -rf {} + 37 | 38 | clean-pyc: ## remove Python file artifacts 39 | find . -name '*.pyc' -exec rm -f {} + 40 | find . -name '*.pyo' -exec rm -f {} + 41 | find . -name '*~' -exec rm -f {} + 42 | find . -name '__pycache__' -exec rm -fr {} + 43 | 44 | clean-test: ## remove test and coverage artifacts 45 | rm -fr .tox/ 46 | rm -f .coverage 47 | rm -fr htmlcov/ 48 | rm -fr .pytest_cache 49 | 50 | lint: ## check style with flake8 51 | black magzdb tests 52 | 53 | test: ## run tests quickly with the default Python 54 | pytest 55 | 56 | coverage: ## check code coverage quickly with the default Python 57 | coverage run --source magzdb setup.py test 58 | coverage report -m 59 | coverage html 60 | $(BROWSER) htmlcov/index.html 61 | 62 | release: dist ## package and upload a release 63 | twine upload dist/* 64 | 65 | dist: clean ## builds source and wheel package 66 | python setup.py sdist 67 | python setup.py bdist_wheel 68 | ls -l dist 69 | 70 | install: clean ## install the package to the active Python's site-packages 71 | python setup.py install 72 | 73 | deps: 74 | pip install -r requirements.txt -r requirements_dev.txt 75 | -------------------------------------------------------------------------------- /magzdb/downloader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | 4 | import requests 5 | from loguru import logger 6 | 7 | DOWNLOADER_LIST = ["aria2", "curl", "self", "wget"] 8 | 9 | 10 | def download_file(url: str, dest: str): 11 | USER_AGENT = ( 12 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64)" 13 | 
"AppleWebKit/537.36 (KHTML, like Gecko)" 14 | "Chrome/84.0.4147.68 Safari/537.36" 15 | ) 16 | 17 | if len(os.path.dirname(dest)) > 0: 18 | os.makedirs(os.path.dirname(dest), exist_ok=True) 19 | 20 | try: 21 | if os.path.isfile(dest) and os.path.getsize(dest) == 0: # pragma: no cover 22 | os.remove(dest) 23 | except FileNotFoundError: 24 | pass 25 | 26 | try: 27 | with open(dest, "xb") as handle: 28 | headers = {"User-Agent": USER_AGENT} 29 | response = requests.get(url, stream=True, timeout=160, headers=headers) 30 | if response.status_code != requests.Response.ok: 31 | response.raise_for_status() 32 | 33 | for data in response.iter_content(chunk_size=8192): 34 | handle.write(data) 35 | handle.close() 36 | except FileExistsError: 37 | pass 38 | except requests.exceptions.RequestException: 39 | logger.error(f"File {dest} not found on Server {url}".format(dest)) 40 | pass 41 | 42 | if os.path.getsize(dest) == 0: # pragma: no cover 43 | os.remove(dest) 44 | 45 | 46 | def external_downloader(dir: str, filename: str, url: str, name: str, debug: bool): 47 | parameters = { # pragma: no cover 48 | "aria2": [ 49 | "aria2c", 50 | "--retry-wait=3", 51 | "-c", 52 | f"--dir={dir}", 53 | f"--out={filename}", 54 | url, 55 | ], 56 | "wget": ["wget", "-c", "-O", os.path.join(dir, filename), url], 57 | "curl": ["curl", "-C", "-", url, "--output", os.path.join(dir, filename)], 58 | } 59 | 60 | silent_flags = { # pragma: no cover 61 | "aria2": ["-q"], 62 | "wget": ["-nv"], 63 | "curl": ["--silent"], 64 | } 65 | 66 | if debug: 67 | print(parameters.get(name)) 68 | 69 | return parameters.get(name) if debug else parameters.get(name) + silent_flags[name] 70 | -------------------------------------------------------------------------------- /magzdb/cli.py: -------------------------------------------------------------------------------- 1 | """Console script for magzdb.""" 2 | import argparse 3 | import signal 4 | import sys 5 | 6 | from loguru import logger 7 | 8 | from magzdb.magzdb import 
Magzdb 9 | from magzdb.version import __version__ 10 | 11 | 12 | def handler(signum, frame): 13 | exit(0) 14 | 15 | 16 | def main(): 17 | """Console script for magzdb.""" 18 | parser = argparse.ArgumentParser(description="Magzdb.org Downloader") 19 | 20 | parser.add_argument( 21 | "-V", 22 | "--version", 23 | action="version", 24 | help="Print program version and exit", 25 | version=__version__, 26 | ) 27 | 28 | parser.add_argument( 29 | "-i", 30 | "--id", 31 | help="ID of the Magazine to Download. eg. http://magzdb.org/j/.", 32 | metavar="MAGAZINE_ID", 33 | required=True, 34 | type=str, 35 | ) 36 | 37 | parser.add_argument( 38 | "-e", 39 | "--editions", 40 | help="Select Edition", 41 | metavar="EDITION", 42 | nargs="*", 43 | type=str, 44 | ) 45 | 46 | parser.add_argument( 47 | "-f", "--filter", help="Use filter. See README#Filters", type=str, default=None, 48 | ) 49 | 50 | parser.add_argument( 51 | "-l", "--latest", action="store_true", help="Download only latest edition.", 52 | ) 53 | 54 | parser.add_argument( 55 | "-P", 56 | "--directory-prefix", 57 | help="Download directory.", 58 | metavar="DIRECTORY_PREFIX", 59 | type=None, 60 | ) 61 | 62 | parser.add_argument( 63 | "--downloader", 64 | help="Use External downloader (RECOMMENDED). 
Currently supported: aria2, wget", 65 | metavar="DOWNLOADER", 66 | choices=["aria2", "wget", "curl", "self"], 67 | default="self", 68 | ) 69 | 70 | parser.add_argument( 71 | "--debug", help="Print debug information.", action="store_true", 72 | ) 73 | 74 | parser.add_argument( 75 | "--skip-download", help="Don't download files.", action="store_true", 76 | ) 77 | 78 | args = parser.parse_args() 79 | 80 | if args.downloader == "self": 81 | logger.warning("Use of external downloader like wget or aria2 is recommended") 82 | 83 | dl = Magzdb( 84 | directory_prefix=args.directory_prefix, 85 | downloader=args.downloader, 86 | debug=args.debug, 87 | skip_download=args.skip_download, 88 | ) 89 | 90 | signal.signal(signal.SIGINT, handler) 91 | 92 | dl.download( 93 | id=args.id, 94 | editions=args.editions or list(), 95 | latest_only=args.latest, 96 | filter=args.filter, 97 | ) 98 | 99 | return 0 100 | 101 | 102 | if __name__ == "__main__": 103 | sys.exit(main()) # pragma: no cover 104 | -------------------------------------------------------------------------------- /tests/test_magzdb.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Tests for `magzdb` package.""" 3 | import os 4 | import shutil 5 | import unittest 6 | 7 | import pytest 8 | 9 | from magzdb import Magzdb 10 | from magzdb.magzdb import Magzdb 11 | 12 | 13 | class TestMagzdb(unittest.TestCase): 14 | """Tests for `magzdb` package.""" 15 | 16 | def setUp(self): 17 | """Set up test fixtures""" 18 | self.data_dir = ".test-data" 19 | self.title = "Nature" 20 | self.magzdb = Magzdb( 21 | directory_prefix=self.data_dir, debug=True, skip_download=True 22 | ) 23 | self.magzdbDLWget = Magzdb( 24 | directory_prefix=self.data_dir, 25 | downloader="wget", 26 | debug=True, 27 | skip_download=True, 28 | ) 29 | self.magzdbDLAria2 = Magzdb( 30 | directory_prefix=self.data_dir, 31 | downloader="aria2", 32 | debug=True, 33 | skip_download=True, 34 | ) 35 | 
self.magzdbDLCurl = Magzdb( 36 | directory_prefix=self.data_dir, 37 | downloader="curl", 38 | debug=True, 39 | skip_download=True, 40 | ) 41 | self.magzdbNoDL = Magzdb( 42 | directory_prefix=self.data_dir, 43 | downloader="wget", 44 | debug=True, 45 | skip_download=True, 46 | ) 47 | 48 | def tearDown(self): 49 | """Tear down test fixtures.""" 50 | self.magzdb.request.close() 51 | self.magzdbDLWget.request.close() 52 | self.magzdbDLAria2.request.close() 53 | self.magzdbDLCurl.request.close() 54 | self.magzdbNoDL.request.close() 55 | if os.path.isdir(os.path.join(self.data_dir)): 56 | shutil.rmtree(os.path.join(self.data_dir)) 57 | 58 | def test_download_internal(self): 59 | """Test download.""" 60 | self.magzdb.download(id="2249", editions=["2716361"]) 61 | self.magzdb.download(id="2490", editions=["3694138"]) 62 | 63 | def test_download_latest(self): 64 | """Test download.""" 65 | self.magzdb.download(id="2249", editions=["2716361"], latest_only=True) 66 | 67 | def test_download_wget(self): 68 | """Test download.""" 69 | self.magzdbDLWget.download(id="2249", editions=["2716361"]) 70 | 71 | def test_download_curl(self): 72 | """Test download.""" 73 | self.magzdbDLCurl.download(id="2249", editions=["2716361"]) 74 | 75 | def test_download_aria2c(self): 76 | """Test download.""" 77 | self.magzdbDLAria2.download(id="2249", editions=["2716361"]) 78 | 79 | def test_issue_count(self): 80 | """Test download.""" 81 | title, editions = self.magzdb.get_editions(id="1826") 82 | self.assertTrue(len(editions) >= 2441) 83 | self.assertEqual(title, self.title) 84 | 85 | def test_issue_filter(self): 86 | """Test download.""" 87 | self.magzdbNoDL.download(id="2249", filter="eid == 2716361") 88 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Contributions are welcome, and they are greatly appreciated! 
Every little bit 4 | helps, and credit will always be given. 5 | 6 | You can contribute in many ways: 7 | 8 | ## Types of Contributions 9 | 10 | ### Report Bugs 11 | 12 | Report bugs at https://github.com/skyme5/magzdb/issues. 13 | 14 | If you are reporting a bug, please include: 15 | 16 | * Your operating system name and version. 17 | * Any details about your local setup that might be helpful in troubleshooting. 18 | * Detailed steps to reproduce the bug. 19 | 20 | ### Fix Bugs 21 | 22 | Look through the GitHub issues for bugs. Anything tagged with "bug" and "help 23 | wanted" is open to whoever wants to implement it. 24 | 25 | ### Implement Features 26 | 27 | Look through the GitHub issues for features. Anything tagged with "enhancement" 28 | and "help wanted" is open to whoever wants to implement it. 29 | 30 | ### Write Documentation 31 | 32 | magzdb could always use more documentation, whether as part of the 33 | official magzdb docs, in docstrings, or even on the web in blog posts, 34 | articles, and such. 35 | 36 | ### Submit Feedback 37 | 38 | The best way to send feedback is to file an issue at https://github.com/skyme5/magzdb/issues. 39 | 40 | If you are proposing a feature: 41 | 42 | * Explain in detail how it would work. 43 | * Keep the scope as narrow as possible, to make it easier to implement. 44 | * Remember that this is a volunteer-driven project, and that contributions 45 | are welcome :) 46 | 47 | ## Get Started! 48 | 49 | Ready to contribute? Here's how to set up `magzdb` for local development. 50 | 51 | 1. Fork the `magzdb` repo on GitHub. 52 | 2. Clone your fork locally: 53 | 54 | ```shell 55 | $ git clone git@github.com:your_name_here/magzdb.git 56 | ``` 57 | 58 | 3. Install your local copy into a virtualenv. Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development: 59 | 60 | ```shell 61 | $ mkvirtualenv magzdb 62 | $ cd magzdb/ 63 | $ python setup.py develop 64 | ``` 65 | 66 | 4. 
Create a branch for local development: 67 | 68 | ```shell 69 | $ git checkout -b name-of-your-bugfix-or-feature 70 | ``` 71 | 72 | Now you can make your changes locally. 73 | 74 | 5. When you're done making changes, check that your changes pass flake8 and the 75 | tests, including testing other Python versions with tox: 76 | 77 | ```shell 78 | $ flake8 magzdb tests 79 | $ python setup.py test or pytest 80 | $ tox 81 | ``` 82 | 83 | To get flake8 and tox, just pip install them into your virtualenv. 84 | 85 | 6. Commit your changes and push your branch to GitHub: 86 | 87 | ```shell 88 | $ git add . 89 | $ git commit -m "Your detailed description of your changes." 90 | $ git push origin name-of-your-bugfix-or-feature 91 | ``` 92 | 93 | 7. Submit a pull request through the GitHub website. 94 | 95 | ## Pull Request Guidelines 96 | 97 | Before you submit a pull request, check that it meets these guidelines: 98 | 99 | 1. The pull request should include tests. 100 | 2. If the pull request adds functionality, the docs should be updated. Put 101 | your new functionality into a function with a docstring, and add the 102 | feature to the list in README.md. 103 | 3. The pull request should work for Python 3.5, 3.6, 3.7 and 3.8, and for PyPy. Check 104 | https://travis-ci.com/skyme5/magzdb/pull_requests 105 | and make sure that the tests pass for all supported Python versions. 106 | 107 | ## Tips 108 | 109 | To run tests: 110 | 111 | ```shell 112 | $ pytest . 113 | ``` 114 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 |

3 |

4 | magzdb - magzdb.org Downloader

5 | 6 | Python Package 10 | 11 | 12 | CI 16 | 17 | 18 | Code Coverage 22 | 23 | 24 | Python Versions 28 | 29 | 30 | The Uncompromising Code Formatter 34 | 35 | 36 | Monthly Downloads 40 | 41 | 42 | License: MIT 46 | 47 |

48 | Buy Me A Coffee 49 |
50 | 51 |

52 | 53 | ### Installation 54 | 55 | Install using pip 56 | 57 | ```bash 58 | $ pip install -U magzdb 59 | ``` 60 | 61 | 62 | ### Usage 63 | 64 | ```text 65 | usage: magzdb [-h] [-V] -i MAGAZINE_ID [-e [EDITION [EDITION ...]]] 66 | [-f FILTER] [-l] [-P DIRECTORY_PREFIX] [--downloader DOWNLOADER] 67 | [--debug] [--skip-download] 68 | 69 | Magzdb.org Downloader 70 | 71 | required arguments: 72 | -i MAGAZINE_ID, --id MAGAZINE_ID 73 | ID of the Magazine to Download. eg. http://magzdb.org/j/. 74 | 75 | optional arguments: 76 | -h, --help show this help message and exit 77 | -V, --version Print program version and exit 78 | -e [EDITION [EDITION ...]], --editions [EDITION [EDITION ...]] 79 | Select Edition 80 | -f FILTER, --filter FILTER 81 | Use filter. See README#Filters 82 | -l, --latest Download only latest edition. 83 | -P DIRECTORY_PREFIX, --directory-prefix DIRECTORY_PREFIX 84 | Download directory. 85 | --downloader DOWNLOADER 86 | Use External downloader (RECOMMENDED). Currently supported: aria2, wget, curl 87 | --debug Print debug information. 88 | --skip-download Don't download files. 89 | ``` 90 | 91 | ### Usage Examples 92 | 93 | #### Docker 94 | 95 | ```bash 96 | docker build . -t magzdb 97 | docker run -v "$(pwd)":/tmp magzdb -h 98 | 99 | # Add alias to shell 100 | alias magzdb='docker run -v "$(pwd)":/tmp magzdb' 101 | magzdb -h 102 | ``` 103 | 104 | #### Download all editions 105 | 106 | ```bash 107 | $ magzdb -i 1826 108 | ``` 109 | 110 | #### Filters 111 | 112 | You can supply a filter using `-f`. For example, to download issues between 113 | `4063895` and `4063901`, you can write: 114 | 115 | ```bash 116 | $ magzdb -i 1826 -f "eid > 4063895 and eid < 4063901" 117 | ``` 118 | 119 | You can use `eid` and `year` in the filter expression. 
120 | 121 | ##### More examples of filter expression 122 | 123 | - `eid > 4063895 and eid < 4063901` or `eid >= 4063895 and eid <= 4063901` 124 | - `eid >= 4063895` or `eid != 4063895` 125 | - `year >= 2018`, `year <= 2018`, `year == 2018` or even `year != 2018` 126 | 127 | #### Download only latest edition 128 | 129 | ```bash 130 | $ magzdb -i 1826 -l 131 | ``` 132 | 133 | #### Download only latest edition with custom location `magazine` 134 | 135 | ```bash 136 | $ magzdb -i 1826 -l -P magazine 137 | ``` 138 | 139 | #### Use external downloader 140 | 141 | ```bash 142 | $ magzdb -i 1826 -l -P magazine --downloader wget 143 | ``` 144 | 145 | > This is recommended since the internal downloader does not support resuming interrupted downloads. 146 | 147 | ### Python Installation Recommendation 148 | 149 | If you don't want to install the official [Python](https://www.python.org/downloads/) distribution system-wide (globally), 150 | you can install Python with the [pyenv installer](https://github.com/pyenv/pyenv-installer) under your own user account. This is the preferred method for macOS users, because High Sierra and later versions of macOS ship with the old Python 2.7.10. 151 | 152 | ## Contributing 153 | 154 | Found a bug or missing a feature? You are more than welcome to contribute. 
# (tail of README.md from the preceding file in this dump)
#
# ## License
#
# MIT
#
# --------------------------------------------------------------------------------
# /magzdb/magzdb.py:
# --------------------------------------------------------------------------------
"""Main module."""
import os
import re
import subprocess

import requests
from loguru import logger

from magzdb.downloader import download_file
from magzdb.downloader import DOWNLOADER_LIST
from magzdb.downloader import external_downloader


class Magzdb:
    """Magzdb Downloader.

    Looks up the editions of a magazine on magzdb.org and downloads them,
    either with the built-in ``download_file`` helper or by spawning an
    external downloader command.
    """

    def __init__(
        self,
        directory_prefix=None,
        downloader="self",
        debug=False,
        skip_download=False,
    ):
        """Set global options.

        Args:
            directory_prefix (str): Directory prefix for downloading. Defaults to current directory.
            downloader (str): One of self, aria2, wget. Defaults to self.
                Any value not present in ``DOWNLOADER_LIST`` falls back to "self".
            debug (bool, optional): Enable debug output. Defaults to False.
            skip_download (bool, optional): Resolve download links but do not
                fetch the files. Defaults to False.
        """
        self.directory_prefix = directory_prefix or os.getcwd()
        self.downloader = downloader if downloader in DOWNLOADER_LIST else "self"
        self.debug = debug
        self.skip_download = skip_download

        # NOTE(review): the copy of this file under review had its HTML-like
        # text stripped. The ``<title>`` wrapper below, the edition pattern,
        # the session and the two URL templates are reconstructed from how the
        # rest of the class uses them -- confirm against the upstream file.
        self.REGEX_TITLE = re.compile(
            r"""<title>(?P<title>[^|]+)\|\s+magzDB</title>""",
            flags=re.IGNORECASE | re.MULTILINE,
        )
        # Groups are consumed positionally as (edition id, year, ...); see
        # ``apply_filter`` and ``download``.
        # NOTE(review): whatever followed the closing `">` of the anchor tag in
        # the original pattern was lost -- confirm.
        self.REGEX_EDITION = re.compile(
            r"""<a\s*href="\/num\/(?P<id>\d+)"\s*title="(?P<year>\d+)[^"]+">""",
            flags=re.IGNORECASE | re.MULTILINE,
        )

        # NOTE(review): presumably a shared requests session -- confirm.
        self.request = requests.Session()
        # NOTE(review): URL templates inferred from the "/j/<id>" URL used in
        # ``get_editions`` and the "/dl" link pattern in ``download`` -- confirm.
        self.EDITION_DOWNLOAD_PAGE = "http://magzdb.org/num/{}"
        self.EDITION_DOWNLOAD_URL = "http://magzdb.org/file/{}/dl"

    def _print(self, msg):
        """Emit `msg` only when debug output is enabled.

        NOTE(review): reconstructed; the original body (and the log level it
        used) was lost -- confirm.
        """
        if self.debug:
            logger.debug(msg)

    def _html_regex(self, url, regex):
        """Fetch `url` and return the list of all matches of `regex` in its body.

        NOTE(review): reconstructed from its call sites in ``download``, which
        iterate the result and index it with ``[0]`` -- confirm.
        """
        return re.findall(regex, self.request.get(url).text)

    def apply_filter(self, all_editions, editions=None, filter=None):
        """Select the editions to download.

        An explicit, non-empty `editions` list wins over `filter`; when neither
        is given every edition is returned.

        NOTE(review): the signature and the head of ``prepare_filter`` were
        lost in the reviewed copy and are reconstructed from the call in
        ``download`` and the surviving tail of the method -- confirm.

        Args:
            all_editions (list): Edition tuples whose first two items are the
                edition id and the year.
            editions (list, optional): Edition ids to keep.
            filter (str, optional): Expression over ``eid`` and ``year``,
                e.g. ``"eid > 100 and year <= 2015"``.

        Returns:
            list: The selected edition tuples, in their original order.
        """

        def prepare_filter(filter_str):
            """Reduce `filter_str` to whitelisted tokens joined by spaces."""
            # NOTE(review): only the last three entries (">", ">=", "==")
            # survived in the reviewed copy; the rest is inferred from the
            # tokenizer pattern below and from ``eval_filter`` -- confirm.
            allowed_tokens = [
                "eid",
                "year",
                "and",
                "or",
                "<",
                "<=",
                "!=",
                ">",
                ">=",
                "==",
            ]
            # Plain integers without leading zeros (a leading zero would not be
            # a valid Python literal once the expression is evaluated).
            number = re.compile(r"^[-+]?([1-9]\d*|0)$")
            pairs = re.findall(
                r"(\w+|<=|>=|<|>|==|!=)\s*(\d+)?", filter_str.lower()
            )
            tokens = [item.strip() for pair in pairs for item in pair if item != ""]
            return " ".join(
                token
                for token in tokens
                if token in allowed_tokens or number.match(token)
            )

        def eval_filter(filter_str, params):
            """Evaluate the prepared filter for one edition tuple."""
            eid, year, *_ = params
            expression = filter_str.replace("eid", eid).replace("year", year)
            # SECURITY: eval() on user-supplied text. It is only reached with
            # the output of prepare_filter(), which keeps nothing but
            # whitelisted words, comparison operators and integers. Builtins
            # are removed as an extra safeguard; the expression needs none.
            return eval(expression, {"__builtins__": {}}, {})

        if editions is not None and len(editions) > 0:
            return [e for e in all_editions if e[0] in editions]

        if filter is not None:
            prepared = prepare_filter(filter)
            self._print("Filter prepared: `{}`".format(prepared))
            return [e for e in all_editions if eval_filter(prepared, e)]

        return all_editions

    def get_valid_filename(self, s):
        """Return the given string converted to a string that can be used for a clean filename.

        Remove leading and trailing spaces; convert other spaces to
        underscores; and remove anything that is not an alphanumeric, dash,
        underscore, or dot.
        >>> get_valid_filename("john's portrait in 2004.jpg")
        'johns_portrait_in_2004.jpg'
        """
        s = str(s).strip().replace(" ", "_")
        return re.sub(r"(?u)[^-\w.]", "", s)

    def format_filename(self, title, year, eid, ext=".pdf"):
        """Return the file name used for one edition: ``<title>_<year>_No_<eid><ext>``."""
        # The "#" markers are literal characters; get_valid_filename() removes
        # them again. They are kept so the result stays identical for every
        # input (they also shield leading/trailing spaces from strip()).
        return self.get_valid_filename(f"#{title} #{year} No #{eid}") + ext

    def get_editions(self, id: str):
        """Get title and editions for `id`.

        Args:
            id (str): Magazine ID

        Raises:
            Exception: the page could not be parsed (regex error or no title found)
            Exception: requests.ConnectionError
            Exception: requests.HTTPError

        Returns:
            Tuple[str, list]: Tuple of title and editions found from magzdb
        """
        try:
            response = self.request.get("http://magzdb.org/j/" + id)
            # Without this call a 4xx/5xx answer would never surface as
            # HTTPError and the handler below would be dead code.
            response.raise_for_status()
            docstring = response.text
            title_match = re.search(self.REGEX_TITLE, docstring)
            editions = re.findall(self.REGEX_EDITION, docstring)
        except re.error as e:
            logger.error(e)
            raise Exception("REGEX error.") from e
        except requests.ConnectionError as e:
            logger.error(e)
            raise Exception("Connection error encountered.") from e
        except requests.HTTPError as e:
            logger.error(e)
            raise Exception("HTTP Error encountered.") from e

        if title_match is None:
            # Previously this surfaced as an AttributeError on None.
            raise Exception("Could not find a magazine title for id {}.".format(id))

        return (title_match.group("title").strip(), editions)

    def download(
        self, id: str, editions=None, latest_only=False, filter=None,
    ):
        """Download Editions.

        Creates ``<directory_prefix>/<title>`` and, for every selected edition,
        resolves its download links and fetches the file with the configured
        downloader. The target path of each link is printed to stdout, also
        when `skip_download` is set.

        Args:
            id (str): Magazine ID
            editions (list, optional): Edition ids to download. Takes
                precedence over `filter`.
            latest_only (bool, optional): Keep only the last of the selected
                editions. Defaults to False.
            filter (str, optional): Filter expression, see ``apply_filter``.
        """
        title, all_editions = self.get_editions(id=id)
        title = self.get_valid_filename(title)
        directory = os.path.join(self.directory_prefix, title)
        os.makedirs(directory, exist_ok=True)

        selected_editions = self.apply_filter(all_editions, editions, filter)

        logger.info("Found {} editions of {}".format(len(selected_editions), title))

        if latest_only:
            selected_editions = selected_editions[-1:]

        for edition in reversed(selected_editions):
            eid, year, *_ = edition

            logger.info("Downloading year {} id {}".format(year, eid))

            # NOTE(review): both patterns below lost their leading "<a ..."
            # part in the reviewed copy (only `\d+)\/dl>` and
            # `http[^\"]*(?:\.\w+)?)"` survived); the prefixes are
            # reconstructed -- confirm against upstream.
            for download_id in self._html_regex(
                self.EDITION_DOWNLOAD_PAGE.format(eid),
                r"""<a\s*href=\.\.\/file\/(?P<id>\d+)\/dl>""",
            ):
                self._print("Download Link ID: {}".format(download_id))

                download_url_list = self._html_regex(
                    self.EDITION_DOWNLOAD_URL.format(download_id),
                    r'''<a\s*href="(?P<url>http[^\"]*(?:\.\w+)?)"''',
                )

                if not download_url_list:
                    continue

                download_url = download_url_list[0]

                self._print("Download URL: {}".format(download_url))

                filename = self.format_filename(title, year, eid)
                filepath = os.path.join(directory, filename)
                print(filepath)

                if self.skip_download:
                    continue

                if self.downloader == "self":
                    download_file(download_url, filepath)
                else:  # pragma: no cover
                    subprocess.call(
                        external_downloader(
                            directory,
                            filename,
                            download_url,
                            self.downloader,
                            self.debug,
                        )
                    )


# --------------------------------------------------------------------------------