├── test ├── __init__.py ├── test_config.py ├── test_oauth.py └── test_cookies.py ├── requirements.txt ├── setup.cfg ├── bin └── gallery-dl ├── scripts ├── hook-gallery_dl.py ├── util.py ├── pyinstaller.py ├── run_tests.sh ├── build_testresult_db.py ├── bash_completion.py ├── create_test_data.py └── release.sh ├── gallery_dl ├── version.py ├── __main__.py ├── downloader │ ├── text.py │ ├── __init__.py │ ├── common.py │ └── ytdl.py ├── postprocessor │ ├── mtime.py │ ├── common.py │ ├── __init__.py │ ├── classify.py │ ├── metadata.py │ ├── exec.py │ └── zip.py ├── extractor │ ├── 8chan.py │ ├── 4chan.py │ ├── mangapanda.py │ ├── ngomik.py │ ├── recursive.py │ ├── message.py │ ├── mangastream.py │ ├── chan.py │ ├── idolcomplex.py │ ├── directlink.py │ ├── adultempire.py │ ├── nsfwalbum.py │ ├── imgth.py │ ├── mangafox.py │ ├── rule34.py │ ├── lineblog.py │ ├── erolord.py │ ├── yandere.py │ ├── safebooru.py │ ├── senmanga.py │ ├── e621.py │ ├── hypnohub.py │ ├── test.py │ ├── khinsider.py │ ├── gfycat.py │ ├── konachan.py │ ├── 3dbooru.py │ ├── 2chan.py │ ├── slideshare.py │ ├── danbooru.py │ ├── vanillarock.py │ ├── myportfolio.py │ ├── hentaicafe.py │ ├── readcomiconline.py │ ├── hentainexus.py │ ├── fuskator.py │ ├── hentai2read.py │ ├── pururin.py │ ├── hbrowse.py │ ├── hentaihere.py │ ├── fallenangels.py │ ├── bobx.py │ ├── warosu.py │ ├── paheal.py │ ├── komikcast.py │ ├── plurk.py │ ├── sankakucomplex.py │ └── shopify.py ├── exception.py ├── config.py └── oauth.py ├── .travis.yml ├── .gitignore ├── Makefile ├── snap ├── local │ └── launchers │ │ └── gallery-dl-launch └── snapcraft.yaml └── setup.py /test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests>=2.11.0 2 | 
# Repository root: the parent of the directory that contains this script.
ROOTDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Put the resolved repository root first on the module search path.
sys.path.insert(0, os.path.realpath(ROOTDIR))


def path(*segments, join=os.path.join):
    """Return 'segments' joined onto the repository root directory

    'join' is the callable used to combine the parts; it receives
    ROOTDIR followed by every segment as positional arguments.
    """
    parts = (ROOTDIR,) + segments
    return join(*parts)
Foundation. 8 | 9 | __version__ = "1.11.0-dev" 10 | -------------------------------------------------------------------------------- /scripts/pyinstaller.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | """Build a standalone executable using PyInstaller""" 5 | 6 | import PyInstaller.__main__ 7 | import util 8 | 9 | PyInstaller.__main__.run([ 10 | "--onefile", 11 | "--console", 12 | "--name", "gallery-dl." + ("exe" if PyInstaller.is_win else "bin"), 13 | "--additional-hooks-dir", util.path("scripts"), 14 | "--distpath", util.path("dist"), 15 | "--workpath", util.path("build"), 16 | "--specpath", util.path("build"), 17 | util.path("gallery_dl", "__main__.py"), 18 | ]) 19 | -------------------------------------------------------------------------------- /gallery_dl/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright 2017 Mike Fährmann 5 | # 6 | # This program is free software; you can redistribute it and/or modify 7 | # it under the terms of the GNU General Public License version 2 as 8 | # published by the Free Software Foundation. 
#!/bin/bash

# Run the unit tests located in ../test (relative to this script) with
# 'nosetests'.
#
# The test group is taken from the first argument, then from the
# GALLERYDL_TESTS environment variable, and defaults to 'core'.
# Any unrecognized group selects no specific files, in which case
# 'nosetests' runs every test it finds in the test directory.

DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

TESTS_CORE=(config cookies downloader extractor oauth postprocessor text util)
TESTS_RESULTS=(results)


# select tests
# (array expansions are quoted so elements are never word-split or globbed)
case "${1:-${GALLERYDL_TESTS:-core}}" in
    core)    TESTS=( "${TESTS_CORE[@]}"    );;
    results) TESTS=( "${TESTS_RESULTS[@]}" );;
    *)       TESTS=( );;
esac


# transform each array element to test_###.py
TESTS=( "${TESTS[@]/#/test_}" )
TESTS=( "${TESTS[@]/%/.py}" )


# run 'nosetests' with selected tests
# (or all tests if ${TESTS} is empty)
nosetests --verbose -w "${DIR}/../test" "${TESTS[@]}"
class MtimePP(PostProcessor):
    """Postprocessor that copies a metadata value into '_mtime'"""

    def __init__(self, pathfmt, options):
        PostProcessor.__init__(self)
        # metadata field to read; falls back to "date" when not configured
        self.key = options.get("key", "date")

    def run(self, pathfmt):
        """Store the configured metadata field as '_mtime' in the kwdict

        Values that provide a 'timestamp' attribute are converted by
        calling it; everything else is passed through parse_int().
        """
        value = pathfmt.kwdict.get(self.key)
        to_timestamp = getattr(value, "timestamp", None)

        if to_timestamp:
            mtime = to_timestamp()
        else:
            mtime = parse_int(value)
        pathfmt.kwdict["_mtime"] = mtime
class PostProcessor():
    """Base class for postprocessors"""

    def __init__(self):
        # The logger name is the lowercased class name without its last
        # two characters (e.g. "MtimePP" -> "postprocessor.mtime").
        class_name = self.__class__.__name__
        self.log = logging.getLogger(
            "postprocessor.{}".format(class_name[:-2].lower()))

    @staticmethod
    def prepare(pathfmt):
        """Update file paths, etc."""
        return None

    @staticmethod
    def run(pathfmt):
        """Execute the postprocessor for a file"""
        return None

    @staticmethod
    def run_after(pathfmt):
        """Execute postprocessor after moving a file to its target location"""
        return None

    @staticmethod
    def finalize():
        """Cleanup"""
        return None

    def __repr__(self):
        # instances are represented by their bare class name
        return self.__class__.__name__
# Names of the postprocessor submodules that find() is allowed to import.
modules = [
    "classify",
    "exec",
    "metadata",
    "mtime",
    "ugoira",
    "zip",
]

log = logging.getLogger("postprocessor")


def find(name):
    """Return a postprocessor class with the given name

    The result (including None for unknown names or failed imports)
    is remembered in '_cache' and returned directly on later calls.
    """
    if name in _cache:
        return _cache[name]

    found = None
    if name in modules:  # prevent unwanted imports
        try:
            mod = importlib.import_module("." + name, __package__)
        except ImportError:
            # module could not be imported; cache and return None
            pass
        else:
            found = mod.__postprocessor__

    _cache[name] = found
    return found


# --------------------------------------------------------------------
# internals

_cache = {}
# Names of the downloader submodules that find() is allowed to import.
modules = [
    "http",
    "text",
    "ytdl",
]


def find(scheme):
    """Return downloader class suitable for handling the given scheme

    "https" is served by the "http" module. Results (including None
    for unknown schemes or failed imports) are stored in '_cache'.
    """
    if scheme in _cache:
        return _cache[scheme]

    # both HTTP variants share one downloader module
    name = "http" if scheme == "https" else scheme

    downloader = None
    if name in modules:  # prevent unwanted imports
        try:
            mod = importlib.import_module("." + name, __package__)
        except ImportError:
            # module could not be imported; cache and return None
            pass
        else:
            downloader = mod.__downloader__

    # register the result for "http" and "https" at the same time
    keys = ("http", "https") if name == "http" else (name,)
    for key in keys:
        _cache[key] = downloader
    return downloader


# --------------------------------------------------------------------
# internals

_cache = {}
+ self.scheme) 24 | self.part = self.config("part", True) 25 | self.partdir = self.config("part-directory") 26 | 27 | if self.partdir: 28 | self.partdir = util.expand_path(self.partdir) 29 | os.makedirs(self.partdir, exist_ok=True) 30 | 31 | def config(self, key, default=None): 32 | """Interpolate downloader config value for 'key'""" 33 | return config.interpolate(("downloader", self.scheme, key), default) 34 | 35 | def download(self, url, pathfmt): 36 | """Write data from 'url' into the file specified by 'pathfmt'""" 37 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | PREFIX ?= /usr/local 3 | BINDIR ?= $(PREFIX)/bin 4 | MANDIR ?= $(PREFIX)/man 5 | SHAREDIR ?= $(PREFIX)/share 6 | PYTHON ?= /usr/bin/env python3 7 | 8 | # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local 9 | SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi) 10 | 11 | all: man completion docs/supportedsites.rst 12 | 13 | clean: 14 | $(RM) gallery-dl.1 gallery-dl.conf.5 gallery-dl.bash_completion 15 | $(RM) -r build/ 16 | 17 | install: man completion 18 | $(PYTHON) setup.py install 19 | 20 | release: man completion docs/supportedsites.rst 21 | scripts/release.sh 22 | 23 | test: 24 | scripts/run_tests.sh 25 | 26 | executable: 27 | scripts/pyinstaller.py 28 | 29 | completion: gallery-dl.bash_completion 30 | 31 | man: gallery-dl.1 gallery-dl.conf.5 32 | 33 | .PHONY: all clean install release test executable completion man 34 | 35 | docs/supportedsites.rst: gallery_dl/*/*.py scripts/supportedsites.py 36 | $(PYTHON) scripts/supportedsites.py 37 | 38 | gallery-dl.1: gallery_dl/option.py scripts/man.py 39 | $(PYTHON) scripts/man.py 40 | 41 | gallery-dl.conf.5: docs/configuration.rst scripts/man.py 42 | $(PYTHON) scripts/man.py 43 | 44 | gallery-dl.bash_completion: gallery_dl/option.py 
scripts/bash_completion.py 45 | $(PYTHON) scripts/bash_completion.py 46 | -------------------------------------------------------------------------------- /snap/local/launchers/gallery-dl-launch: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # This is the maintenance launcher for the snap; make necessary runtime environment changes to make the snap work here. You may also insert security confinement/deprecation/obsoletion notices of the snap here. 3 | 4 | set \ 5 | -o errexit \ 6 | -o errtrace \ 7 | -o nounset \ 8 | -o pipefail 9 | 10 | # Use user's real home directory for canonical configuration path access 11 | declare REALHOME="$( 12 | getent passwd "${USER}" \ 13 | | cut --delimiter=: --fields=6 14 | )" 15 | HOME="${REALHOME}" 16 | 17 | if ! test -f "${SNAP_USER_COMMON}"/marker_disable_interface_warning; then 18 | # Warn if the `removable-media` interface isn't connected 19 | if ! ls /media &>/dev/null; then 20 | printf -- \ 21 | "It seems that this snap isn't connected to the \`removable-media\` security confinement interface. 
If you want to save the files under \`/media\`, \`/run/media\`, or \`/mnt\` directories you need to connect this snap to the \`removable-media\` interface by running the following command in a terminal:\\n\\n sudo snap connect %s:removable-media\\n\\n" \ 22 | "${SNAP_NAME}" \ 23 | >&2 24 | printf -- \ 25 | "To disable this warning create an empty file at the following path:\\n\\n %s/marker_disable_interface_warning\\n\\n" \ 26 | "${SNAP_USER_COMMON}" \ 27 | >&2 28 | fi 29 | fi 30 | 31 | # Finally run the next part of the command chain 32 | exec "${@}" 33 | -------------------------------------------------------------------------------- /gallery_dl/extractor/4chan.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright 2015-2019 Mike Fährmann 4 | # 5 | # This program is free software; you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License version 2 as 7 | # published by the Free Software Foundation. 8 | 9 | """Extract images and videos from https://www.4chan.org/""" 10 | 11 | from . import chan 12 | from .. 
import text 13 | 14 | 15 | class FourchanThreadExtractor(chan.ChanThreadExtractor): 16 | """Extractor for images from threads from 4chan.org""" 17 | category = "4chan" 18 | pattern = (r"(?:https?://)?boards\.4chan(?:nel)?\.org" 19 | r"/([^/]+)/thread/(\d+)") 20 | test = ( 21 | ("https://boards.4chan.org/tg/thread/15396072/", { 22 | "url": "39082ad166161966d7ba8e37f2173a824eb540f0", 23 | "keyword": "7ae2f4049adf0d2f835eb91b6b26b7f4ec882e0a", 24 | "content": "20b7b51afa51c9c31a0020a0737b889532c8d7ec", 25 | }), 26 | ("https://boards.4channel.org/tg/thread/15396072/", { 27 | "url": "39082ad166161966d7ba8e37f2173a824eb540f0", 28 | "keyword": "7ae2f4049adf0d2f835eb91b6b26b7f4ec882e0a", 29 | }), 30 | ) 31 | api_url = "https://a.4cdn.org/{board}/thread/{thread}.json" 32 | file_url = "https://i.4cdn.org/{board}/{tim}{ext}" 33 | 34 | def update(self, post, data=None): 35 | chan.ChanThreadExtractor.update(self, post, data) 36 | post["filename"] = text.unescape(post["filename"]) 37 | -------------------------------------------------------------------------------- /gallery_dl/extractor/mangapanda.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright 2015-2019 Mike Fährmann 4 | # 5 | # This program is free software; you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License version 2 as 7 | # published by the Free Software Foundation. 
# filter test cases
#
# Each entry is (index, extractor class, URL, expected result). Only
# extractors with a non-empty 'test' attribute are considered; when
# command-line arguments are given, only extractors whose category
# appears among them. Tests without a result are skipped.

tests = [
    (idx, extr, url, result)

    for extr in extractor.extractors()
    if hasattr(extr, "test") and extr.test
    if len(sys.argv) <= 1 or extr.category in sys.argv

    for idx, (url, result) in enumerate(extr._get_tests())
    if result
]


# setup target directory (archive/testdb/<today's date>)

path = util.path("archive", "testdb", str(datetime.date.today()))
os.makedirs(path, exist_ok=True)


for idx, extr, url, result in tests:

    # filename
    name = "{}-{}-{}.json".format(extr.category, extr.subcategory, idx)
    print(name)

    # config values (reset for every test, then apply per-test settings)
    setup_test_config()

    if "options" in result:
        for key, value in result["options"]:
            config.set(key.split("."), value)
    if "range" in result:
        config.set(("image-range",), result["range"])
        config.set(("chapter-range",), result["range"])

    # write test data
    # The job is run with ensure_ascii=False, so non-ASCII characters
    # reach the file as-is; open it as UTF-8 explicitly instead of
    # relying on the locale's default encoding.
    try:
        with open(os.path.join(path, name), "w",
                  encoding="utf-8") as outfile:
            job.DataJob(url, file=outfile, ensure_ascii=False).run()
    except KeyboardInterrupt:
        sys.exit()
9 | 10 | """Generate bash completion script from gallery-dl's argument parser""" 11 | 12 | import util 13 | from gallery_dl import option 14 | 15 | 16 | TEMPLATE = """_gallery_dl() 17 | { 18 | local cur prev 19 | COMPREPLY=() 20 | cur="${COMP_WORDS[COMP_CWORD]}" 21 | prev="${COMP_WORDS[COMP_CWORD-1]}" 22 | 23 | if [[ "${prev}" =~ ^(%(fileopts)s)$ ]]; then 24 | COMPREPLY=( $(compgen -f -- "${cur}") ) 25 | elif [[ "${prev}" =~ ^(%(diropts)s)$ ]]; then 26 | COMPREPLY=( $(compgen -d -- "${cur}") ) 27 | else 28 | COMPREPLY=( $(compgen -W "%(opts)s" -- "${cur}") ) 29 | fi 30 | } 31 | 32 | complete -F _gallery_dl gallery-dl 33 | """ 34 | 35 | opts = [] 36 | diropts = [] 37 | fileopts = [] 38 | for action in option.build_parser()._actions: 39 | 40 | if action.metavar in ("DEST",): 41 | diropts.extend(action.option_strings) 42 | 43 | elif action.metavar in ("FILE", "CFG"): 44 | fileopts.extend(action.option_strings) 45 | 46 | for opt in action.option_strings: 47 | if opt.startswith("--"): 48 | opts.append(opt) 49 | 50 | PATH = util.path("gallery-dl.bash_completion") 51 | with open(PATH, "w", encoding="utf-8") as file: 52 | file.write(TEMPLATE % { 53 | "opts" : " ".join(opts), 54 | "diropts" : "|".join(diropts), 55 | "fileopts": "|".join(fileopts), 56 | }) 57 | -------------------------------------------------------------------------------- /gallery_dl/extractor/ngomik.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright 2018-2019 Mike Fährmann 4 | # 5 | # This program is free software; you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License version 2 as 7 | # published by the Free Software Foundation. 8 | 9 | """Extract manga-chapters and entire manga from http://ngomik.in/""" 10 | 11 | from .common import ChapterExtractor 12 | from .. 
class NgomikChapterExtractor(ChapterExtractor):
    """Extractor for manga-chapters from ngomik.in"""
    category = "ngomik"
    root = "http://ngomik.in"
    pattern = (r"(?:https?://)?(?:www\.)?ngomik\.in"
               r"(/[^/?&#]+-chapter-[^/?&#]+)")
    test = (
        ("https://www.ngomik.in/14-sai-no-koi-chapter-1-6/", {
            "url": "8e67fdf751bbc79bc6f4dead7675008ddb8e32a4",
            "keyword": "204d177f09d438fd50c9c28d98c73289194640d8",
        }),
        ("https://ngomik.in/break-blade-chapter-26/", {
            "count": 34,
        }),
    )

    def metadata(self, page):
        """Build the chapter metadata dict from a chapter page"""
        # NOTE(review): both delimiters are empty strings as seen here;
        # this may be markup that got lost before review -- confirm
        # against the repository.
        title = text.extract(page, '', "")[0]

        # "<manga> Chapter <number>[.<minor>] ..."
        manga, _, remainder = title.partition(" Chapter ")
        number = remainder.partition(" ")[0]
        chapter, sep, minor = number.partition(".")

        return {
            "manga": text.unescape(manga),
            "chapter": text.parse_int(chapter),
            "chapter_minor": sep + minor,
            "lang": "id",
            "language": "Indonesian",
        }

    @staticmethod
    def images(page):
        """Return a list of (url, None) tuples for the reader area"""
        readerarea = text.extract(page, 'id="readerarea"', 'class="chnav"')[0]
        sources = re.findall(r"\ssrc=[\"']?([^\"' >]+)", readerarea)
        return [(text.unescape(src), None) for src in sources]
class RecursiveExtractor(Extractor):
    """Extractor that fetches URLs from a remote or local source"""
    category = "recursive"
    pattern = r"r(?:ecursive)?:"
    test = ("recursive:https://pastebin.com/raw/FLwrCYsT", {
        "url": "eee86d65c346361b818e8f4b2b307d9429f136a2",
    })

    def items(self):
        """Yield a Queue message for every http(s) URL found in the source

        The source is everything after the first ":" of this extractor's
        URL. It is fetched with the regular session, which gets a
        FileAdapter mounted for "file://" URLs beforehand.
        """
        default = {"directlink"} | util.SPECIAL_EXTRACTORS
        blacklisted = self.config("blacklist", default)

        self.session.mount("file://", FileAdapter())
        _, _, source = self.url.partition(":")
        content = self.request(source).text

        yield Message.Version, 1
        with extractor.blacklist(blacklisted):
            for found in re.finditer(r"https?://[^\s\"']+", content):
                yield Message.Queue, found.group(0), {}


class FileAdapter(requests.adapters.BaseAdapter):
    """Requests adapter for local files"""

    def send(self, request, **kwargs):
        """Return a Response whose raw stream is the requested local file

        A file that cannot be opened yields an empty body and a
        "bad request" status code instead of raising OSError.
        """
        result = requests.Response()
        try:
            # skip the 7 characters of the "file://" prefix
            handle = open(request.url[7:], "rb")
        except OSError:
            import io
            result.raw = io.BytesIO()
            result.status_code = requests.codes.bad_request
            return result

        # 'release_conn' is made an alias of the file's 'close'
        handle.release_conn = handle.close
        result.raw = handle
        result.status_code = requests.codes.ok
        return result

    def close(self):
        """Do nothing; this adapter keeps no state of its own"""
# Snippet printed for a successful run: URL plus the three result hashes.
TESTDATA_FMT = """
    test = ("{}", {{
        "url": "{}",
        "keyword": "{}",
        "content": "{}",
    }})
"""

# Snippet printed when running the job raised an exception.
TESTDATA_EXCEPTION_FMT = """
    test = ("{}", {{
        "exception": exception.{},
    }})
"""


def extract_test_urls(tests):
    """Return the URLs contained in an extractor's 'test' attribute.

    Extractors declare their tests in several shapes:
      - nothing at all (None or an empty sequence)
      - a single URL string
      - a single (URL, results) pair
      - a sequence whose items are URL strings or (URL, results) pairs

    All of them are normalized to a plain list of URL strings.
    """
    if not tests:
        return []
    if isinstance(tests, str):
        return [tests]

    # A lone (URL, results) pair must not be mistaken for two test cases.
    is_single_pair = (
        len(tests) == 2 and
        isinstance(tests[0], str) and
        (tests[1] is None or isinstance(tests[1], dict))
    )
    if is_single_pair:
        return [tests[0]]

    return [
        test if isinstance(test, str) else test[0]
        for test in tests
    ]


def main():
    """Print test-data snippets for the URLs given on the command line.

    With '--recreate', the positional arguments are treated as extractor
    categories and the test URLs of all matching extractors are used
    instead. '--content' is forwarded to the ResultJob of each URL.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--content", action="store_true")
    parser.add_argument("--recreate", action="store_true")
    parser.add_argument("urls", nargs="*")
    args = parser.parse_args()

    if args.recreate:
        urls = [
            url
            for extr in extractor.extractors() if extr.category in args.urls
            for url in extract_test_urls(getattr(extr, "test", None))
        ]
    else:
        urls = args.urls

    setup_test_config()

    for url in urls:
        tjob = ResultJob(url, content=args.content)
        try:
            tjob.run()
        except Exception as exc:
            # deliberate catch-all: the exception's class name is the
            # data that ends up in the generated snippet
            fmt = TESTDATA_EXCEPTION_FMT
            data = (exc.__class__.__name__,)
        else:
            fmt = TESTDATA_FMT
            data = (tjob.hash_url.hexdigest(),
                    tjob.hash_keyword.hexdigest(),
                    tjob.hash_content.hexdigest())
        print(tjob.extractor.__class__.__name__)
        print(fmt.format(url, *data))


if __name__ == '__main__':
    main()
class ClassifyPP(PostProcessor):
    """Post processor that places files in a subdirectory per file type"""

    # subdirectory name -> filename extensions that belong into it
    DEFAULT_MAPPING = {
        "Music"   : ("mp3", "aac", "flac", "ogg", "wma", "m4a", "wav"),
        "Video"   : ("flv", "ogv", "avi", "mp4", "mpg", "mpeg", "3gp", "mkv",
                     "webm", "vob", "wmv"),
        "Pictures": ("jpg", "jpeg", "png", "gif", "bmp", "svg", "webp"),
        "Archives": ("zip", "rar", "7z", "tar", "gz", "bz2"),
    }

    def __init__(self, pathfmt, options):
        PostProcessor.__init__(self)
        categories = options.get("mapping", self.DEFAULT_MAPPING)

        # invert "directory -> extensions" into "extension -> directory"
        lookup = {}
        for dirname, extensions in categories.items():
            for extension in extensions:
                lookup[extension] = dirname
        self.mapping = lookup

    def prepare(self, pathfmt):
        """Redirect 'pathfmt' to its subdirectory before downloading"""
        extension = pathfmt.extension
        if extension not in self.mapping:
            return
        # set initial paths to enable download skips
        self._build_paths(pathfmt, self.mapping[extension])

    def run(self, pathfmt):
        """Redirect 'pathfmt' again and create the target directory"""
        extension = pathfmt.extension
        if extension not in self.mapping:
            return
        # rebuild paths in case the filename extension changed
        target = self._build_paths(pathfmt, self.mapping[extension])
        os.makedirs(target, exist_ok=True)

    @staticmethod
    def _build_paths(pathfmt, extra):
        """Append 'extra' to the directories of 'pathfmt'

        Updates 'pathfmt.realpath' and 'pathfmt.path' in place and
        returns the new real directory.
        """
        tail = os.sep + pathfmt.filename
        target = pathfmt.realdirectory + extra
        pathfmt.realpath = target + tail
        pathfmt.path = pathfmt.directory + extra + tail
        return target


__postprocessor__ = ClassifyPP
class Message:
    """Identifiers for the messages yielded by extractors

    An extractor reports its results as tuples. The first element of
    such a tuple is one of the identifiers below and defines how the
    remaining elements are to be interpreted.

    - Message.Version:
        - Version of the message protocol (currently always '1')
        - 2nd element: version of all following messages as integer

    - Message.Directory:
        - Selects the target directory for all following images
        - 2nd element: dictionary with general metadata

    - Message.Url:
        - An image URL together with its metadata
        - 2nd element: the URL as string
        - 3rd element: dictionary with image-specific metadata

    - Message.Headers: # obsolete
        - HTTP headers to send while downloading
        - 2nd element: dictionary of header-name and -value pairs

    - Message.Cookies: # obsolete
        - Cookies to send while downloading
        - 2nd element: dictionary of cookie-name and -value pairs

    - Message.Queue:
        - An (external) URL to be processed by another extractor
        - 2nd element: the (external) URL as string
        - 3rd element: dictionary with URL-specific metadata

    - Message.Urllist:
        - Like Message.Url, except that the 2nd element is a list of URLs
        - URLs after the first one are fallbacks in case it fails
    """

    Version = 1
    Directory = 2
    Url = 3
    # Headers = 4  (obsolete)
    # Cookies = 5  (obsolete)
    Queue = 6
    Urllist = 7
class MetadataPP(PostProcessor):
    """Post processor that writes a file's metadata to a separate file"""

    def __init__(self, pathfmt, options):
        PostProcessor.__init__(self)

        # the selected mode decides the writer and the default extension
        mode = options.get("mode", "json")
        if mode == "custom":
            default_ext = "txt"
            self.write = self._write_custom
            self.formatter = util.Formatter(options.get("format"))
        elif mode == "tags":
            default_ext = "txt"
            self.write = self._write_tags
        else:
            default_ext = "json"
            self.write = self._write_json
            self.indent = options.get("indent", 4)
            self.ascii = options.get("ascii", False)

        self.extension = options.get("extension", default_ext)

    def run(self, pathfmt):
        """Write the metadata of 'pathfmt' to '<realpath>.<extension>'"""
        target = "{}.{}".format(pathfmt.realpath, self.extension)
        with open(target, "w", encoding="utf-8") as file:
            self.write(file, pathfmt.kwdict)

    def _write_custom(self, file, kwdict):
        """Write 'kwdict' rendered through the configured format string"""
        file.write(self.formatter.format_map(kwdict))

    def _write_tags(self, file, kwdict):
        """Write the tags from 'kwdict', one per line

        Tags are taken from "tags" or, failing that, "tag_string".
        Nothing is written if neither holds a non-empty value.
        """
        tags = kwdict.get("tags") or kwdict.get("tag_string")
        if not tags:
            return

        if not isinstance(tags, list):
            # split at ", " first; fall back to single spaces if that
            # produced fewer than one tag per 16 characters
            by_comma = tags.split(", ")
            if len(by_comma) < len(tags) / 16:
                tags = tags.split(" ")
            else:
                tags = by_comma

        file.write("\n".join(tags))
        file.write("\n")

    def _write_json(self, file, kwdict):
        """Write 'kwdict' as JSON using the configured ascii/indent values"""
        util.dump_json(kwdict, file, self.ascii, self.indent)


__postprocessor__ = MetadataPP
General Public License version 2 as 7 | # published by the Free Software Foundation. 8 | 9 | """Extract manga-chapters from https://readms.net/""" 10 | 11 | from .common import ChapterExtractor 12 | from .. import text 13 | 14 | 15 | class MangastreamChapterExtractor(ChapterExtractor): 16 | """Extractor for manga-chapters from mangastream.com""" 17 | category = "mangastream" 18 | archive_fmt = "{chapter_id}_{page}" 19 | pattern = (r"(?:https?://)?(?:www\.)?(?:readms\.net|mangastream\.com)" 20 | r"/r(?:ead)?/([^/]*/([^/]+)/(\d+))") 21 | test = ( 22 | ("https://readms.net/r/onepunch_man/087/4874/1"), 23 | ("https://mangastream.com/r/onepunch_man/087/4874/1"), 24 | ) 25 | root = "https://readms.net" 26 | 27 | def __init__(self, match): 28 | self.part, self.chapter, self.chapter_id = match.groups() 29 | url = "{}/r/{}".format(self.root, self.part) 30 | ChapterExtractor.__init__(self, match, url) 31 | 32 | def metadata(self, page): 33 | manga, pos = text.extract( 34 | page, '