├── cyberdrop_dl ├── ui │ ├── __init__.py │ ├── progress │ │ ├── __init__.py │ │ ├── downloads_progress.py │ │ ├── statistic_progress.py │ │ └── scraping_progress.py │ ├── prompts │ │ ├── __init__.py │ │ ├── url_file_prompts.py │ │ └── general_prompts.py │ └── ui.py ├── clients │ ├── __init__.py │ └── errors.py ├── managers │ ├── __init__.py │ ├── cache_manager.py │ ├── db_manager.py │ ├── log_manager.py │ ├── progress_manager.py │ ├── download_manager.py │ ├── path_manager.py │ ├── args_manager.py │ └── client_manager.py ├── scraper │ ├── __init__.py │ ├── crawlers │ │ ├── __init__.py │ │ ├── saint_crawler.py │ │ ├── xbunkr_crawler.py │ │ ├── nudostartv_crawler.py │ │ ├── hotpic_crawler.py │ │ ├── postimg_crawler.py │ │ ├── imgbox_crawler.py │ │ ├── cyberdrop_crawler.py │ │ ├── fapello_crawler.py │ │ ├── erome_crawler.py │ │ ├── pimpandhost_crawler.py │ │ ├── mediafire_crawler.py │ │ ├── redgifs_crawler.py │ │ ├── realbooru_crawler.py │ │ ├── rule34xxx_crawler.py │ │ ├── rule34xyz_crawler.py │ │ ├── ehentai_crawler.py │ │ ├── pixeldrain_crawler.py │ │ ├── imgkiwi_crawler.py │ │ ├── scrolller_crawler.py │ │ ├── toonily_crawler.py │ │ ├── imgur_crawler.py │ │ ├── imgbb_crawler.py │ │ ├── imageban_crawler.py │ │ ├── rule34vault_crawler.py │ │ ├── omegascans_crawler.py │ │ ├── gofile_crawler.py │ │ ├── jpgchurch_crawler.py │ │ └── coomer_crawler.py │ └── jdownloader.py ├── utils │ ├── __init__.py │ ├── args │ │ ├── __init__.py │ │ ├── browser_cookie_extraction.py │ │ └── config_definitions.py │ ├── database │ │ ├── __init__.py │ │ ├── tables │ │ │ ├── __init__.py │ │ │ └── temp_table.py │ │ └── table_definitions.py │ ├── dataclasses │ │ ├── __init__.py │ │ ├── supported_domains.py │ │ └── url_objects.py │ └── transfer │ │ ├── __init__.py │ │ └── transfer_v4_db.py ├── downloader │ └── __init__.py ├── __init__.py └── main.py ├── .gitignore ├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug_report.md └── workflows │ ├── publish-to-pypi.yaml │ └── codeql-analysis.yml ├── pyproject.toml └── README.md /cyberdrop_dl/ui/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cyberdrop_dl/clients/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cyberdrop_dl/managers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cyberdrop_dl/scraper/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cyberdrop_dl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cyberdrop_dl/downloader/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cyberdrop_dl/ui/progress/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cyberdrop_dl/ui/prompts/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cyberdrop_dl/utils/args/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cyberdrop_dl/scraper/crawlers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cyberdrop_dl/utils/database/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cyberdrop_dl/utils/dataclasses/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cyberdrop_dl/utils/transfer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cyberdrop_dl/utils/database/tables/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cyberdrop_dl/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "5.3.32" 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Project stuff 2 | /Test-Download-Dir/ 3 | /Test-AppData-Dir/ 4 | /AppData/ 5 | /Downloads/ 6 | 7 | # Python cache 8 | __pycache__ 9 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [Jules-WinnfieldX] 4 | custom: ['https://www.buymeacoffee.com/juleswinnft'] 5 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: "[FEATURE]" 5 | labels: enhancement 6 | assignees: Jules-WinnfieldX 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Additional context** 17 | Add any other context or screenshots about the feature request here. 
18 | -------------------------------------------------------------------------------- /cyberdrop_dl/ui/prompts/url_file_prompts.py: -------------------------------------------------------------------------------- 1 | import re 2 | from pathlib import Path 3 | 4 | from InquirerPy import inquirer 5 | from rich.console import Console 6 | 7 | console = Console() 8 | 9 | def edit_urls_prompt(URLs_File: Path, vi_mode: bool, fix_strings=True) -> None: 10 | """Edit the URLs file""" 11 | console.clear() 12 | console.print(f"Editing URLs: {URLs_File}") 13 | with open(URLs_File, "r") as f: 14 | existing_urls = f.read() 15 | 16 | result = inquirer.text( 17 | message="URLs:", multiline=True, default=existing_urls, 18 | long_instruction="Press escape and then enter to finish editing.", 19 | vi_mode=vi_mode, 20 | ).execute() 21 | 22 | if fix_strings: 23 | result = result.replace(" ", "\n") 24 | result = re.sub(r"(\n)+", "\n", result) 25 | 26 | with open(URLs_File, "w") as f: 27 | f.write(result) 28 | -------------------------------------------------------------------------------- /.github/workflows/publish-to-pypi.yaml: -------------------------------------------------------------------------------- 1 | name: Publish To PyPi 2 | 3 | on: 4 | push: 5 | tags: 6 | - '*.*.*' 7 | 8 | jobs: 9 | release: 10 | name: Release 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout code 14 | uses: actions/checkout@v4 15 | 16 | - name: Set up Python 3.11.4 17 | uses: actions/setup-python@v4 18 | with: 19 | python-version: "3.11.4" 20 | 21 | - name: Install Poetry 22 | run: | 23 | curl -sSL https://install.python-poetry.org | python - -y 24 | 25 | - name: Update PATH 26 | run: echo "$HOME/.local/bin" >> $GITHUB_PATH 27 | 28 | - name: Build project for distribution 29 | run: poetry build 30 | 31 | - name: Check Version 32 | id: check-version 33 | run: | 34 | [[ "$(poetry version --short)" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]] || echo prerelease=true >> $GITHUB_OUTPUT 35 | 36 | - name: Publish to PyPI 37 | env: 38 | POETRY_PYPI_TOKEN_PYPI: ${{ secrets.PYPI_TOKEN }} 39 | run: poetry publish -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "cyberdrop-dl" 3 | version = "5.3.32" 4 | description = "Bulk downloader for multiple file hosts" 5 | authors = ["Jules Winnfield "] 6 | readme = "README.md" 7 | repository = "https://github.com/Jules-WinnfieldX/CyberDropDownloader" 8 | documentation = "https://jules-winnfieldx.gitbook.io/cyberdrop-dl/" 9 | packages = [{include = "cyberdrop_dl"}] 10 | 11 | [tool.poetry.dependencies] 12 | python = "^3.11" 13 | aiohttp = "^3.9.1" 14 | aiolimiter = "^1.1.0" 15 | inquirerpy = "^0.3.4" 16 | platformdirs = "^4.2.2" 17 | rich = "^13.7.0" 18 | pyyaml = "^6.0.1" 19 | certifi = "^2024.2.2" 20 | browser-cookie3 = "^0.19.1" 21 | beautifulsoup4 = "^4.12.2" 22 | filedate = "^3.0" 23 | aiosqlite = "0.17.0" 24 | aiofiles = "0.8.0" 25 | asyncpraw = "^7.7.1" 26 | myjdapi = "^1.1.7" 27 | mediafire = "^0.6.1" 28 | mutagen = "^1.47.0" 29 | pillow = "^10.2.0" 30 | get-video-properties = "^0.1.1" 31 | 32 | [tool.poetry.scripts] 33 | cyberdrop-dl = "cyberdrop_dl.main:main" 34 | 35 | [build-system] 36 | requires = ["poetry-core>=1.0.0"] 37 | build-backend = "poetry.core.masonry.api" 38 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: 
-------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[BUG]" 5 | labels: bug 6 | assignees: Jules-WinnfieldX 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | Make sure to read the FAQ here first: https://github.com/Jules-WinnfieldX/CyberDropDownloader/wiki 13 | 14 | **To Help Reproduce** 15 | Steps to help reproduce the behavior: 16 | 1. Describe the bug accurately. 17 | 2. Provide an example album that showcases it (if applicable) 18 | 3. Make sure you are updated to the latest version to see if it's already fixed. 19 | 20 | **Screenshots** 21 | If applicable, add screenshots to help explain your problem. 22 | 23 | **Setup Information** 24 | - OS: [e.g. Windows 10, Mac OS X Sierra, etc] 25 | - Cyberdrop Downloader Version # 26 | 27 | **Downloader.log file** 28 | You need to include the downloader.log file after the issue occurs. Majority of the time it's the only way to diagnose issues. 29 | If you open a bug report and don't include the log file, it will be closed. 30 | 31 | **Additional context** 32 | Add any other context about the problem here. 33 | -------------------------------------------------------------------------------- /cyberdrop_dl/utils/database/tables/temp_table.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import aiosqlite 4 | 5 | from cyberdrop_dl.utils.database.table_definitions import create_temp 6 | 7 | 8 | class TempTable: 9 | def __init__(self, db_conn: aiosqlite.Connection): 10 | self.db_conn: aiosqlite.Connection = db_conn 11 | 12 | async def startup(self) -> None: 13 | """Startup process for the TempTable""" 14 | await self.db_conn.execute(create_temp) 15 | await self.db_conn.commit() 16 | 17 | async def get_temp_names(self) -> List[str]: 18 | """Gets the list of temp filenames""" 19 | cursor = await self.db_conn.cursor() 20 | await cursor.execute("SELECT downloaded_filename FROM temp;") 21 | filenames = await cursor.fetchall() 22 | filenames = [list(filename) for filename in filenames] 23 | return list(sum(filenames, ())) 24 | 25 | async def sql_insert_temp(self, downloaded_filename: str) -> None: 26 | """Inserts a temp filename into the downloads_temp table""" 27 | await self.db_conn.execute("""INSERT OR IGNORE INTO downloads_temp VALUES (?)""", (downloaded_filename,)) 28 | await self.db_conn.commit() 29 | -------------------------------------------------------------------------------- /cyberdrop_dl/utils/transfer/transfer_v4_db.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | from pathlib import Path 3 | 4 | from cyberdrop_dl.utils.database.table_definitions import create_history, create_temp 5 | 6 | 7 | def transfer_v4_db(db_path: Path, new_db_path: Path) -> None: 8 | """Transfers a V4 database into V5 possession""" 9 | old_db_connection = sqlite3.connect(db_path) 10 | if not new_db_path.exists(): 11 | new_db_path.parent.mkdir(parents=True, exist_ok=True) 12 | new_db_path.touch() 13 | new_db_connection = sqlite3.connect(new_db_path) 14 | 15 | new_db_connection.execute(create_history) 16 | new_db_connection.execute(create_temp) 17 | 18 | query = "SELECT domain, url_path, referer, download_path, download_filename, original_filename, completed FROM media WHERE completed = 1" 19 | old_data_history = old_db_connection.execute(query).fetchall() 20 | 21 | 
old_data_revised = [] 22 | for row in old_data_history: 23 | row = list(row) 24 | row[3] = str(Path(row[3]).parent) 25 | old_data_revised.append(tuple(row)) 26 | 27 | new_db_connection.executemany("insert or ignore into media (domain, url_path, referer, download_path, download_filename, original_filename, completed) values (?, ?, ?, ?, ?, ?, ?)", old_data_revised) 28 | del old_data_history 29 | 30 | new_db_connection.commit() 31 | old_db_connection.close() 32 | new_db_connection.close() 33 | -------------------------------------------------------------------------------- /cyberdrop_dl/utils/database/table_definitions.py: -------------------------------------------------------------------------------- 1 | create_history = """CREATE TABLE IF NOT EXISTS media (domain TEXT, 2 | url_path TEXT, 3 | referer TEXT, 4 | album_id TEXT, 5 | download_path TEXT, 6 | download_filename TEXT, 7 | original_filename TEXT, 8 | completed INTEGER NOT NULL, 9 | created_at TIMESTAMP, 10 | completed_at TIMESTAMP, 11 | PRIMARY KEY (domain, url_path, original_filename) 12 | );""" 13 | 14 | create_fixed_history = """CREATE TABLE IF NOT EXISTS media_copy (domain TEXT, 15 | url_path TEXT, 16 | referer TEXT, 17 | album_id TEXT, 18 | download_path TEXT, 19 | download_filename TEXT, 20 | original_filename TEXT, 21 | completed INTEGER NOT NULL, 22 | PRIMARY KEY (domain, url_path, original_filename) 23 | );""" 24 | 25 | create_temp = """CREATE TABLE IF NOT EXISTS temp (downloaded_filename TEXT);""" 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # `cyberdrop-dl` 2 | 3 | **Bulk downloader for multiple file hosts** 4 | 5 | [![PyPI version](https://badge.fury.io/py/cyberdrop-dl.svg)](https://badge.fury.io/py/cyberdrop-dl) 6 | [![Downloads](https://static.pepy.tech/badge/cyberdrop-dl)](https://pepy.tech/project/cyberdrop-dl) 7 | [![Downloads](https://static.pepy.tech/badge/cyberdrop-dl/month)](https://pepy.tech/project/cyberdrop-dl) 8 | [![Downloads](https://static.pepy.tech/badge/cyberdrop-dl/week)](https://pepy.tech/project/cyberdrop-dl) 9 | 10 | [![Discord Banner 3](https://discordapp.com/api/guilds/1070206871564197908/widget.png?style=banner3)](https://discord.com/invite/kbZCxz22Qp) 11 | 12 | Brand new and improved! Cyberdrop-DL now has an updated paint job, fantastic new look. It's now easier to use than ever before! 13 | 14 | ![Screenshot 2023-12-03 190747](https://github.com/Jules-WinnfieldX/CyberDropDownloader/assets/61347133/aa6b7e21-a039-42e9-9308-ca62750a49cf) 15 | 16 | ## Supported Sites 17 | 18 | bunkr, bunkrr, celebforum, coomer, cyberdrop, cyberfile, e-hentai, erome, fapello, f95zone, gofile, hotpic, ibb.co, imageban, imgbox, imgur, img.kiwi, jpg.church, jpg.homes, jpg.fish, jpg.fishing, jpg.pet, jpeg.pet, jpg1.su, jpg2.su, jpg3.su, jpg4.su, host.church, kemono, leakedmodels, mediafire, nudostar.com, nudostar.tv, omegascans, pimpandhost, pixeldrain, postimg, realbooru, reddit, redd.it, redgifs, rule34.xxx, rule34.xyz, rule34vault, saint, scrolller, simpcity, socialmediagirls, toonily, xbunker, xbunkr 19 | 20 | ## Installation and More Information 21 | 22 | Read the [Wiki!](https://jules-winnfieldx.gitbook.io/cyberdrop-dl/) 23 | 24 | You can find how to install the program and how to use it there. You can also find a list of supported hosts, and frequently asked questions. 
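
For quick reference, installation from PyPI and a first launch generally look like the sketch below; the wiki remains the authoritative guide. This assumes Python 3.11+ is available, matching the requirement in `pyproject.toml`.

```bash
# Install the published package from PyPI (package name as declared in pyproject.toml)
pip install cyberdrop-dl

# Launch the program; the console script entry point (cyberdrop_dl.main:main) is defined in pyproject.toml
cyberdrop-dl
```

Running the command with no arguments should bring up the interactive interface shown in the screenshot above; see the wiki for configuration details and per-site options.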
25 | 26 | 27 | -------------------------------------------------------------------------------- /cyberdrop_dl/utils/dataclasses/supported_domains.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import ClassVar, Tuple, List 3 | 4 | 5 | @dataclass 6 | class SupportedDomains: 7 | """The allows options for domains to skip when scraping and mappings""" 8 | supported_hosts: ClassVar[Tuple[str, ...]] = ("bunkr", "bunkrr", "celebforum", "coomer", "cyberdrop", "cyberfile", 9 | "e-hentai", "erome", "fapello", "f95zone", "gofile", "hotpic", 10 | "ibb.co", "imageban", "imgbox", "imgur", "img.kiwi", "jpg.church", 11 | "jpg.homes", "jpg.fish", "jpg.fishing", "jpg.pet", "jpeg.pet", 12 | "jpg1.su", "jpg2.su", "jpg3.su", "jpg4.su", "host.church", "kemono", 13 | "leakedmodels", "mediafire", "nudostar.com", "nudostar.tv", 14 | "omegascans", "pimpandhost", "pixeldrain", "postimg", "realbooru", 15 | "reddit", "redd.it", "redgifs", "rule34.xxx", "rule34.xyz", "rule34vault", "saint", 16 | "scrolller", "simpcity", "socialmediagirls", "toonily", "xbunker", 17 | "xbunkr") 18 | 19 | supported_forums: ClassVar[Tuple[str, ...]] = ("celebforum.to", "f95zone.to", "leakedmodels.com", "nudostar.com", 20 | "simpcity.su", "forums.socialmediagirls.com", "xbunker.nu") 21 | supported_forums_map = {"celebforum.to": "celebforum", "f95zone.to": "f95zone", "leakedmodels.com": "leakedmodels", 22 | "nudostar.com": "nudostar", "simpcity.su": "simpcity", 23 | "forums.socialmediagirls.com": "socialmediagirls", "xbunker.nu": "xbunker"} 24 | 25 | sites: List[str] 26 | -------------------------------------------------------------------------------- /cyberdrop_dl/managers/cache_manager.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from dataclasses import field 4 | from pathlib import Path 5 | from typing import Any, Dict, TYPE_CHECKING 6 | 7 | import yaml 8 | 9 | if TYPE_CHECKING: 10 | from cyberdrop_dl.managers.manager import Manager 11 | 12 | 13 | def _save_yaml(file: Path, data: Dict) -> None: 14 | """Saves a dict to a yaml file""" 15 | file.parent.mkdir(parents=True, exist_ok=True) 16 | with open(file, 'w') as yaml_file: 17 | yaml.dump(data, yaml_file) 18 | 19 | 20 | def _load_yaml(file: Path) -> Dict: 21 | """Loads a yaml file and returns it as a dict""" 22 | with open(file, 'r') as yaml_file: 23 | return yaml.load(yaml_file.read(), Loader=yaml.FullLoader) 24 | 25 | 26 | class CacheManager: 27 | def __init__(self, manager: 'Manager'): 28 | self.manager = manager 29 | 30 | self.cache_file: Path = field(init=False) 31 | self._cache = {} 32 | 33 | def startup(self, cache_file: Path) -> None: 34 | """Ensures that the cache file exists""" 35 | self.cache_file = cache_file 36 | if not self.cache_file.is_file(): 37 | self.save('default_config', "Default") 38 | 39 | self.load() 40 | if self.manager.args_manager.appdata_dir: 41 | self.save('first_startup_completed', True) 42 | 43 | def load(self) -> None: 44 | """Loads the cache file into memory""" 45 | self._cache = _load_yaml(self.cache_file) 46 | 47 | def get(self, key: str) -> Any: 48 | """Returns the value of a key in the cache""" 49 | return self._cache.get(key, None) 50 | 51 | def save(self, key: str, value: Any) -> None: 52 | """Saves a key and value to the cache""" 53 | self._cache[key] = value 54 | _save_yaml(self.cache_file, self._cache) 55 | 56 | def remove(self, key: str) -> None: 57 | """Removes a key from 
the cache""" 58 | if key in self._cache: 59 | del self._cache[key] 60 | _save_yaml(self.cache_file, self._cache) 61 | -------------------------------------------------------------------------------- /cyberdrop_dl/scraper/crawlers/saint_crawler.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | from aiolimiter import AsyncLimiter 6 | from yarl import URL 7 | 8 | from cyberdrop_dl.clients.errors import ScrapeFailure 9 | from cyberdrop_dl.scraper.crawler import Crawler 10 | from cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem 11 | from cyberdrop_dl.utils.utilities import get_filename_and_ext, error_handling_wrapper 12 | 13 | if TYPE_CHECKING: 14 | from cyberdrop_dl.managers.manager import Manager 15 | 16 | 17 | class SaintCrawler(Crawler): 18 | def __init__(self, manager: Manager): 19 | super().__init__(manager, "saint", "Saint") 20 | self.primary_base_domain = URL("https://saint2.su") 21 | self.request_limiter = AsyncLimiter(10, 1) 22 | 23 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 24 | 25 | async def fetch(self, scrape_item: ScrapeItem) -> None: 26 | """Determines where to send the scrape item based on the url""" 27 | task_id = await self.scraping_progress.add_task(scrape_item.url) 28 | scrape_item.url = self.primary_base_domain.with_path(scrape_item.url.path) 29 | 30 | await self.video(scrape_item) 31 | 32 | await self.scraping_progress.remove_task(task_id) 33 | 34 | @error_handling_wrapper 35 | async def video(self, scrape_item: ScrapeItem) -> None: 36 | """Scrapes an album""" 37 | if await self.check_complete_from_referer(scrape_item): 38 | return 39 | 40 | async with self.request_limiter: 41 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 42 | try: 43 | link = URL(soup.select_one('video[id=main-video] source').get('src')) 44 | except AttributeError: 45 | raise ScrapeFailure(404, f"Could not find video source for {scrape_item.url}") 46 | filename, ext = await get_filename_and_ext(link.name) 47 | await self.handle_file(link, scrape_item, filename, ext) 48 | -------------------------------------------------------------------------------- /cyberdrop_dl/utils/dataclasses/url_objects.py: -------------------------------------------------------------------------------- 1 | from dataclasses import field 2 | from pathlib import Path 3 | from typing import TYPE_CHECKING, Union 4 | 5 | from cyberdrop_dl.utils.utilities import sanitize_folder 6 | 7 | if TYPE_CHECKING: 8 | from rich.progress import TaskID 9 | from yarl import URL 10 | 11 | 12 | class MediaItem: 13 | def __init__(self, url: "URL", referer: "URL", album_id: Union[str, None], download_folder: Path, filename: str, ext: str, original_filename: str): 14 | self.url: URL = url 15 | self.referer: URL = referer 16 | self.album_id: Union[str, None] = album_id 17 | self.download_folder: Path = download_folder 18 | self.filename: str = filename 19 | self.ext: str = ext 20 | self.download_filename: str = field(init=False) 21 | self.original_filename: str = original_filename 22 | self.file_lock_reference_name: str = field(init=False) 23 | self.datetime: str = field(init=False) 24 | 25 | self.filesize: int = field(init=False) 26 | self.current_attempt: int = field(init=False) 27 | 28 | self.partial_file: Path = field(init=False) 29 | self.complete_file: Path = field(init=False) 30 | self.task_id: TaskID = field(init=False) 31 
| 32 | 33 | class ScrapeItem: 34 | def __init__(self, url: "URL", parent_title: str, part_of_album: bool = False, album_id: Union[str, None] = None, possible_datetime: int = None, 35 | retry: bool = False, retry_path: Path = None): 36 | self.url: URL = url 37 | self.parent_title: str = parent_title 38 | self.part_of_album: bool = part_of_album 39 | self.album_id: Union[str, None] = album_id 40 | self.possible_datetime: int = possible_datetime 41 | self.retry: bool = retry 42 | self.retry_path: Path = retry_path 43 | 44 | async def add_to_parent_title(self, title: str) -> None: 45 | """Adds a title to the parent title""" 46 | if not title or self.retry: 47 | return 48 | title = await sanitize_folder(title) 49 | self.parent_title = (self.parent_title + "/" + title) if self.parent_title else title 50 | -------------------------------------------------------------------------------- /cyberdrop_dl/scraper/crawlers/xbunkr_crawler.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | from aiolimiter import AsyncLimiter 6 | from yarl import URL 7 | 8 | from cyberdrop_dl.clients.errors import NoExtensionFailure 9 | from cyberdrop_dl.scraper.crawler import Crawler 10 | from cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem 11 | from cyberdrop_dl.utils.utilities import get_filename_and_ext, error_handling_wrapper, log 12 | 13 | if TYPE_CHECKING: 14 | from cyberdrop_dl.managers.manager import Manager 15 | 16 | 17 | class XBunkrCrawler(Crawler): 18 | def __init__(self, manager: Manager): 19 | super().__init__(manager, "xbunkr", "XBunkr") 20 | self.request_limiter = AsyncLimiter(10, 1) 21 | 22 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 23 | 24 | async def fetch(self, scrape_item: ScrapeItem) -> None: 25 | """Determines where to send the scrape item based on the url""" 26 | task_id = await self.scraping_progress.add_task(scrape_item.url) 27 | 28 | if "media" in scrape_item.url.host: 29 | filename, ext = await get_filename_and_ext(scrape_item.url.name) 30 | await self.handle_file(scrape_item.url, scrape_item, filename, ext) 31 | else: 32 | await self.album(scrape_item) 33 | 34 | await self.scraping_progress.remove_task(task_id) 35 | 36 | @error_handling_wrapper 37 | async def album(self, scrape_item: ScrapeItem) -> None: 38 | """Scrapes a profile""" 39 | async with self.request_limiter: 40 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 41 | 42 | title = await self.create_title(soup.select_one("h1[id=title]").text, scrape_item.url.parts[2], None) 43 | 44 | links = soup.select("a[class=image]") 45 | for link in links: 46 | link = URL(link.get('href')) 47 | try: 48 | filename, ext = await get_filename_and_ext(link.name) 49 | except NoExtensionFailure: 50 | await log(f"Couldn't get extension for {str(link)}", 30) 51 | continue 52 | new_scrape_item = await self.create_scrape_item(scrape_item, link, title, True) 53 | await self.handle_file(link, new_scrape_item, filename, ext) 54 | -------------------------------------------------------------------------------- /cyberdrop_dl/managers/db_manager.py: -------------------------------------------------------------------------------- 1 | from dataclasses import field 2 | from pathlib import Path 3 | from typing import TYPE_CHECKING 4 | 5 | import aiosqlite 6 | 7 | from cyberdrop_dl.utils.database.tables.history_table import HistoryTable 8 | from 
cyberdrop_dl.utils.database.tables.temp_table import TempTable 9 | 10 | if TYPE_CHECKING: 11 | from cyberdrop_dl.managers.manager import Manager 12 | 13 | 14 | class DBManager: 15 | def __init__(self, manager: 'Manager', db_path: Path): 16 | self.manager = manager 17 | self._db_conn: aiosqlite.Connection = field(init=False) 18 | self._db_path: Path = db_path 19 | 20 | self.ignore_history: bool = False 21 | 22 | self.history_table: HistoryTable = field(init=False) 23 | self.temp_table: TempTable = field(init=False) 24 | 25 | async def startup(self) -> None: 26 | """Startup process for the DBManager""" 27 | self._db_conn = await aiosqlite.connect(self._db_path) 28 | 29 | self.ignore_history = self.manager.config_manager.settings_data['Runtime_Options']['ignore_history'] 30 | 31 | self.history_table = HistoryTable(self._db_conn) 32 | self.temp_table = TempTable(self._db_conn) 33 | 34 | self.history_table.ignore_history = self.ignore_history 35 | 36 | await self._pre_allocate() 37 | 38 | await self.history_table.startup() 39 | await self.temp_table.startup() 40 | 41 | async def close(self) -> None: 42 | """Close the DBManager""" 43 | await self._db_conn.close() 44 | 45 | async def _pre_allocate(self) -> None: 46 | """We pre-allocate 100MB of space to the SQL file just in case the user runs out of disk space""" 47 | create_pre_allocation_table = "CREATE TABLE IF NOT EXISTS t(x);" 48 | drop_pre_allocation_table = "DROP TABLE t;" 49 | 50 | fill_pre_allocation = "INSERT INTO t VALUES(zeroblob(100*1024*1024));" # 100 mb 51 | check_pre_allocation = "PRAGMA freelist_count;" 52 | 53 | result = await self._db_conn.execute(check_pre_allocation) 54 | free_space = await result.fetchone() 55 | 56 | if free_space[0] <= 1024: 57 | await self._db_conn.execute(create_pre_allocation_table) 58 | await self._db_conn.commit() 59 | await self._db_conn.execute(fill_pre_allocation) 60 | await self._db_conn.commit() 61 | await self._db_conn.execute(drop_pre_allocation_table) 62 | await self._db_conn.commit() 63 | -------------------------------------------------------------------------------- /cyberdrop_dl/clients/errors.py: -------------------------------------------------------------------------------- 1 | class InvalidContentTypeFailure(Exception): 2 | """This error will be thrown when the content type isn't as expected""" 3 | def __init__(self, *, message: str = "Invalid content type"): 4 | self.message = message 5 | super().__init__(self.message) 6 | 7 | 8 | class NoExtensionFailure(Exception): 9 | """This error will be thrown when no extension is given for a file""" 10 | def __init__(self, *, message: str = "Extension missing for file"): 11 | self.message = message 12 | super().__init__(self.message) 13 | 14 | 15 | class PasswordProtected(Exception): 16 | """This error will be thrown when a file is password protected""" 17 | def __init__(self, *, message: str = "File/Folder is password protected"): 18 | self.message = message 19 | super().__init__(self.message) 20 | 21 | 22 | class DDOSGuardFailure(Exception): 23 | """This error will be thrown when DDoS-Guard is detected""" 24 | 25 | def __init__(self, status: int, message: str = "DDoS-Guard detected"): 26 | self.status = status 27 | self.message = message 28 | super().__init__(self.message) 29 | super().__init__(self.status) 30 | 31 | 32 | class DownloadFailure(Exception): 33 | """This error will be thrown when a request fails""" 34 | def __init__(self, status: int, message: str = "Something went wrong"): 35 | self.status = status 36 | self.message = message 37 
| super().__init__(self.message) 38 | super().__init__(self.status) 39 | 40 | 41 | class ScrapeFailure(Exception): 42 | """This error will be thrown when a request fails""" 43 | def __init__(self, status: int, message: str = "Something went wrong"): 44 | self.status = status 45 | self.message = message 46 | super().__init__(self.message) 47 | super().__init__(self.status) 48 | 49 | 50 | class FailedLoginFailure(Exception): 51 | """This error will be thrown when the login fails for a site""" 52 | def __init__(self, *, status: int, message: str = "Failed login."): 53 | self.status = status 54 | self.message = message 55 | super().__init__(self.message) 56 | super().__init__(self.status) 57 | 58 | 59 | class JDownloaderFailure(Exception): 60 | """Basic failure template for JDownloader""" 61 | def __init__(self, message: str = "Something went wrong"): 62 | self.message = message 63 | super().__init__(self.message) 64 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ master ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ master ] 20 | schedule: 21 | - cron: '30 19 * * 5' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | permissions: 28 | actions: read 29 | contents: read 30 | security-events: write 31 | 32 | strategy: 33 | fail-fast: false 34 | matrix: 35 | language: [ 'python' ] 36 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] 37 | # Learn more about CodeQL language support at https://git.io/codeql-language-support 38 | 39 | steps: 40 | - name: Checkout repository 41 | uses: actions/checkout@v2 42 | 43 | # Initializes the CodeQL tools for scanning. 44 | - name: Initialize CodeQL 45 | uses: github/codeql-action/init@v2 46 | with: 47 | languages: ${{ matrix.language }} 48 | # If you wish to specify custom queries, you can do so here or in a config file. 49 | # By default, queries listed here will override any specified in a config file. 50 | # Prefix the list here with "+" to use these queries and those in the config file. 51 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 52 | 53 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 54 | # If this step fails, then you should remove it and run the build manually (see below) 55 | - name: Autobuild 56 | uses: github/codeql-action/autobuild@v2 57 | 58 | # ℹ️ Command-line programs to run using the OS shell. 
59 | # 📚 https://git.io/JvXDl 60 | 61 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 62 | # and modify them (or add more) to build your code if your project 63 | # uses a compiled language 64 | 65 | #- run: | 66 | # make bootstrap 67 | # make release 68 | 69 | - name: Perform CodeQL Analysis 70 | uses: github/codeql-action/analyze@v2 71 | -------------------------------------------------------------------------------- /cyberdrop_dl/scraper/jdownloader.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from dataclasses import field 4 | from typing import TYPE_CHECKING 5 | 6 | from myjdapi import myjdapi 7 | 8 | from cyberdrop_dl.clients.errors import JDownloaderFailure 9 | from cyberdrop_dl.managers.manager import Manager 10 | from cyberdrop_dl.utils.utilities import log 11 | 12 | if TYPE_CHECKING: 13 | from yarl import URL 14 | 15 | 16 | class JDownloader: 17 | """Class that handles connecting and passing links to JDownloader""" 18 | def __init__(self, manager: Manager): 19 | self.enabled = manager.config_manager.settings_data['Runtime_Options']['send_unsupported_to_jdownloader'] 20 | self.jdownloader_device = manager.config_manager.authentication_data['JDownloader']['jdownloader_device'] 21 | self.jdownloader_username = manager.config_manager.authentication_data['JDownloader']['jdownloader_username'] 22 | self.jdownloader_password = manager.config_manager.authentication_data['JDownloader']['jdownloader_password'] 23 | self.download_directory = manager.path_manager.download_dir 24 | self.jdownloader_agent = field(init=False) 25 | 26 | async def jdownloader_setup(self) -> None: 27 | """Setup function for JDownloader""" 28 | try: 29 | if not self.jdownloader_username or not self.jdownloader_password or not self.jdownloader_device: 30 | raise JDownloaderFailure("JDownloader credentials were not provided.") 31 | jd = myjdapi.Myjdapi() 32 | jd.set_app_key("CYBERDROP-DL") 33 | jd.connect(self.jdownloader_username, self.jdownloader_password) 34 | self.jdownloader_agent = jd.get_device(self.jdownloader_device) 35 | except (myjdapi.MYJDApiException, JDownloaderFailure) as e: 36 | await log("Failed JDownloader setup", 40) 37 | await log(e.message, 40) 38 | self.enabled = False 39 | 40 | async def direct_unsupported_to_jdownloader(self, url: URL, title: str) -> None: 41 | """Sends links to JDownloader""" 42 | try: 43 | assert url.host is not None 44 | assert self.jdownloader_agent is not None 45 | self.jdownloader_agent.linkgrabber.add_links([{ 46 | "autostart": False, 47 | "links": str(url), 48 | "packageName": title if title else "Cyberdrop-DL", 49 | "destinationFolder": str(self.download_directory.absolute()), 50 | "overwritePackagizerRules": True 51 | }]) 52 | 53 | except (JDownloaderFailure, AssertionError) as e: 54 | await log(f"Failed to send {url} to JDownloader", 40) 55 | await log(e.message, 40) 56 | -------------------------------------------------------------------------------- /cyberdrop_dl/scraper/crawlers/nudostartv_crawler.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | from aiolimiter import AsyncLimiter 6 | from yarl import URL 7 | 8 | from cyberdrop_dl.scraper.crawler import Crawler 9 | from cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem 10 | from cyberdrop_dl.utils.utilities import get_filename_and_ext, error_handling_wrapper 
11 | 12 | if TYPE_CHECKING: 13 | from cyberdrop_dl.managers.manager import Manager 14 | 15 | 16 | class NudoStarTVCrawler(Crawler): 17 | def __init__(self, manager: Manager): 18 | super().__init__(manager, "nudostartv", "NudoStarTV") 19 | self.request_limiter = AsyncLimiter(10, 1) 20 | 21 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 22 | 23 | async def fetch(self, scrape_item: ScrapeItem) -> None: 24 | """Determines where to send the scrape item based on the url""" 25 | task_id = await self.scraping_progress.add_task(scrape_item.url) 26 | 27 | scrape_item.url = URL(str(scrape_item.url) + "/") 28 | await self.profile(scrape_item) 29 | 30 | await self.scraping_progress.remove_task(task_id) 31 | 32 | @error_handling_wrapper 33 | async def profile(self, scrape_item: ScrapeItem) -> None: 34 | """Scrapes a profile""" 35 | async with self.request_limiter: 36 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 37 | 38 | title = await self.create_title(soup.select_one('title').get_text().split("/")[0], None, None) 39 | content = soup.select('div[id=list_videos_common_videos_list_items] div a') 40 | for page in content: 41 | link = URL(page.get('href')) 42 | new_scrape_item = await self.create_scrape_item(scrape_item, link, title, True) 43 | await self.image(new_scrape_item) 44 | next_page = soup.select_one('li[class=next] a') 45 | if next_page: 46 | link = URL(next_page.get('href')) 47 | new_scrape_item = await self.create_scrape_item(scrape_item, link, "") 48 | self.manager.task_group.create_task(self.run(new_scrape_item)) 49 | 50 | @error_handling_wrapper 51 | async def image(self, scrape_item: ScrapeItem) -> None: 52 | """Scrapes an album""" 53 | async with self.request_limiter: 54 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 55 | content = soup.select('div[class=block-video] a img') 56 | for image in content: 57 | link = URL(image.get('src')) 58 | filename, ext = await get_filename_and_ext(link.name) 59 | await self.handle_file(link, scrape_item, filename, ext) 60 | -------------------------------------------------------------------------------- /cyberdrop_dl/scraper/crawlers/hotpic_crawler.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | from aiolimiter import AsyncLimiter 6 | from yarl import URL 7 | 8 | from cyberdrop_dl.scraper.crawler import Crawler 9 | from cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem 10 | from cyberdrop_dl.utils.utilities import error_handling_wrapper, get_filename_and_ext, log 11 | 12 | if TYPE_CHECKING: 13 | from cyberdrop_dl.managers.manager import Manager 14 | 15 | 16 | class HotPicCrawler(Crawler): 17 | def __init__(self, manager: Manager): 18 | super().__init__(manager, "hotpic", "HotPic") 19 | self.primary_base_domain = URL("https://hotpic.cc") 20 | self.request_limiter = AsyncLimiter(10, 1) 21 | 22 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 23 | 24 | async def fetch(self, scrape_item: ScrapeItem) -> None: 25 | """Determines where to send the scrape item based on the url""" 26 | task_id = await self.scraping_progress.add_task(scrape_item.url) 27 | 28 | if "album" in scrape_item.url.parts: 29 | await self.album(scrape_item) 30 | elif "i" in scrape_item.url.parts: 31 | await self.image(scrape_item) 32 | else: 33 | await log(f"Scrape Failed: Unknown 
URL Path for {scrape_item.url}", 40) 34 | await self.manager.progress_manager.scrape_stats_progress.add_failure("Unsupported Link") 35 | 36 | await self.scraping_progress.remove_task(task_id) 37 | 38 | @error_handling_wrapper 39 | async def album(self, scrape_item: ScrapeItem) -> None: 40 | """Scrapes an album""" 41 | async with self.request_limiter: 42 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 43 | 44 | title = await self.create_title(soup.select_one("title").text.rsplit(" - ")[0], scrape_item.url.parts[2], None) 45 | await scrape_item.add_to_parent_title(title) 46 | scrape_item.part_of_album = True 47 | 48 | files = soup.select("a[class*=spotlight]") 49 | for file in files: 50 | link = URL(file.get("href")) 51 | filename, ext = await get_filename_and_ext(link.name) 52 | await self.handle_file(link, scrape_item, filename, ext) 53 | 54 | @error_handling_wrapper 55 | async def image(self, scrape_item: ScrapeItem) -> None: 56 | """Scrapes an image""" 57 | if await self.check_complete_from_referer(scrape_item): 58 | return 59 | 60 | async with self.request_limiter: 61 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 62 | 63 | link = URL(soup.select_one("img[id*=main-image]").get("src")) 64 | filename, ext = await get_filename_and_ext(link.name) 65 | await self.handle_file(link, scrape_item, filename, ext) 66 | -------------------------------------------------------------------------------- /cyberdrop_dl/utils/args/browser_cookie_extraction.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from functools import wraps 4 | from typing import TYPE_CHECKING 5 | 6 | import browser_cookie3 7 | from InquirerPy import inquirer 8 | from rich.console import Console 9 | 10 | from cyberdrop_dl.utils.dataclasses.supported_domains import SupportedDomains 11 | 12 | if TYPE_CHECKING: 13 | from typing import Dict 14 | 15 | from cyberdrop_dl.managers.manager import Manager 16 | 17 | 18 | def cookie_wrapper(func): 19 | """Wrapper handles errors for url scraping""" 20 | @wraps(func) 21 | def wrapper(self, *args, **kwargs): 22 | try: 23 | return func(self, *args, **kwargs) 24 | except PermissionError: 25 | console = Console() 26 | console.clear() 27 | console.print("We've encountered a Permissions Error. Please close all browsers and try again.", style="bold red") 28 | console.print("If you are still having issues, make sure all browsers processes are closed in a Task Manager.", style="bold red") 29 | console.print("Nothing has been saved.", style="bold red") 30 | inquirer.confirm(message="Press enter to return menu.").execute() 31 | return 32 | return wrapper 33 | 34 | 35 | # noinspection PyProtectedMember 36 | @cookie_wrapper 37 | def get_forum_cookies(manager: Manager, browser: str) -> None: 38 | """Get the cookies for the forums""" 39 | auth_args: Dict = manager.config_manager.authentication_data 40 | for forum in SupportedDomains.supported_forums: 41 | try: 42 | cookie = get_cookie(browser, forum) 43 | auth_args['Forums'][f'{SupportedDomains.supported_forums_map[forum]}_xf_user_cookie'] = cookie._cookies[forum]['/']['xf_user'].value 44 | except KeyError: 45 | try: 46 | cookie = get_cookie(browser, "www." + forum) 47 | auth_args['Forums'][f'{SupportedDomains.supported_forums_map[forum]}_xf_user_cookie'] = cookie._cookies["www." 
+ forum]['/']['xf_user'].value 48 | except KeyError: 49 | pass 50 | 51 | manager.cache_manager.save("browser", browser) 52 | 53 | 54 | def get_cookie(browser: str, domain: str): 55 | """Get the cookies for a specific domain""" 56 | if browser == 'chrome': 57 | cookie = browser_cookie3.chrome(domain_name=domain) 58 | elif browser == 'firefox': 59 | cookie = browser_cookie3.firefox(domain_name=domain) 60 | elif browser == 'edge': 61 | cookie = browser_cookie3.edge(domain_name=domain) 62 | elif browser == 'safari': 63 | cookie = browser_cookie3.safari(domain_name=domain) 64 | elif browser == 'opera': 65 | cookie = browser_cookie3.opera(domain_name=domain) 66 | elif browser == 'brave': 67 | cookie = browser_cookie3.brave(domain_name=domain) 68 | else: 69 | raise ValueError('Invalid browser specified') 70 | 71 | return cookie 72 | -------------------------------------------------------------------------------- /cyberdrop_dl/scraper/crawlers/postimg_crawler.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import itertools 4 | from typing import TYPE_CHECKING 5 | 6 | from aiolimiter import AsyncLimiter 7 | from yarl import URL 8 | 9 | from cyberdrop_dl.scraper.crawler import Crawler 10 | from cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem 11 | from cyberdrop_dl.utils.utilities import error_handling_wrapper, get_filename_and_ext 12 | 13 | if TYPE_CHECKING: 14 | from cyberdrop_dl.managers.manager import Manager 15 | 16 | 17 | class PostImgCrawler(Crawler): 18 | def __init__(self, manager: Manager): 19 | super().__init__(manager, "postimg", "PostImg") 20 | self.api_address = URL('https://postimg.cc/json') 21 | self.request_limiter = AsyncLimiter(10, 1) 22 | 23 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 24 | 25 | async def fetch(self, scrape_item: ScrapeItem) -> None: 26 | """Determines where to send the scrape item based on the url""" 27 | task_id = await self.scraping_progress.add_task(scrape_item.url) 28 | 29 | if "i.postimg.cc" in scrape_item.url.host: 30 | filename, ext = await get_filename_and_ext(scrape_item.url.name) 31 | await self.handle_file(scrape_item.url, scrape_item, filename, ext) 32 | elif "gallery" in scrape_item.url.parts: 33 | await self.album(scrape_item) 34 | else: 35 | await self.image(scrape_item) 36 | 37 | await self.scraping_progress.remove_task(task_id) 38 | 39 | @error_handling_wrapper 40 | async def album(self, scrape_item: ScrapeItem) -> None: 41 | """Scrapes an album""" 42 | data = {"action": "list", "album": scrape_item.url.raw_name, "page": 0} 43 | for i in itertools.count(1): 44 | data["page"] = i 45 | async with self.request_limiter: 46 | JSON_Resp = await self.client.post_data(self.domain, self.api_address, data=data) 47 | 48 | title = await self.create_title(scrape_item.url.raw_name, scrape_item.url.parts[2], None) 49 | 50 | for image in JSON_Resp['images']: 51 | link = URL(image[4]) 52 | filename, ext = image[2], image[3] 53 | new_scrape_item = await self.create_scrape_item(scrape_item, link, title, True) 54 | await self.handle_file(link, new_scrape_item, filename, ext) 55 | 56 | if not JSON_Resp['has_page_next']: 57 | break 58 | 59 | @error_handling_wrapper 60 | async def image(self, scrape_item: ScrapeItem) -> None: 61 | """Scrapes an image""" 62 | if await self.check_complete_from_referer(scrape_item): 63 | return 64 | 65 | async with self.request_limiter: 66 | soup = await 
self.client.get_BS4(self.domain, scrape_item.url) 67 | 68 | link = URL(soup.select_one("a[id=download]").get('href').replace("?dl=1", "")) 69 | filename, ext = await get_filename_and_ext(link.name) 70 | await self.handle_file(link, scrape_item, filename, ext) 71 | -------------------------------------------------------------------------------- /cyberdrop_dl/ui/progress/downloads_progress.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, TYPE_CHECKING 2 | 3 | from rich.console import Group 4 | from rich.panel import Panel 5 | from rich.progress import Progress, BarColumn 6 | 7 | if TYPE_CHECKING: 8 | from cyberdrop_dl.managers.manager import Manager 9 | 10 | 11 | class DownloadsProgress: 12 | """Class that keeps track of completed, skipped and failed files""" 13 | 14 | def __init__(self, manager: 'Manager'): 15 | self.manager = manager 16 | self.progress = Progress("[progress.description]{task.description}", 17 | BarColumn(bar_width=None), 18 | "[progress.percentage]{task.percentage:>3.2f}%", 19 | "{task.completed} of {task.total} Files") 20 | self.progress_group = Group(self.progress) 21 | 22 | self.total_files = 0 23 | self.completed_files_task_id = self.progress.add_task("[green]Completed", total=0) 24 | self.completed_files = 0 25 | self.previously_completed_files_task_id = self.progress.add_task("[yellow]Previously Downloaded", total=0) 26 | self.previously_completed_files = 0 27 | self.skipped_files_task_id = self.progress.add_task("[yellow]Skipped By Configuration", total=0) 28 | self.skipped_files = 0 29 | self.failed_files_task_id = self.progress.add_task("[red]Failed", total=0) 30 | self.failed_files = 0 31 | 32 | async def get_progress(self) -> Panel: 33 | """Returns the progress bar""" 34 | return Panel(self.progress_group, title=f"Config: {self.manager.config_manager.loaded_config}", border_style="green", padding=(1, 1)) 35 | 36 | async def update_total(self) -> None: 37 | """Updates the total number of files to be downloaded""" 38 | self.total_files = self.total_files + 1 39 | self.progress.update(self.completed_files_task_id, total=self.total_files) 40 | self.progress.update(self.previously_completed_files_task_id, total=self.total_files) 41 | self.progress.update(self.skipped_files_task_id, total=self.total_files) 42 | self.progress.update(self.failed_files_task_id, total=self.total_files) 43 | 44 | async def add_completed(self) -> None: 45 | """Adds a completed file to the progress bar""" 46 | self.progress.advance(self.completed_files_task_id, 1) 47 | self.completed_files += 1 48 | 49 | async def add_previously_completed(self, increase_total: bool = True) -> None: 50 | """Adds a previously completed file to the progress bar""" 51 | if increase_total: 52 | await self.update_total() 53 | self.previously_completed_files += 1 54 | self.progress.advance(self.previously_completed_files_task_id, 1) 55 | 56 | async def add_skipped(self) -> None: 57 | """Adds a skipped file to the progress bar""" 58 | self.progress.advance(self.skipped_files_task_id, 1) 59 | self.skipped_files += 1 60 | 61 | async def add_failed(self) -> None: 62 | """Adds a failed file to the progress bar""" 63 | self.progress.advance(self.failed_files_task_id, 1) 64 | self.failed_files += 1 65 | 66 | async def return_totals(self) -> Tuple[int, int, int, int]: 67 | """Returns the total number of completed, previously completed, skipped and failed files""" 68 | return self.completed_files, self.previously_completed_files, self.skipped_files, 
self.failed_files 69 | -------------------------------------------------------------------------------- /cyberdrop_dl/scraper/crawlers/imgbox_crawler.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import calendar 4 | import datetime 5 | from typing import TYPE_CHECKING 6 | 7 | from aiolimiter import AsyncLimiter 8 | from yarl import URL 9 | 10 | from cyberdrop_dl.scraper.crawler import Crawler 11 | from cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem 12 | from cyberdrop_dl.utils.utilities import error_handling_wrapper, get_filename_and_ext 13 | 14 | if TYPE_CHECKING: 15 | from cyberdrop_dl.managers.manager import Manager 16 | 17 | 18 | class ImgBoxCrawler(Crawler): 19 | def __init__(self, manager: Manager): 20 | super().__init__(manager, "imgbox", "ImgBox") 21 | self.primary_base_domain = URL("https://imgbox.com") 22 | self.request_limiter = AsyncLimiter(10, 1) 23 | 24 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 25 | 26 | async def fetch(self, scrape_item: ScrapeItem) -> None: 27 | """Determines where to send the scrape item based on the url""" 28 | task_id = await self.scraping_progress.add_task(scrape_item.url) 29 | 30 | if "t" in scrape_item.url.host or "_" in scrape_item.url.name: 31 | scrape_item.url = self.primary_base_domain / scrape_item.url.name.split("_")[0] 32 | 33 | if "gallery/edit" in str(scrape_item.url): 34 | scrape_item.url = self.primary_base_domain / "g" / scrape_item.url.parts[-2] 35 | 36 | if "g" in scrape_item.url.parts: 37 | await self.album(scrape_item) 38 | else: 39 | await self.image(scrape_item) 40 | 41 | await self.scraping_progress.remove_task(task_id) 42 | 43 | @error_handling_wrapper 44 | async def album(self, scrape_item: ScrapeItem) -> None: 45 | """Scrapes an album""" 46 | async with self.request_limiter: 47 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 48 | 49 | title = await self.create_title(soup.select_one("div[id=gallery-view] h1").get_text().strip().rsplit(" - ", 1)[0], scrape_item.url.parts[2], None) 50 | 51 | scrape_item.part_of_album = True 52 | await scrape_item.add_to_parent_title(title) 53 | 54 | images = soup.find('div', attrs={'id': 'gallery-view-content'}) 55 | images = images.findAll("img") 56 | for link in images: 57 | link = URL(link.get('src').replace("thumbs", "images").replace("_b", "_o")) 58 | filename, ext = await get_filename_and_ext(link.name) 59 | await self.handle_file(link, scrape_item, filename, ext) 60 | 61 | @error_handling_wrapper 62 | async def image(self, scrape_item: ScrapeItem) -> None: 63 | """Scrapes an image""" 64 | if await self.check_complete_from_referer(scrape_item): 65 | return 66 | 67 | async with self.request_limiter: 68 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 69 | 70 | image = URL(soup.select_one("img[id=img]").get('src')) 71 | filename, ext = await get_filename_and_ext(image.name) 72 | await self.handle_file(image, scrape_item, filename, ext) 73 | 74 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 75 | 76 | async def parse_datetime(self, date: str) -> int: 77 | """Parses a datetime string into a unix timestamp""" 78 | date = datetime.datetime.strptime(date, "%Y-%m-%d %H:%M:%S") 79 | return calendar.timegm(date.timetuple()) 80 | -------------------------------------------------------------------------------- 
/cyberdrop_dl/managers/log_manager.py: -------------------------------------------------------------------------------- 1 | from typing import TYPE_CHECKING 2 | 3 | import aiofiles 4 | 5 | if TYPE_CHECKING: 6 | from pathlib import Path 7 | from yarl import URL 8 | 9 | from cyberdrop_dl.managers.manager import Manager 10 | 11 | 12 | class LogManager: 13 | def __init__(self, manager: 'Manager'): 14 | self.manager = manager 15 | self.main_log: Path = manager.path_manager.main_log 16 | self.last_post_log: Path = manager.path_manager.last_post_log 17 | self.unsupported_urls_log: Path = manager.path_manager.unsupported_urls_log 18 | self.download_error_log: Path = manager.path_manager.download_error_log 19 | self.scrape_error_log: Path = manager.path_manager.scrape_error_log 20 | 21 | def startup(self) -> None: 22 | """Startup process for the file manager""" 23 | self.main_log.unlink(missing_ok=True) 24 | self.main_log.touch(exist_ok=True) 25 | self.last_post_log.unlink(missing_ok=True) 26 | self.last_post_log.touch(exist_ok=True) 27 | self.unsupported_urls_log.unlink(missing_ok=True) 28 | self.unsupported_urls_log.touch(exist_ok=True) 29 | self.download_error_log.unlink(missing_ok=True) 30 | self.download_error_log.touch(exist_ok=True) 31 | self.scrape_error_log.unlink(missing_ok=True) 32 | self.scrape_error_log.touch(exist_ok=True) 33 | 34 | async def write_last_post_log(self, url: 'URL') -> None: 35 | """Writes to the last post log""" 36 | async with aiofiles.open(self.last_post_log, 'a') as f: 37 | await f.write(f"{url}\n") 38 | 39 | async def write_unsupported_urls_log(self, url: 'URL') -> None: 40 | """Writes to the unsupported urls log""" 41 | async with aiofiles.open(self.unsupported_urls_log, 'a') as f: 42 | await f.write(f"{url}\n") 43 | 44 | async def write_download_error_log(self, url: 'URL', error_message: str) -> None: 45 | """Writes to the download error log""" 46 | async with aiofiles.open(self.download_error_log, 'a') as f: 47 | await f.write(f"{url},{error_message}\n") 48 | 49 | async def write_scrape_error_log(self, url: 'URL', error_message: str) -> None: 50 | """Writes to the scrape error log""" 51 | async with aiofiles.open(self.scrape_error_log, 'a') as f: 52 | await f.write(f"{url},{error_message}\n") 53 | 54 | async def update_last_forum_post(self) -> None: 55 | """Updates the last forum post""" 56 | input_file = self.manager.path_manager.input_file 57 | base_urls = [] 58 | 59 | async with aiofiles.open(input_file, 'r') as f: 60 | current_urls = await f.readlines() 61 | 62 | for url in current_urls: 63 | if "http" not in url: 64 | continue 65 | if "post-" in url: 66 | url = url.rsplit("/", 1)[0] 67 | if not url.endswith("\n"): 68 | url += "\n" 69 | base_urls.append(url) 70 | 71 | last_post_file = self.last_post_log 72 | if not last_post_file.exists(): 73 | return 74 | 75 | async with aiofiles.open(last_post_file, 'r') as f: 76 | new_urls = await f.readlines() 77 | 78 | for url in new_urls: 79 | url_temp = url.rsplit("/", 1)[0] + "\n" 80 | if url_temp in base_urls: 81 | base_urls.remove(url_temp) 82 | base_urls.append(url) 83 | 84 | async with aiofiles.open(input_file, 'w') as f: 85 | await f.writelines(base_urls) 86 | -------------------------------------------------------------------------------- /cyberdrop_dl/scraper/crawlers/cyberdrop_crawler.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import calendar 4 | import datetime 5 | from typing import TYPE_CHECKING 6 | 7 | from aiolimiter 
import AsyncLimiter 8 | from yarl import URL 9 | 10 | from cyberdrop_dl.scraper.crawler import Crawler 11 | from cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem 12 | from cyberdrop_dl.utils.utilities import get_filename_and_ext, error_handling_wrapper 13 | 14 | if TYPE_CHECKING: 15 | from cyberdrop_dl.managers.manager import Manager 16 | 17 | 18 | class CyberdropCrawler(Crawler): 19 | def __init__(self, manager: Manager): 20 | super().__init__(manager, "cyberdrop", "Cyberdrop") 21 | self.api_url = URL("https://api.cyberdrop.me/api/") 22 | self.primary_base_url = URL("https://cyberdrop.me/") 23 | self.request_limiter = AsyncLimiter(1.0, 2.0) 24 | 25 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 26 | 27 | async def fetch(self, scrape_item: ScrapeItem) -> None: 28 | """Determines where to send the scrape item based on the url""" 29 | task_id = await self.scraping_progress.add_task(scrape_item.url) 30 | 31 | if "a" in scrape_item.url.parts: 32 | scrape_item.url = scrape_item.url.with_query("nojs") 33 | await self.album(scrape_item) 34 | else: 35 | await self.file(scrape_item) 36 | 37 | await self.scraping_progress.remove_task(task_id) 38 | 39 | @error_handling_wrapper 40 | async def album(self, scrape_item: ScrapeItem) -> None: 41 | """Scrapes an album""" 42 | async with self.request_limiter: 43 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 44 | 45 | title = await self.create_title(soup.select_one("h1[id=title]").text, scrape_item.url.parts[2], None) 46 | date = await self.parse_datetime(soup.select("p[class=title]")[-1].text) 47 | 48 | links = soup.select("div[class*=image-container] a[class=image]") 49 | for link in links: 50 | link = link.get('href') 51 | if link.startswith("/"): 52 | link = self.primary_base_url.with_path(link) 53 | link = URL(link) 54 | 55 | new_scrape_item = await self.create_scrape_item(scrape_item, link, title, True, None, date) 56 | self.manager.task_group.create_task(self.run(new_scrape_item)) 57 | 58 | @error_handling_wrapper 59 | async def file(self, scrape_item: ScrapeItem) -> None: 60 | """Scrapes a file""" 61 | if await self.check_complete_from_referer(scrape_item): 62 | return 63 | 64 | async with self.request_limiter: 65 | JSON_Resp = await self.client.get_json(self.domain, self.api_url / "file" / "info" / scrape_item.url.path[3:]) 66 | 67 | filename, ext = await get_filename_and_ext(JSON_Resp["name"]) 68 | 69 | async with self.request_limiter: 70 | JSON_Resp = await self.client.get_json(self.domain, self.api_url / "file" / "auth" / scrape_item.url.path[3:]) 71 | 72 | link = URL(JSON_Resp['url']) 73 | await self.handle_file(link, scrape_item, filename, ext) 74 | 75 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 76 | 77 | async def parse_datetime(self, date: str) -> int: 78 | """Parses a datetime string into a unix timestamp""" 79 | date = datetime.datetime.strptime(date, "%d.%m.%Y") 80 | return calendar.timegm(date.timetuple()) 81 | 82 | -------------------------------------------------------------------------------- /cyberdrop_dl/scraper/crawlers/fapello_crawler.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | from aiolimiter import AsyncLimiter 6 | from yarl import URL 7 | 8 | from cyberdrop_dl.scraper.crawler import Crawler 9 | from 
cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem 10 | from cyberdrop_dl.utils.utilities import get_filename_and_ext, error_handling_wrapper 11 | 12 | if TYPE_CHECKING: 13 | from cyberdrop_dl.managers.manager import Manager 14 | 15 | 16 | class FapelloCrawler(Crawler): 17 | def __init__(self, manager: Manager): 18 | super().__init__(manager, "fapello", "Fapello") 19 | self.request_limiter = AsyncLimiter(5, 1) 20 | 21 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 22 | 23 | async def fetch(self, scrape_item: ScrapeItem) -> None: 24 | """Determines where to send the scrape item based on the url""" 25 | task_id = await self.scraping_progress.add_task(scrape_item.url) 26 | 27 | if not str(scrape_item.url).endswith("/"): 28 | scrape_item.url = URL(str(scrape_item.url) + "/") 29 | 30 | if scrape_item.url.parts[-2].isnumeric(): 31 | await self.post(scrape_item) 32 | else: 33 | await self.profile(scrape_item) 34 | 35 | await self.scraping_progress.remove_task(task_id) 36 | 37 | @error_handling_wrapper 38 | async def profile(self, scrape_item: ScrapeItem) -> None: 39 | """Scrapes a profile""" 40 | async with self.request_limiter: 41 | soup, response_url = await self.client.get_BS4_and_return_URL(self.domain, scrape_item.url) 42 | if response_url != scrape_item.url: 43 | return 44 | 45 | title = await self.create_title(soup.select_one('h2[class="font-semibold lg:text-2xl text-lg mb-2 mt-4"]').get_text(), None, None) 46 | 47 | content = soup.select("div[id=content] a") 48 | for post in content: 49 | if "javascript" in post.get('href'): 50 | video_tag = post.select_one('iframe') 51 | video_link = URL(video_tag.get('src')) 52 | new_scrape_item = await self.create_scrape_item(scrape_item, video_link, "", True) 53 | await self.handle_external_links(new_scrape_item) 54 | else: 55 | link = URL(post.get('href')) 56 | new_scrape_item = await self.create_scrape_item(scrape_item, link, title, True) 57 | await self.handle_external_links(new_scrape_item) 58 | 59 | next_page = soup.select_one('div[id="next_page"] a') 60 | if next_page: 61 | next_page = next_page.get('href') 62 | if next_page: 63 | new_scrape_item = ScrapeItem(URL(next_page), scrape_item.parent_title) 64 | self.manager.task_group.create_task(self.run(new_scrape_item)) 65 | 66 | @error_handling_wrapper 67 | async def post(self, scrape_item: ScrapeItem) -> None: 68 | """Scrapes an album""" 69 | async with self.request_limiter: 70 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 71 | 72 | content = soup.select_one('div[class="flex justify-between items-center"]') 73 | content_tags = content.select("img") 74 | content_tags.extend(content.select("source")) 75 | 76 | for selection in content_tags: 77 | link = URL(selection.get('src')) 78 | filename, ext = await get_filename_and_ext(link.name) 79 | await self.handle_file(link, scrape_item, filename, ext) 80 | -------------------------------------------------------------------------------- /cyberdrop_dl/managers/progress_manager.py: -------------------------------------------------------------------------------- 1 | from dataclasses import field 2 | from typing import TYPE_CHECKING 3 | 4 | from rich.layout import Layout 5 | 6 | from cyberdrop_dl.ui.progress.downloads_progress import DownloadsProgress 7 | from cyberdrop_dl.ui.progress.file_progress import FileProgress 8 | from cyberdrop_dl.ui.progress.scraping_progress import ScrapingProgress 9 | from cyberdrop_dl.ui.progress.statistic_progress import 
DownloadStatsProgress, ScrapeStatsProgress 10 | from cyberdrop_dl.utils.utilities import log_with_color 11 | 12 | if TYPE_CHECKING: 13 | from cyberdrop_dl.managers.manager import Manager 14 | 15 | 16 | class ProgressManager: 17 | def __init__(self, manager: 'Manager'): 18 | # File Download Bars 19 | self.manager = manager 20 | self.file_progress: FileProgress = FileProgress(manager.config_manager.global_settings_data['UI_Options']['downloading_item_limit'], manager) 21 | 22 | # Scraping Printout 23 | self.scraping_progress: ScrapingProgress = ScrapingProgress(manager.config_manager.global_settings_data['UI_Options']['scraping_item_limit'], manager) 24 | 25 | # Overall Progress Bars & Stats 26 | self.download_progress: DownloadsProgress = DownloadsProgress(manager) 27 | self.download_stats_progress: DownloadStatsProgress = DownloadStatsProgress() 28 | self.scrape_stats_progress: ScrapeStatsProgress = ScrapeStatsProgress() 29 | 30 | self.ui_refresh_rate = manager.config_manager.global_settings_data['UI_Options']['refresh_rate'] 31 | 32 | self.layout: Layout = field(init=False) 33 | 34 | async def startup(self) -> None: 35 | """Startup process for the progress manager""" 36 | progress_layout = Layout() 37 | progress_layout.split_column( 38 | Layout(name="upper", ratio=1, minimum_size=8), 39 | Layout(renderable=await self.scraping_progress.get_progress(), name="Scraping", ratio=2), 40 | Layout(renderable=await self.file_progress.get_progress(), name="Downloads", ratio=2), 41 | ) 42 | progress_layout["upper"].split_row( 43 | Layout(renderable=await self.download_progress.get_progress(), name="Files", ratio=1), 44 | Layout(renderable=await self.scrape_stats_progress.get_progress(), name="Scrape Failures", ratio=1), 45 | Layout(renderable=await self.download_stats_progress.get_progress(), name="Download Failures", ratio=1), 46 | ) 47 | 48 | self.layout = progress_layout 49 | 50 | async def print_stats(self) -> None: 51 | """Prints the stats of the program""" 52 | await log_with_color("\nDownload Stats:", "cyan", 20) 53 | await log_with_color(f"Downloaded {self.download_progress.completed_files} files", "green", 20) 54 | await log_with_color(f"Previously Downloaded {self.download_progress.previously_completed_files} files", "yellow", 20) 55 | await log_with_color(f"Skipped By Config {self.download_progress.skipped_files} files", "yellow", 20) 56 | await log_with_color(f"Failed {self.download_stats_progress.failed_files} files", "red", 20) 57 | 58 | scrape_failures = await self.scrape_stats_progress.return_totals() 59 | await log_with_color("\nScrape Failures:", "cyan", 20) 60 | for key, value in scrape_failures.items(): 61 | await log_with_color(f"Scrape Failures ({key}): {value}", "red", 20) 62 | 63 | download_failures = await self.download_stats_progress.return_totals() 64 | await log_with_color("\nDownload Failures:", "cyan", 20) 65 | for key, value in download_failures.items(): 66 | await log_with_color(f"Download Failures ({key}): {value}", "red", 20) -------------------------------------------------------------------------------- /cyberdrop_dl/scraper/crawlers/erome_crawler.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | from aiolimiter import AsyncLimiter 6 | from yarl import URL 7 | 8 | from cyberdrop_dl.scraper.crawler import Crawler 9 | from cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem 10 | from cyberdrop_dl.utils.utilities import 
get_filename_and_ext, error_handling_wrapper 11 | 12 | if TYPE_CHECKING: 13 | from cyberdrop_dl.managers.manager import Manager 14 | 15 | 16 | class EromeCrawler(Crawler): 17 | def __init__(self, manager: Manager): 18 | super().__init__(manager, "erome", "Erome") 19 | self.request_limiter = AsyncLimiter(10, 1) 20 | 21 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 22 | 23 | async def fetch(self, scrape_item: ScrapeItem) -> None: 24 | """Determines where to send the scrape item based on the url""" 25 | task_id = await self.scraping_progress.add_task(scrape_item.url) 26 | 27 | if "a" in scrape_item.url.parts: 28 | await self.album(scrape_item) 29 | else: 30 | await self.profile(scrape_item) 31 | 32 | await self.scraping_progress.remove_task(task_id) 33 | 34 | @error_handling_wrapper 35 | async def profile(self, scrape_item: ScrapeItem) -> None: 36 | """Scrapes a profile""" 37 | async with self.request_limiter: 38 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 39 | 40 | title = await self.create_title(scrape_item.url.name, None, None) 41 | albums = soup.select('a[class=album-link]') 42 | 43 | for album in albums: 44 | link = URL(album['href']) 45 | new_scrape_item = await self.create_scrape_item(scrape_item, link, title, True) 46 | self.manager.task_group.create_task(self.run(new_scrape_item)) 47 | 48 | next_page = soup.select_one('a[rel="next"]') 49 | if next_page: 50 | next_page = next_page.get("href").split("page=")[-1] 51 | new_scrape_item = await self.create_scrape_item(scrape_item, scrape_item.url.with_query(f"page={next_page}"), "") 52 | self.manager.task_group.create_task(self.run(new_scrape_item)) 53 | 54 | @error_handling_wrapper 55 | async def album(self, scrape_item: ScrapeItem) -> None: 56 | """Scrapes an album""" 57 | album_id = scrape_item.url.parts[2] 58 | results = await self.get_album_results(album_id) 59 | 60 | async with self.request_limiter: 61 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 62 | 63 | title_portion = soup.select_one('title').text.rsplit(" - Porn")[0].strip() 64 | if not title_portion: 65 | title_portion = scrape_item.url.name 66 | title = await self.create_title(title_portion, scrape_item.url.parts[2], None) 67 | await scrape_item.add_to_parent_title(title) 68 | 69 | images = soup.select('img[class="img-front lasyload"]') 70 | vidoes = soup.select('div[class=media-group] div[class=video-lg] video source') 71 | 72 | for image in images: 73 | link = URL(image['data-src']) 74 | filename, ext = await get_filename_and_ext(link.name) 75 | if not await self.check_album_results(link, results): 76 | await self.handle_file(link, scrape_item, filename, ext) 77 | 78 | for video in vidoes: 79 | link = URL(video['src']) 80 | filename, ext = await get_filename_and_ext(link.name) 81 | if not await self.check_album_results(link, results): 82 | await self.handle_file(link, scrape_item, filename, ext) 83 | -------------------------------------------------------------------------------- /cyberdrop_dl/scraper/crawlers/pimpandhost_crawler.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import calendar 4 | from datetime import datetime 5 | from typing import TYPE_CHECKING 6 | 7 | from aiolimiter import AsyncLimiter 8 | from yarl import URL 9 | 10 | from cyberdrop_dl.scraper.crawler import Crawler 11 | from cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem 12 | from 
cyberdrop_dl.utils.utilities import get_filename_and_ext, error_handling_wrapper 13 | 14 | if TYPE_CHECKING: 15 | from cyberdrop_dl.managers.manager import Manager 16 | 17 | 18 | class PimpAndHostCrawler(Crawler): 19 | def __init__(self, manager: Manager): 20 | super().__init__(manager, "pimpandhost", "PimpAndHost") 21 | self.request_limiter = AsyncLimiter(10, 1) 22 | 23 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 24 | 25 | async def fetch(self, scrape_item: ScrapeItem) -> None: 26 | """Determines where to send the scrape item based on the url""" 27 | task_id = await self.scraping_progress.add_task(scrape_item.url) 28 | 29 | if "album" in scrape_item.url.parts: 30 | await self.album(scrape_item) 31 | else: 32 | await self.image(scrape_item) 33 | 34 | await self.scraping_progress.remove_task(task_id) 35 | 36 | @error_handling_wrapper 37 | async def album(self, scrape_item: ScrapeItem) -> None: 38 | """Scrapes an album""" 39 | async with self.request_limiter: 40 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 41 | 42 | title = await self.create_title(soup.select_one("span[class=author-header__album-name]").get_text(), scrape_item.url.parts[2], None) 43 | date = soup.select_one("span[class=date-time]").get("title") 44 | date = await self.parse_datetime(date) 45 | 46 | files = soup.select('a[class*="image-wrapper center-cropped im-wr"]') 47 | for file in files: 48 | link = URL(file.get("href")) 49 | new_scrape_item = await self.create_scrape_item(scrape_item, link, title, True, None, date) 50 | self.manager.task_group.create_task(self.run(new_scrape_item)) 51 | 52 | next_page = soup.select_one("li[class=next] a") 53 | if next_page: 54 | next_page = next_page.get("href") 55 | if next_page.startswith("/"): 56 | next_page = URL("https://pimpandhost.com" + next_page) 57 | new_scrape_item = await self.create_scrape_item(scrape_item, next_page, "", True, None, date) 58 | self.manager.task_group.create_task(self.run(new_scrape_item)) 59 | 60 | @error_handling_wrapper 61 | async def image(self, scrape_item: ScrapeItem) -> None: 62 | """Scrapes an image""" 63 | async with self.request_limiter: 64 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 65 | 66 | link = soup.select_one('.main-image-wrapper') 67 | link = link.get('data-src') 68 | link = URL("https:" + link) if link.startswith("//") else URL(link) 69 | 70 | date = soup.select_one("span[class=date-time]").get("title") 71 | date = await self.parse_datetime(date) 72 | 73 | new_scrape_item = await self.create_scrape_item(scrape_item, link, "", True, None, date) 74 | filename, ext = await get_filename_and_ext(link.name) 75 | await self.handle_file(link, new_scrape_item, filename, ext) 76 | 77 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 78 | 79 | async def parse_datetime(self, date: str) -> int: 80 | """Parses a datetime string into a unix timestamp""" 81 | date = datetime.strptime(date, '%A, %B %d, %Y %I:%M:%S%p %Z') 82 | return calendar.timegm(date.timetuple()) 83 | -------------------------------------------------------------------------------- /cyberdrop_dl/scraper/crawlers/mediafire_crawler.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import calendar 4 | import datetime 5 | from typing import TYPE_CHECKING 6 | 7 | from aiolimiter import AsyncLimiter 8 | from mediafire import 
MediaFireApi, api 9 | from yarl import URL 10 | 11 | from cyberdrop_dl.clients.errors import ScrapeFailure 12 | from cyberdrop_dl.scraper.crawler import Crawler 13 | from cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem 14 | from cyberdrop_dl.utils.utilities import error_handling_wrapper, get_filename_and_ext 15 | 16 | if TYPE_CHECKING: 17 | from cyberdrop_dl.managers.manager import Manager 18 | 19 | 20 | class MediaFireCrawler(Crawler): 21 | def __init__(self, manager: Manager): 22 | super().__init__(manager, "mediafire", "mediafire") 23 | self.api = MediaFireApi() 24 | self.request_limiter = AsyncLimiter(5, 1) 25 | 26 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 27 | 28 | async def fetch(self, scrape_item: ScrapeItem) -> None: 29 | """Determines where to send the scrape item based on the url""" 30 | task_id = await self.scraping_progress.add_task(scrape_item.url) 31 | 32 | if "folder" in scrape_item.url.parts: 33 | await self.folder(scrape_item) 34 | else: 35 | await self.file(scrape_item) 36 | 37 | await self.scraping_progress.remove_task(task_id) 38 | 39 | @error_handling_wrapper 40 | async def folder(self, scrape_item: ScrapeItem) -> None: 41 | """Scrapes a folder of media""" 42 | folder_key = scrape_item.url.parts[2] 43 | folder_details = self.api.folder_get_info(folder_key=folder_key) 44 | 45 | title = await self.create_title(folder_details['folder_info']['name'], folder_key, None) 46 | 47 | chunk = 1 48 | chunk_size = 100 49 | while True: 50 | try: 51 | folder_contents = self.api.folder_get_content(folder_key=folder_key, content_type='files', chunk=chunk, chunk_size=chunk_size) 52 | except api.MediaFireConnectionError: 53 | raise ScrapeFailure(500, "MediaFire connection closed") 54 | files = folder_contents['folder_content']['files'] 55 | 56 | for file in files: 57 | date = await self.parse_datetime(file['created']) 58 | link = URL(file['links']['normal_download']) 59 | new_scrape_item = await self.create_scrape_item(scrape_item, link, title, True, None, date) 60 | self.manager.task_group.create_task(self.run(new_scrape_item)) 61 | 62 | if folder_contents["folder_content"]["more_chunks"] == "yes": 63 | chunk += 1 64 | else: 65 | break 66 | 67 | @error_handling_wrapper 68 | async def file(self, scrape_item: ScrapeItem) -> None: 69 | """Scrapes a single file""" 70 | if await self.check_complete_from_referer(scrape_item): 71 | return 72 | 73 | async with self.request_limiter: 74 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 75 | 76 | date = await self.parse_datetime(soup.select('ul[class=details] li span')[-1].get_text()) 77 | scrape_item.possible_datetime = date 78 | link = URL(soup.select_one('a[id=downloadButton]').get('href')) 79 | filename, ext = await get_filename_and_ext(link.name) 80 | await self.handle_file(link, scrape_item, filename, ext) 81 | 82 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 83 | async def parse_datetime(self, date: str) -> int: 84 | """Parses a datetime string into a unix timestamp""" 85 | date = datetime.datetime.strptime(date, "%Y-%m-%d %H:%M:%S") 86 | return calendar.timegm(date.timetuple()) 87 | -------------------------------------------------------------------------------- /cyberdrop_dl/ui/progress/statistic_progress.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | from rich.console import Group 4 | from 
rich.panel import Panel 5 | from rich.progress import Progress, BarColumn, TaskID 6 | 7 | 8 | class DownloadStatsProgress: 9 | """Class that keeps track of download failures and reasons""" 10 | 11 | def __init__(self): 12 | self.progress = Progress("[progress.description]{task.description}", 13 | BarColumn(bar_width=None), 14 | "[progress.percentage]{task.percentage:>3.2f}%", 15 | "{task.completed} of {task.total} Files") 16 | self.progress_group = Group(self.progress) 17 | 18 | self.failure_types: Dict[str, TaskID] = {} 19 | self.failed_files = 0 20 | 21 | async def get_progress(self) -> Panel: 22 | """Returns the progress bar""" 23 | return Panel(self.progress_group, title="Download Failures", border_style="green", padding=(1, 1)) 24 | 25 | async def update_total(self, total: int) -> None: 26 | """Updates the total number of files to be downloaded""" 27 | for key in self.failure_types: 28 | self.progress.update(self.failure_types[key], total=total) 29 | 30 | async def add_failure(self, failure_type: [str, int]) -> None: 31 | """Adds a failed file to the progress bar""" 32 | self.failed_files += 1 33 | if isinstance(failure_type, int): 34 | failure_type = str(failure_type) + " HTTP Status" 35 | 36 | if failure_type in self.failure_types: 37 | self.progress.advance(self.failure_types[failure_type], 1) 38 | else: 39 | self.failure_types[failure_type] = self.progress.add_task(failure_type, total=self.failed_files, completed=1) 40 | await self.update_total(self.failed_files) 41 | 42 | async def return_totals(self) -> Dict: 43 | """Returns the total number of failed files""" 44 | failures = {} 45 | for key, value in self.failure_types.items(): 46 | failures[key] = self.progress.tasks[value].completed 47 | return dict(sorted(failures.items())) 48 | 49 | 50 | class ScrapeStatsProgress: 51 | """Class that keeps track of scraping failures and reasons""" 52 | 53 | def __init__(self): 54 | self.progress = Progress("[progress.description]{task.description}", 55 | BarColumn(bar_width=None), 56 | "[progress.percentage]{task.percentage:>3.2f}%", 57 | "{task.completed} of {task.total} Files") 58 | self.progress_group = Group(self.progress) 59 | 60 | self.failure_types: Dict[str, TaskID] = {} 61 | self.failed_files = 0 62 | 63 | async def get_progress(self) -> Panel: 64 | """Returns the progress bar""" 65 | return Panel(self.progress_group, title="Scrape Failures", border_style="green", padding=(1, 1)) 66 | 67 | async def update_total(self, total: int) -> None: 68 | """Updates the total number of sites to be scraped""" 69 | for key in self.failure_types: 70 | self.progress.update(self.failure_types[key], total=total) 71 | 72 | async def add_failure(self, failure_type: [str, int]) -> None: 73 | """Adds a failed site to the progress bar""" 74 | self.failed_files += 1 75 | if isinstance(failure_type, int): 76 | failure_type = str(failure_type) + " HTTP Status" 77 | 78 | if failure_type in self.failure_types: 79 | self.progress.advance(self.failure_types[failure_type], 1) 80 | else: 81 | self.failure_types[failure_type] = self.progress.add_task(failure_type, total=self.failed_files, completed=1) 82 | await self.update_total(self.failed_files) 83 | 84 | async def return_totals(self) -> Dict: 85 | """Returns the total number of failed sites and reasons""" 86 | failures = {} 87 | for key, value in self.failure_types.items(): 88 | failures[key] = self.progress.tasks[value].completed 89 | return dict(sorted(failures.items())) 90 | -------------------------------------------------------------------------------- 
/cyberdrop_dl/managers/download_manager.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import asyncio 4 | import contextlib 5 | import shutil 6 | from base64 import b64encode 7 | from typing import TYPE_CHECKING 8 | 9 | from cyberdrop_dl.utils.utilities import FILE_FORMATS, log_debug 10 | 11 | if TYPE_CHECKING: 12 | from typing import Dict 13 | 14 | from cyberdrop_dl.managers.manager import Manager 15 | from cyberdrop_dl.utils.dataclasses.url_objects import MediaItem 16 | 17 | 18 | class FileLock: 19 | """Is this necessary? No. But I want it.""" 20 | def __init__(self): 21 | self._locked_files = {} 22 | 23 | async def check_lock(self, filename: str) -> None: 24 | """Checks if the file is locked""" 25 | try: 26 | await log_debug(f"Checking lock for {filename}", 40) 27 | await self._locked_files[filename].acquire() 28 | await log_debug(f"Lock for {filename} acquired", 40) 29 | except KeyError: 30 | await log_debug(f"Lock for {filename} does not exist", 40) 31 | self._locked_files[filename] = asyncio.Lock() 32 | await self._locked_files[filename].acquire() 33 | await log_debug(f"Lock for {filename} acquired", 40) 34 | 35 | async def release_lock(self, filename: str) -> None: 36 | """Releases the file lock""" 37 | with contextlib.suppress(KeyError, RuntimeError): 38 | await log_debug(f"Releasing lock for {filename}", 40) 39 | self._locked_files[filename].release() 40 | await log_debug(f"Lock for {filename} released", 40) 41 | 42 | 43 | class DownloadManager: 44 | def __init__(self, manager: Manager): 45 | self.manager = manager 46 | self._download_instances: Dict = {} 47 | 48 | self.file_lock = FileLock() 49 | 50 | self.download_limits = {'bunkr': 1, 'bunkrr': 1, 'cyberdrop': 1, 'cyberfile': 1, "pixeldrain": 2} 51 | 52 | async def get_download_limit(self, key: str) -> int: 53 | """Returns the download limit for a domain""" 54 | if key in self.download_limits: 55 | instances = self.download_limits[key] 56 | else: 57 | instances = self.manager.config_manager.global_settings_data['Rate_Limiting_Options']['max_simultaneous_downloads_per_domain'] 58 | 59 | if instances > self.manager.config_manager.global_settings_data['Rate_Limiting_Options']['max_simultaneous_downloads_per_domain']: 60 | instances = self.manager.config_manager.global_settings_data['Rate_Limiting_Options']['max_simultaneous_downloads_per_domain'] 61 | return instances 62 | 63 | async def basic_auth(self, username, password) -> str: 64 | """Returns a basic auth token""" 65 | token = b64encode(f"{username}:{password}".encode('utf-8')).decode("ascii") 66 | return f'Basic {token}' 67 | 68 | async def check_free_space(self) -> bool: 69 | """Checks if there is enough free space on the drive to continue operating""" 70 | free_space = shutil.disk_usage(self.manager.path_manager.download_dir.parent).free 71 | free_space_gb = free_space / 1024 ** 3 72 | return free_space_gb >= self.manager.config_manager.global_settings_data['General']['required_free_space'] 73 | 74 | async def check_allowed_filetype(self, media_item: MediaItem) -> bool: 75 | """Checks if the file type is allowed to download""" 76 | if media_item.ext in FILE_FORMATS['Images'] and self.manager.config_manager.settings_data['Ignore_Options']['exclude_images']: 77 | return False 78 | if media_item.ext in FILE_FORMATS['Videos'] and self.manager.config_manager.settings_data['Ignore_Options']['exclude_videos']: 79 | return False 80 | if media_item.ext in FILE_FORMATS['Audio'] and 
self.manager.config_manager.settings_data['Ignore_Options']['exclude_audio']: 81 | return False 82 | if (self.manager.config_manager.settings_data['Ignore_Options']['exclude_other'] and 83 | media_item.ext not in FILE_FORMATS['Images'] and media_item.ext not in FILE_FORMATS['Videos'] and 84 | media_item.ext not in FILE_FORMATS['Audio']): 85 | return False 86 | return True 87 | -------------------------------------------------------------------------------- /cyberdrop_dl/scraper/crawlers/redgifs_crawler.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | from aiolimiter import AsyncLimiter 6 | from yarl import URL 7 | 8 | from cyberdrop_dl.scraper.crawler import Crawler 9 | from cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem 10 | from cyberdrop_dl.utils.utilities import error_handling_wrapper, get_filename_and_ext 11 | 12 | if TYPE_CHECKING: 13 | from cyberdrop_dl.managers.manager import Manager 14 | 15 | 16 | class RedGifsCrawler(Crawler): 17 | def __init__(self, manager: Manager): 18 | super().__init__(manager, "redgifs", "RedGifs") 19 | self.redgifs_api = URL("https://api.redgifs.com/") 20 | self.token = "" 21 | self.headers = {} 22 | self.request_limiter = AsyncLimiter(10, 1) 23 | 24 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 25 | 26 | async def fetch(self, scrape_item: ScrapeItem) -> None: 27 | """Determines where to send the scrape item based on the url""" 28 | task_id = await self.scraping_progress.add_task(scrape_item.url) 29 | 30 | if not self.token: 31 | await self.manage_token(self.redgifs_api / "v2/auth/temporary") 32 | 33 | if self.token: 34 | if "users" in scrape_item.url.parts: 35 | await self.user(scrape_item) 36 | else: 37 | await self.post(scrape_item) 38 | 39 | await self.scraping_progress.remove_task(task_id) 40 | 41 | @error_handling_wrapper 42 | async def user(self, scrape_item: ScrapeItem) -> None: 43 | """Scrapes a users page""" 44 | user_id = scrape_item.url.parts[-1].split(".")[0] 45 | 46 | page = 1 47 | total_pages = 1 48 | while page <= total_pages: 49 | async with self.request_limiter: 50 | JSON_Resp = await self.client.get_json(self.domain, (self.redgifs_api / "v2/users" / user_id / "search").with_query(f"order=new&count=40&page={page}"), headers_inc=self.headers) 51 | total_pages = JSON_Resp["pages"] 52 | gifs = JSON_Resp["gifs"] 53 | for gif in gifs: 54 | links = gif["urls"] 55 | date = gif["createDate"] 56 | title = await self.create_title(user_id, None, None) 57 | 58 | try: 59 | link = URL(links["hd"]) 60 | except (KeyError, TypeError): 61 | link = URL(links["sd"]) 62 | 63 | filename, ext = await get_filename_and_ext(link.name) 64 | new_scrape_item = await self.create_scrape_item(scrape_item, link, title, True, date) 65 | await self.handle_file(link, new_scrape_item, filename, ext) 66 | page += 1 67 | 68 | @error_handling_wrapper 69 | async def post(self, scrape_item: ScrapeItem) -> None: 70 | """Scrapes a post""" 71 | post_id = scrape_item.url.parts[-1].split(".")[0] 72 | 73 | async with self.request_limiter: 74 | JSON_Resp = await self.client.get_json(self.domain, self.redgifs_api / "v2/gifs" / post_id, headers_inc=self.headers) 75 | 76 | title_part = JSON_Resp["gif"].get("title", "Loose Files") 77 | title = await self.create_title(title_part, None, None) 78 | links = JSON_Resp["gif"]["urls"] 79 | date = JSON_Resp["gif"]["createDate"] 80 | 81 | 
link = URL(links["hd"] if "hd" in links else links["sd"]) 82 | 83 | filename, ext = await get_filename_and_ext(link.name) 84 | new_scrape_item = await self.create_scrape_item(scrape_item, link, title, True, date) 85 | await self.handle_file(link, new_scrape_item, filename, ext) 86 | 87 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 88 | 89 | @error_handling_wrapper 90 | async def manage_token(self, token_url: URL) -> None: 91 | """Gets/Sets the redgifs token and header""" 92 | async with self.request_limiter: 93 | json_obj = await self.client.get_json(self.domain, token_url) 94 | self.token = json_obj["token"] 95 | self.headers = {"Authorization": f"Bearer {self.token}"} 96 | -------------------------------------------------------------------------------- /cyberdrop_dl/managers/path_manager.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dataclasses import field 3 | from pathlib import Path 4 | from typing import TYPE_CHECKING 5 | 6 | if TYPE_CHECKING: 7 | from cyberdrop_dl.managers.manager import Manager 8 | 9 | if os.getenv("PYCHARM_HOSTED") is not None: 10 | """This is for testing purposes only""" 11 | APP_STORAGE = Path("../AppData") 12 | DOWNLOAD_STORAGE = Path("../Downloads") 13 | else: 14 | APP_STORAGE = Path("./AppData") 15 | DOWNLOAD_STORAGE = Path("./Downloads") 16 | 17 | 18 | class PathManager: 19 | def __init__(self, manager: 'Manager'): 20 | self.manager = manager 21 | 22 | self.download_dir: Path = field(init=False) 23 | self.sorted_dir: Path = field(init=False) 24 | self.log_dir: Path = field(init=False) 25 | 26 | self.cache_dir: Path = field(init=False) 27 | self.config_dir: Path = field(init=False) 28 | 29 | self.input_file: Path = field(init=False) 30 | self.history_db: Path = field(init=False) 31 | 32 | self.main_log: Path = field(init=False) 33 | self.last_post_log: Path = field(init=False) 34 | self.unsupported_urls_log: Path = field(init=False) 35 | self.download_error_log: Path = field(init=False) 36 | self.scrape_error_log: Path = field(init=False) 37 | 38 | def pre_startup(self) -> None: 39 | if self.manager.args_manager.appdata_dir: 40 | global APP_STORAGE 41 | APP_STORAGE = Path(self.manager.args_manager.appdata_dir) / "AppData" 42 | 43 | self.cache_dir = APP_STORAGE / "Cache" 44 | self.config_dir = APP_STORAGE / "Configs" 45 | 46 | self.cache_dir.mkdir(parents=True, exist_ok=True) 47 | self.config_dir.mkdir(parents=True, exist_ok=True) 48 | 49 | def startup(self) -> None: 50 | """Startup process for the Directory Manager""" 51 | self.download_dir = self.manager.config_manager.settings_data['Files']['download_folder'] if not self.manager.args_manager.download_dir else self.manager.args_manager.download_dir 52 | self.sorted_dir = self.manager.config_manager.settings_data['Sorting']['sort_folder'] if not self.manager.args_manager.sort_folder else self.manager.args_manager.sort_folder 53 | self.log_dir = self.manager.config_manager.settings_data['Logs']['log_folder'] if not self.manager.args_manager.log_dir else self.manager.args_manager.log_dir 54 | self.input_file = self.manager.config_manager.settings_data['Files']['input_file'] if not self.manager.args_manager.input_file else self.manager.args_manager.input_file 55 | self.history_db = self.cache_dir / "cyberdrop.db" 56 | 57 | self.main_log = self.log_dir / (self.manager.config_manager.settings_data['Logs']['main_log_filename'] 58 | if not self.manager.args_manager.main_log_filename 
else self.manager.args_manager.main_log_filename) 59 | self.last_post_log = self.log_dir / (self.manager.config_manager.settings_data['Logs']['last_forum_post_filename'] 60 | if not self.manager.args_manager.last_forum_post_filename else self.manager.args_manager.last_forum_post_filename) 61 | self.unsupported_urls_log = self.log_dir / (self.manager.config_manager.settings_data['Logs']['unsupported_urls_filename'] 62 | if not self.manager.args_manager.unsupported_urls_filename else self.manager.args_manager.unsupported_urls_filename) 63 | self.download_error_log = self.log_dir / (self.manager.config_manager.settings_data['Logs']['download_error_urls_filename'] 64 | if not self.manager.args_manager.download_error_urls_filename else self.manager.args_manager.download_error_urls_filename) 65 | self.scrape_error_log = self.log_dir / (self.manager.config_manager.settings_data['Logs']['scrape_error_urls_filename'] 66 | if not self.manager.args_manager.scrape_error_urls_filename else self.manager.args_manager.scrape_error_urls_filename) 67 | 68 | self.log_dir.mkdir(parents=True, exist_ok=True) 69 | if not self.input_file.is_file(): 70 | self.input_file.touch(exist_ok=True) 71 | self.history_db.touch(exist_ok=True) 72 | -------------------------------------------------------------------------------- /cyberdrop_dl/scraper/crawlers/realbooru_crawler.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | from aiolimiter import AsyncLimiter 6 | from yarl import URL 7 | 8 | from cyberdrop_dl.scraper.crawler import Crawler 9 | from cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem 10 | from cyberdrop_dl.utils.utilities import get_filename_and_ext, error_handling_wrapper, log 11 | 12 | if TYPE_CHECKING: 13 | from cyberdrop_dl.managers.manager import Manager 14 | 15 | 16 | class RealBooruCrawler(Crawler): 17 | def __init__(self, manager: Manager): 18 | super().__init__(manager, "realbooru", "RealBooru") 19 | self.primary_base_url = URL("https://realbooru.com") 20 | self.request_limiter = AsyncLimiter(10, 1) 21 | 22 | self.cookies_set = False 23 | 24 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 25 | 26 | async def fetch(self, scrape_item: ScrapeItem) -> None: 27 | """Determines where to send the scrape item based on the url""" 28 | task_id = await self.scraping_progress.add_task(scrape_item.url) 29 | 30 | await self.set_cookies() 31 | 32 | if "tags" in scrape_item.url.query_string: 33 | await self.tag(scrape_item) 34 | elif "id" in scrape_item.url.query_string: 35 | await self.file(scrape_item) 36 | else: 37 | await log(f"Scrape Failed: Unknown URL Path for {scrape_item.url}", 40) 38 | await self.manager.progress_manager.scrape_stats_progress.add_failure("Unsupported Link") 39 | 40 | await self.scraping_progress.remove_task(task_id) 41 | 42 | @error_handling_wrapper 43 | async def tag(self, scrape_item: ScrapeItem) -> None: 44 | """Scrapes an album""" 45 | async with self.request_limiter: 46 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 47 | 48 | title_portion = scrape_item.url.query['tags'].strip() 49 | title = await self.create_title(title_portion, None, None) 50 | 51 | content = soup.select("div[class=items] div a") 52 | for file_page in content: 53 | link = file_page.get('href') 54 | if link.startswith("/"): 55 | link = f"{self.primary_base_url}{link}" 56 | link = URL(link, 
encoded=True) 57 | new_scrape_item = await self.create_scrape_item(scrape_item, link, title, True) 58 | self.manager.task_group.create_task(self.run(new_scrape_item)) 59 | 60 | next_page = soup.select_one("a[alt=next]") 61 | if next_page is not None: 62 | next_page = next_page.get("href") 63 | if next_page is not None: 64 | if next_page.startswith("?"): 65 | next_page = scrape_item.url.with_query(next_page[1:]) 66 | else: 67 | next_page = URL(next_page) 68 | new_scrape_item = await self.create_scrape_item(scrape_item, next_page, "") 69 | self.manager.task_group.create_task(self.run(new_scrape_item)) 70 | 71 | @error_handling_wrapper 72 | async def file(self, scrape_item: ScrapeItem) -> None: 73 | """Scrapes an image""" 74 | async with self.request_limiter: 75 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 76 | image = soup.select_one("img[id=image]") 77 | if image: 78 | link = URL(image.get('src')) 79 | filename, ext = await get_filename_and_ext(link.name) 80 | await self.handle_file(link, scrape_item, filename, ext) 81 | video = soup.select_one("video source") 82 | if video: 83 | link = URL(video.get('src')) 84 | filename, ext = await get_filename_and_ext(link.name) 85 | await self.handle_file(link, scrape_item, filename, ext) 86 | 87 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 88 | 89 | async def set_cookies(self): 90 | """Sets the cookies for the client""" 91 | if self.cookies_set: 92 | return 93 | 94 | self.client.client_manager.cookies.update_cookies({"resize-original": "1"}, response_url=self.primary_base_url) 95 | 96 | self.cookies_set = True 97 | -------------------------------------------------------------------------------- /cyberdrop_dl/scraper/crawlers/rule34xxx_crawler.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | from aiolimiter import AsyncLimiter 6 | from yarl import URL 7 | 8 | from cyberdrop_dl.scraper.crawler import Crawler 9 | from cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem 10 | from cyberdrop_dl.utils.utilities import get_filename_and_ext, error_handling_wrapper, log 11 | 12 | if TYPE_CHECKING: 13 | from cyberdrop_dl.managers.manager import Manager 14 | 15 | 16 | class Rule34XXXCrawler(Crawler): 17 | def __init__(self, manager: Manager): 18 | super().__init__(manager, "rule34.xxx", "Rule34XXX") 19 | self.primary_base_url = URL("https://rule34.xxx") 20 | self.request_limiter = AsyncLimiter(10, 1) 21 | 22 | self.cookies_set = False 23 | 24 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 25 | 26 | async def fetch(self, scrape_item: ScrapeItem) -> None: 27 | """Determines where to send the scrape item based on the url""" 28 | task_id = await self.scraping_progress.add_task(scrape_item.url) 29 | 30 | await self.set_cookies() 31 | 32 | if "tags" in scrape_item.url.query_string: 33 | await self.tag(scrape_item) 34 | elif "id" in scrape_item.url.query_string: 35 | await self.file(scrape_item) 36 | else: 37 | await log(f"Scrape Failed: Unknown URL Path for {scrape_item.url}", 40) 38 | await self.manager.progress_manager.scrape_stats_progress.add_failure("Unsupported Link") 39 | 40 | await self.scraping_progress.remove_task(task_id) 41 | 42 | @error_handling_wrapper 43 | async def tag(self, scrape_item: ScrapeItem) -> None: 44 | """Scrapes an album""" 45 | async with 
self.request_limiter: 46 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 47 | 48 | title_portion = scrape_item.url.query['tags'].strip() 49 | title = await self.create_title(title_portion, None, None) 50 | 51 | content = soup.select("div[class=image-list] span a") 52 | for file_page in content: 53 | link = file_page.get('href') 54 | if link.startswith("/"): 55 | link = f"{self.primary_base_url}{link}" 56 | link = URL(link, encoded=True) 57 | new_scrape_item = await self.create_scrape_item(scrape_item, link, title, True) 58 | self.manager.task_group.create_task(self.run(new_scrape_item)) 59 | 60 | next_page = soup.select_one("a[alt=next]") 61 | if next_page is not None: 62 | next_page = next_page.get("href") 63 | if next_page is not None: 64 | if next_page.startswith("?"): 65 | next_page = scrape_item.url.with_query(next_page[1:]) 66 | else: 67 | next_page = URL(next_page) 68 | new_scrape_item = await self.create_scrape_item(scrape_item, next_page, "") 69 | self.manager.task_group.create_task(self.run(new_scrape_item)) 70 | 71 | @error_handling_wrapper 72 | async def file(self, scrape_item: ScrapeItem) -> None: 73 | """Scrapes an image""" 74 | async with self.request_limiter: 75 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 76 | image = soup.select_one("img[id=image]") 77 | if image: 78 | link = URL(image.get('src')) 79 | filename, ext = await get_filename_and_ext(link.name) 80 | await self.handle_file(link, scrape_item, filename, ext) 81 | video = soup.select_one("video source") 82 | if video: 83 | link = URL(video.get('src')) 84 | filename, ext = await get_filename_and_ext(link.name) 85 | await self.handle_file(link, scrape_item, filename, ext) 86 | 87 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 88 | 89 | async def set_cookies(self): 90 | """Sets the cookies for the client""" 91 | if self.cookies_set: 92 | return 93 | 94 | self.client.client_manager.cookies.update_cookies({"resize-original": "1"}, response_url=self.primary_base_url) 95 | 96 | self.cookies_set = True 97 | -------------------------------------------------------------------------------- /cyberdrop_dl/scraper/crawlers/rule34xyz_crawler.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import calendar 4 | import datetime 5 | from typing import TYPE_CHECKING 6 | 7 | from aiolimiter import AsyncLimiter 8 | from yarl import URL 9 | 10 | from cyberdrop_dl.scraper.crawler import Crawler 11 | from cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem 12 | from cyberdrop_dl.utils.utilities import get_filename_and_ext, error_handling_wrapper 13 | 14 | if TYPE_CHECKING: 15 | from cyberdrop_dl.managers.manager import Manager 16 | 17 | 18 | class Rule34XYZCrawler(Crawler): 19 | def __init__(self, manager: Manager): 20 | super().__init__(manager, "rule34.xyz", "Rule34XYZ") 21 | self.primary_base_url = URL("https://rule34.xyz") 22 | self.request_limiter = AsyncLimiter(10, 1) 23 | 24 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 25 | 26 | async def fetch(self, scrape_item: ScrapeItem) -> None: 27 | """Determines where to send the scrape item based on the url""" 28 | task_id = await self.scraping_progress.add_task(scrape_item.url) 29 | 30 | if "post" in scrape_item.url.parts: 31 | await self.file(scrape_item) 32 | else: 33 | await self.tag(scrape_item) 34 | 35 | await 
self.scraping_progress.remove_task(task_id) 36 | 37 | @error_handling_wrapper 38 | async def tag(self, scrape_item: ScrapeItem) -> None: 39 | """Scrapes an album""" 40 | async with self.request_limiter: 41 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 42 | 43 | title = await self.create_title(scrape_item.url.parts[1], None, None) 44 | 45 | content_block = soup.select_one('div[class="box-grid ng-star-inserted"]') 46 | content = content_block.select("a[class=boxInner]") 47 | for file_page in content: 48 | link = file_page.get('href') 49 | if link.startswith("/"): 50 | link = f"{self.primary_base_url}{link}" 51 | link = URL(link) 52 | new_scrape_item = await self.create_scrape_item(scrape_item, link, title, True) 53 | self.manager.task_group.create_task(self.run(new_scrape_item)) 54 | if not content: 55 | return 56 | 57 | if len(scrape_item.url.parts) > 2: 58 | page = int(scrape_item.url.parts[-1]) 59 | next_page = scrape_item.url.with_path(f"/{scrape_item.url.parts[1]}/page/{page + 1}") 60 | else: 61 | next_page = scrape_item.url.with_path(f"/{scrape_item.url.parts[1]}/page/2") 62 | new_scrape_item = await self.create_scrape_item(scrape_item, next_page, "") 63 | self.manager.task_group.create_task(self.run(new_scrape_item)) 64 | 65 | @error_handling_wrapper 66 | async def file(self, scrape_item: ScrapeItem) -> None: 67 | """Scrapes an image""" 68 | async with self.request_limiter: 69 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 70 | 71 | date = await self.parse_datetime(soup.select_one('div[class="posted ng-star-inserted"]').text.split("(")[1].split(")")[0]) 72 | scrape_item.date = date 73 | 74 | image = soup.select_one('img[class*="img shadow-base"]') 75 | if image: 76 | link = image.get('src') 77 | if link.startswith("/"): 78 | link = f"{self.primary_base_url}{link}" 79 | link = URL(link) 80 | filename, ext = await get_filename_and_ext(link.name) 81 | await self.handle_file(link, scrape_item, filename, ext) 82 | video = soup.select_one("video source") 83 | if video: 84 | link = video.get('src') 85 | if link.startswith("/"): 86 | link = f"{self.primary_base_url}{link}" 87 | link = URL(link) 88 | filename, ext = await get_filename_and_ext(link.name) 89 | await self.handle_file(link, scrape_item, filename, ext) 90 | 91 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 92 | 93 | async def parse_datetime(self, date: str) -> int: 94 | """Parses a datetime string into a unix timestamp""" 95 | date = datetime.datetime.strptime(date, "%b %d, %Y, %I:%M:%S %p") 96 | return calendar.timegm(date.timetuple()) 97 | -------------------------------------------------------------------------------- /cyberdrop_dl/scraper/crawlers/ehentai_crawler.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import calendar 4 | import datetime 5 | from typing import TYPE_CHECKING 6 | 7 | from aiolimiter import AsyncLimiter 8 | from yarl import URL 9 | 10 | from cyberdrop_dl.scraper.crawler import Crawler 11 | from cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem 12 | from cyberdrop_dl.utils.utilities import get_filename_and_ext, error_handling_wrapper, log 13 | 14 | if TYPE_CHECKING: 15 | from cyberdrop_dl.managers.manager import Manager 16 | 17 | 18 | class EHentaiCrawler(Crawler): 19 | def __init__(self, manager: Manager): 20 | super().__init__(manager, "e-hentai", "E-Hentai") 21 | self.request_limiter = AsyncLimiter(10, 
1) 22 | self.warnings_set = False 23 | 24 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 25 | 26 | async def fetch(self, scrape_item: ScrapeItem) -> None: 27 | """Determines where to send the scrape item based on the url""" 28 | task_id = await self.scraping_progress.add_task(scrape_item.url) 29 | 30 | if "g" in scrape_item.url.parts: 31 | if not self.warnings_set: 32 | await self.set_no_warnings(scrape_item) 33 | await self.album(scrape_item) 34 | elif "s" in scrape_item.url.parts: 35 | await self.image(scrape_item) 36 | else: 37 | await log(f"Scrape Failed: Unknown URL Path for {scrape_item.url}", 40) 38 | await self.manager.progress_manager.scrape_stats_progress.add_failure("Unsupported Link") 39 | 40 | await self.scraping_progress.remove_task(task_id) 41 | 42 | @error_handling_wrapper 43 | async def album(self, scrape_item: ScrapeItem) -> None: 44 | """Scrapes an album""" 45 | async with self.request_limiter: 46 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 47 | 48 | title = await self.create_title(soup.select_one("h1[id=gn]").get_text(), None, None) 49 | date = await self.parse_datetime(soup.select_one("td[class=gdt2]").get_text()) 50 | 51 | images = soup.select("div[class=gdtm] div a") 52 | for image in images: 53 | link = URL(image.get('href')) 54 | new_scrape_item = await self.create_scrape_item(scrape_item, link, title, True, None, date) 55 | self.manager.task_group.create_task(self.run(new_scrape_item)) 56 | 57 | next_page_opts = soup.select('td[onclick="document.location=this.firstChild.href"]') 58 | next_page = None 59 | for maybe_next in next_page_opts: 60 | if maybe_next.get_text() == ">": 61 | next_page = maybe_next.select_one('a') 62 | break 63 | if next_page is not None: 64 | next_page = URL(next_page.get('href')) 65 | if next_page is not None: 66 | new_scrape_item = await self.create_scrape_item(scrape_item, next_page, "") 67 | self.manager.task_group.create_task(self.run(new_scrape_item)) 68 | 69 | @error_handling_wrapper 70 | async def image(self, scrape_item: ScrapeItem) -> None: 71 | """Scrapes an image""" 72 | if await self.check_complete_from_referer(scrape_item): 73 | return 74 | 75 | async with self.request_limiter: 76 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 77 | image = soup.select_one("img[id=img]") 78 | link = URL(image.get('src')) 79 | filename, ext = await get_filename_and_ext(link.name) 80 | await self.handle_file(link, scrape_item, filename, ext) 81 | 82 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 83 | 84 | async def parse_datetime(self, date: str) -> int: 85 | """Parses a datetime string into a unix timestamp""" 86 | if date.count(":") == 1: 87 | date = date + ":00" 88 | date = datetime.datetime.strptime(date, "%Y-%m-%d %H:%M:%S") 89 | return calendar.timegm(date.timetuple()) 90 | 91 | @error_handling_wrapper 92 | async def set_no_warnings(self, scrape_item) -> None: 93 | """Sets the no warnings cookie""" 94 | self.warnings_set = True 95 | async with self.request_limiter: 96 | scrape_item.url = URL(str(scrape_item.url) + "/").update_query("nw=session") 97 | await self.client.get_BS4(self.domain, scrape_item.url) 98 | -------------------------------------------------------------------------------- /cyberdrop_dl/scraper/crawlers/pixeldrain_crawler.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 
| 3 | import calendar 4 | import datetime 5 | from typing import TYPE_CHECKING 6 | 7 | from aiolimiter import AsyncLimiter 8 | from yarl import URL 9 | 10 | from cyberdrop_dl.clients.errors import NoExtensionFailure 11 | from cyberdrop_dl.scraper.crawler import Crawler 12 | from cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem 13 | from cyberdrop_dl.utils.utilities import error_handling_wrapper, get_filename_and_ext 14 | 15 | if TYPE_CHECKING: 16 | from cyberdrop_dl.managers.manager import Manager 17 | 18 | 19 | class PixelDrainCrawler(Crawler): 20 | def __init__(self, manager: Manager): 21 | super().__init__(manager, "pixeldrain", "PixelDrain") 22 | self.api_address = URL('https://pixeldrain.com/api/') 23 | self.request_limiter = AsyncLimiter(10, 1) 24 | 25 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 26 | 27 | async def fetch(self, scrape_item: ScrapeItem) -> None: 28 | """Determines where to send the scrape item based on the url""" 29 | task_id = await self.scraping_progress.add_task(scrape_item.url) 30 | 31 | if "l" in scrape_item.url.parts: 32 | await self.folder(scrape_item) 33 | else: 34 | await self.file(scrape_item) 35 | 36 | await self.scraping_progress.remove_task(task_id) 37 | 38 | @error_handling_wrapper 39 | async def folder(self, scrape_item: ScrapeItem) -> None: 40 | """Scrapes a folder""" 41 | album_id = scrape_item.url.parts[2] 42 | results = await self.get_album_results(album_id) 43 | 44 | async with self.request_limiter: 45 | JSON_Resp = await self.client.get_json(self.domain, self.api_address / "list" / scrape_item.url.parts[-1]) 46 | 47 | title = await self.create_title(JSON_Resp['title'], scrape_item.url.parts[2], None) 48 | 49 | for file in JSON_Resp['files']: 50 | link = await self.create_download_link(file['id']) 51 | date = await self.parse_datetime(file['date_upload'].replace("T", " ").split(".")[0].strip("Z")) 52 | try: 53 | filename, ext = await get_filename_and_ext(file['name']) 54 | except NoExtensionFailure: 55 | if "image" in file["mime_type"] or "video" in file["mime_type"]: 56 | filename, ext = await get_filename_and_ext(file['name'] + "." + file["mime_type"].split("/")[-1]) 57 | else: 58 | raise NoExtensionFailure() 59 | new_scrape_item = await self.create_scrape_item(scrape_item, link, title, True, None, date) 60 | if not await self.check_album_results(link, results): 61 | await self.handle_file(link, new_scrape_item, filename, ext) 62 | 63 | @error_handling_wrapper 64 | async def file(self, scrape_item: ScrapeItem) -> None: 65 | """Scrapes a file""" 66 | async with self.request_limiter: 67 | JSON_Resp = await self.client.get_json(self.domain, self.api_address / "file" / scrape_item.url.parts[-1] / "info") 68 | 69 | link = await self.create_download_link(JSON_Resp['id']) 70 | date = await self.parse_datetime(JSON_Resp['date_upload'].replace("T", " ").split(".")[0]) 71 | try: 72 | filename, ext = await get_filename_and_ext(JSON_Resp['name']) 73 | except NoExtensionFailure: 74 | if "image" in JSON_Resp["mime_type"] or "video" in JSON_Resp["mime_type"]: 75 | filename, ext = await get_filename_and_ext(JSON_Resp['name'] + "." 
+ JSON_Resp["mime_type"].split("/")[-1]) 76 | else: 77 | raise NoExtensionFailure() 78 | new_scrape_item = await self.create_scrape_item(scrape_item, link, "", False, None, date) 79 | await self.handle_file(link, new_scrape_item, filename, ext) 80 | 81 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 82 | 83 | async def parse_datetime(self, date: str) -> int: 84 | """Parses a datetime string into a unix timestamp""" 85 | try: 86 | date = datetime.datetime.strptime(date, "%Y-%m-%d %H:%M:%S") 87 | except ValueError: 88 | date = datetime.datetime.strptime(date, "%Y-%m-%d %H:%M:%SZ") 89 | return calendar.timegm(date.timetuple()) 90 | 91 | async def create_download_link(self, file_id: str) -> URL: 92 | """Creates a download link for a file""" 93 | link = (self.api_address / "file" / file_id).with_query('download') 94 | return link 95 | -------------------------------------------------------------------------------- /cyberdrop_dl/scraper/crawlers/imgkiwi_crawler.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import calendar 4 | import datetime 5 | import re 6 | from typing import TYPE_CHECKING 7 | 8 | from aiolimiter import AsyncLimiter 9 | from yarl import URL 10 | 11 | from cyberdrop_dl.scraper.crawler import Crawler 12 | from cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem 13 | from cyberdrop_dl.utils.utilities import error_handling_wrapper, get_filename_and_ext 14 | 15 | if TYPE_CHECKING: 16 | from cyberdrop_dl.managers.manager import Manager 17 | 18 | 19 | class ImgKiwiCrawler(Crawler): 20 | def __init__(self, manager: Manager): 21 | super().__init__(manager, "img.kiwi", "ImgKiwi") 22 | self.request_limiter = AsyncLimiter(10, 1) 23 | 24 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 25 | 26 | async def fetch(self, scrape_item: ScrapeItem) -> None: 27 | """Determines where to send the scrape item based on the url""" 28 | task_id = await self.scraping_progress.add_task(scrape_item.url) 29 | 30 | if await self.check_direct_link(scrape_item.url): 31 | await self.handle_direct_link(scrape_item) 32 | elif "album" in scrape_item.url.parts: 33 | await self.album(scrape_item) 34 | else: 35 | await self.image(scrape_item) 36 | 37 | await self.scraping_progress.remove_task(task_id) 38 | 39 | @error_handling_wrapper 40 | async def album(self, scrape_item: ScrapeItem) -> None: 41 | """Scrapes an album""" 42 | async with self.request_limiter: 43 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 44 | 45 | title = await self.create_title(soup.select_one("a[data-text=album-name]").get_text(), scrape_item.url.parts[2], None) 46 | link_next = URL(soup.select_one("a[id=list-most-recent-link]").get("href")) 47 | 48 | while True: 49 | async with self.request_limiter: 50 | soup = await self.client.get_BS4(self.domain, link_next) 51 | links = soup.select("a[href*=image]") 52 | for link in links: 53 | link = URL(link.get('href')) 54 | new_scrape_item = await self.create_scrape_item(scrape_item, link, title, True) 55 | self.manager.task_group.create_task(self.run(new_scrape_item)) 56 | 57 | link_next = soup.select_one('a[data-pagination=next]') 58 | if link_next is not None: 59 | link_next = link_next.get('href') 60 | if link_next is not None: 61 | link_next = URL(link_next) 62 | else: 63 | break 64 | else: 65 | break 66 | 67 | @error_handling_wrapper 68 | async def 
image(self, scrape_item: ScrapeItem) -> None: 69 | """Scrapes an image""" 70 | if await self.check_complete_from_referer(scrape_item): 71 | return 72 | 73 | async with self.request_limiter: 74 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 75 | 76 | link = soup.select_one("div[id=image-viewer-container] img").get('src') 77 | link = URL(link.replace(".md.", ".").replace(".th.", ".")) 78 | date = soup.select_one("p[class*=description-meta] span").get("title") 79 | date = await self.parse_datetime(date) 80 | scrape_item.possible_datetime = date 81 | 82 | filename, ext = await get_filename_and_ext(link.name) 83 | await self.handle_file(link, scrape_item, filename, ext) 84 | 85 | @error_handling_wrapper 86 | async def handle_direct_link(self, scrape_item: ScrapeItem) -> None: 87 | """Handles a direct link""" 88 | filename, ext = await get_filename_and_ext(scrape_item.url.name) 89 | await self.handle_file(scrape_item.url, scrape_item, filename, ext) 90 | 91 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 92 | 93 | async def parse_datetime(self, date: str) -> int: 94 | """Parses a datetime string into a unix timestamp""" 95 | date = datetime.datetime.strptime(date, "%Y-%m-%d %H:%M:%S") 96 | return calendar.timegm(date.timetuple()) 97 | 98 | async def check_direct_link(self, url: URL) -> bool: 99 | """Determines if the url is a direct link or not""" 100 | mapping_direct = [r'img.kiwi/images/'] 101 | return any(re.search(domain, str(url)) for domain in mapping_direct) 102 | -------------------------------------------------------------------------------- /cyberdrop_dl/scraper/crawlers/scrolller_crawler.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import json 4 | from typing import TYPE_CHECKING 5 | 6 | from aiolimiter import AsyncLimiter 7 | from yarl import URL 8 | 9 | from cyberdrop_dl.scraper.crawler import Crawler 10 | from cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem 11 | from cyberdrop_dl.utils.utilities import error_handling_wrapper, log, get_filename_and_ext 12 | 13 | if TYPE_CHECKING: 14 | from cyberdrop_dl.managers.manager import Manager 15 | 16 | 17 | class ScrolllerCrawler(Crawler): 18 | def __init__(self, manager: Manager): 19 | super().__init__(manager, "scrolller", "Scrolller") 20 | self.scrolller_api = URL("https://api.scrolller.com/api/v2/graphql") 21 | self.headers = {"Content-Type": "application/json"} 22 | self.request_limiter = AsyncLimiter(10, 1) 23 | 24 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 25 | 26 | async def fetch(self, scrape_item: ScrapeItem) -> None: 27 | """Determines where to send the scrape item based on the url""" 28 | task_id = await self.scraping_progress.add_task(scrape_item.url) 29 | 30 | if "r" in scrape_item.url.parts: 31 | await self.subreddit(scrape_item) 32 | else: 33 | await log(f"Scrape Failed: Unknown URL Path for {scrape_item.url}", 40) 34 | await self.manager.progress_manager.scrape_stats_progress.add_failure("Unsupported Link") 35 | 36 | await self.scraping_progress.remove_task(task_id) 37 | 38 | @error_handling_wrapper 39 | async def subreddit(self, scrape_item: ScrapeItem) -> None: 40 | """Scrapes an album""" 41 | subreddit = scrape_item.url.parts[-1] 42 | title = await self.create_title(subreddit, None, None) 43 | await scrape_item.add_to_parent_title(title) 44 | 
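        # The request body below is a GraphQL query against Scrolller's public API endpoint:
        # getSubreddit returns posts in batches together with an "iterator" cursor, and the
        # while-loop further down re-posts the same body with the previous cursor until no new
        # items arrive or the cursor stops advancing. For each item, ".webp" media sources are
        # skipped and the last remaining entry, which appears to be the highest resolution, is
        # handed to handle_file().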
scrape_item.part_of_album = True 45 | 46 | request_body = { 47 | "query": """ 48 | query SubredditQuery( 49 | $url: String! 50 | $filter: SubredditPostFilter 51 | $iterator: String 52 | ) { 53 | getSubreddit(url: $url) { 54 | title 55 | children( 56 | limit: 10000 57 | iterator: $iterator 58 | filter: $filter 59 | disabledHosts: null 60 | ) { 61 | iterator 62 | items { 63 | title 64 | mediaSources { 65 | url 66 | } 67 | blurredMediaSources { 68 | url 69 | } 70 | } 71 | } 72 | } 73 | } 74 | """, 75 | "variables": { 76 | "url": f"/r/{subreddit}", 77 | "filter": None, 78 | "hostsDown": None 79 | }, 80 | } 81 | 82 | iterator = None 83 | iterations = 0 84 | 85 | while True: 86 | request_body["variables"]["iterator"] = iterator 87 | data = await self.client.post_data(self.domain, self.scrolller_api, data=json.dumps(request_body)) 88 | 89 | if data: 90 | items = data["data"]["getSubreddit"]["children"]["items"] 91 | 92 | for item in items: 93 | media_sources = [item for item in item['mediaSources'] if ".webp" not in item['url']] 94 | if media_sources: 95 | highest_res_image_url = URL(media_sources[-1]['url']) 96 | filename, ext = await get_filename_and_ext(highest_res_image_url.name) 97 | await self.handle_file(highest_res_image_url, scrape_item, filename, ext) 98 | 99 | prev_iterator = iterator 100 | iterator = data["data"]["getSubreddit"]["children"]["iterator"] 101 | 102 | if not items or iterator == prev_iterator: 103 | break 104 | if iterations > 0 and iterator is None: 105 | break 106 | else: 107 | break 108 | 109 | iterations += 1 110 | -------------------------------------------------------------------------------- /cyberdrop_dl/scraper/crawlers/toonily_crawler.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import calendar 4 | import datetime 5 | from typing import TYPE_CHECKING 6 | 7 | from aiolimiter import AsyncLimiter 8 | from yarl import URL 9 | 10 | from cyberdrop_dl.scraper.crawler import Crawler 11 | from cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem 12 | from cyberdrop_dl.utils.utilities import error_handling_wrapper, get_filename_and_ext 13 | 14 | if TYPE_CHECKING: 15 | from cyberdrop_dl.managers.manager import Manager 16 | 17 | 18 | class ToonilyCrawler(Crawler): 19 | def __init__(self, manager: Manager): 20 | super().__init__(manager, "toonily", "Toonily") 21 | self.primary_base_domain = URL("https://toonily.com") 22 | self.request_limiter = AsyncLimiter(10, 1) 23 | 24 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 25 | 26 | async def fetch(self, scrape_item: ScrapeItem) -> None: 27 | """Determines where to send the scrape item based on the url""" 28 | task_id = await self.scraping_progress.add_task(scrape_item.url) 29 | 30 | if "chapter" in scrape_item.url.name: 31 | await self.chapter(scrape_item) 32 | elif "webtoon" in scrape_item.url.parts: 33 | await self.series(scrape_item) 34 | else: 35 | await self.handle_direct_link(scrape_item) 36 | 37 | await self.scraping_progress.remove_task(task_id) 38 | 39 | @error_handling_wrapper 40 | async def series(self, scrape_item: ScrapeItem) -> None: 41 | """Scrapes an album""" 42 | async with self.request_limiter: 43 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 44 | 45 | chapters = soup.select("li[class*=wp-manga-chapter] a") 46 | for chapter in chapters: 47 | chapter_path = chapter.get("href") 48 | if chapter_path.endswith("/"): 49 | chapter_path 
= chapter_path[:-1] 50 | if chapter_path.startswith("/"): 51 | chapter_path = self.primary_base_domain / chapter_path[1:] 52 | else: 53 | chapter_path = URL(chapter_path) 54 | new_scrape_item = await self.create_scrape_item(scrape_item, chapter_path, "", True) 55 | self.manager.task_group.create_task(self.run(new_scrape_item)) 56 | 57 | @error_handling_wrapper 58 | async def chapter(self, scrape_item: ScrapeItem) -> None: 59 | """Scrapes an image""" 60 | async with self.request_limiter: 61 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 62 | 63 | title_parts = soup.select_one("title").get_text().split(" - ") 64 | series_name = title_parts[0] 65 | chapter_title = title_parts[1] 66 | series_title = await self.create_title(series_name, None, None) 67 | await scrape_item.add_to_parent_title(series_title) 68 | await scrape_item.add_to_parent_title(chapter_title) 69 | 70 | scripts = soup.select("script") 71 | date = None 72 | for script in scripts: 73 | if "datePublished" in script.get_text(): 74 | date = script.get_text().split("datePublished\":\"")[1].split("+")[0] 75 | date = await self.parse_datetime(date) 76 | break 77 | 78 | scrape_item.possible_datetime = date if date else scrape_item.possible_datetime 79 | scrape_item.part_of_album = True 80 | 81 | images = soup.select('div[class="page-break no-gaps"] img') 82 | for image in images: 83 | link = image.get("data-src") 84 | if not link: 85 | continue 86 | link = URL(link) 87 | 88 | filename, ext = await get_filename_and_ext(link.name) 89 | await self.handle_file(link, scrape_item, filename, ext) 90 | 91 | @error_handling_wrapper 92 | async def handle_direct_link(self, scrape_item: ScrapeItem) -> None: 93 | """Handles a direct link""" 94 | scrape_item.url = scrape_item.url.with_name(scrape_item.url.name) 95 | filename, ext = await get_filename_and_ext(scrape_item.url.name) 96 | await self.handle_file(scrape_item.url, scrape_item, filename, ext) 97 | 98 | 99 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 100 | 101 | async def parse_datetime(self, date: str) -> int: 102 | """Parses a datetime string into a unix timestamp""" 103 | date = datetime.datetime.strptime(date, "%Y-%m-%dT%H:%M:%S") 104 | return calendar.timegm(date.timetuple()) 105 | 106 | -------------------------------------------------------------------------------- /cyberdrop_dl/scraper/crawlers/imgur_crawler.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | from aiolimiter import AsyncLimiter 6 | from yarl import URL 7 | 8 | from cyberdrop_dl.clients.errors import ScrapeFailure, FailedLoginFailure 9 | from cyberdrop_dl.scraper.crawler import Crawler 10 | from cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem 11 | from cyberdrop_dl.utils.utilities import error_handling_wrapper, log, get_filename_and_ext 12 | 13 | if TYPE_CHECKING: 14 | from cyberdrop_dl.managers.manager import Manager 15 | 16 | 17 | class ImgurCrawler(Crawler): 18 | def __init__(self, manager: Manager): 19 | super().__init__(manager, "imgur", "Imgur") 20 | self.imgur_api = URL("https://api.imgur.com/3/") 21 | self.imgur_client_id = self.manager.config_manager.authentication_data["Imgur"]["imgur_client_id"] 22 | self.imgur_client_remaining = 12500 23 | self.headers = {"Authorization": f"Client-ID {self.imgur_client_id}"} 24 | self.request_limiter = AsyncLimiter(10, 1) 25 | 26 | 
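    # Every API call below authenticates with the "Authorization: Client-ID <id>" header built
    # in __init__; album() and image() bail out with FailedLoginFailure when no client id is
    # configured, and check_imgur_credits() aborts once the remaining API credit budget drops
    # below 100 so the crawler stops short of Imgur's rate limit.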
"""~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 27 | 28 | async def fetch(self, scrape_item: ScrapeItem) -> None: 29 | """Determines where to send the scrape item based on the url""" 30 | task_id = await self.scraping_progress.add_task(scrape_item.url) 31 | 32 | if "i.imgur.com" in scrape_item.url.host: 33 | await self.handle_direct(scrape_item) 34 | elif "a" in scrape_item.url.parts: 35 | await self.album(scrape_item) 36 | else: 37 | await self.image(scrape_item) 38 | 39 | await self.scraping_progress.remove_task(task_id) 40 | 41 | @error_handling_wrapper 42 | async def album(self, scrape_item: ScrapeItem) -> None: 43 | """Scrapes an album""" 44 | if self.imgur_client_id == "": 45 | await log("To scrape imgur content, you need to provide a client id", 30) 46 | raise FailedLoginFailure(status=401, message="No Imgur Client ID provided") 47 | await self.check_imgur_credits() 48 | 49 | album_id = scrape_item.url.parts[-1] 50 | 51 | async with self.request_limiter: 52 | JSON_Obj = await self.client.get_json(self.domain, self.imgur_api / f"album/{album_id}", headers_inc=self.headers) 53 | title_part = JSON_Obj["data"].get("title", album_id) 54 | title = await self.create_title(title_part, scrape_item.url.parts[2], None) 55 | 56 | async with self.request_limiter: 57 | JSON_Obj = await self.client.get_json(self.domain, self.imgur_api / f"album/{album_id}/images", headers_inc=self.headers) 58 | 59 | for image in JSON_Obj["data"]: 60 | link = URL(image["link"]) 61 | date = image["datetime"] 62 | new_scrape_item = await self.create_scrape_item(scrape_item, link, title, True, date) 63 | await self.handle_direct(new_scrape_item) 64 | 65 | @error_handling_wrapper 66 | async def image(self, scrape_item: ScrapeItem) -> None: 67 | """Scrapes an image""" 68 | if self.imgur_client_id == "": 69 | await log("To scrape imgur content, you need to provide a client id", 30) 70 | raise FailedLoginFailure(status=401, message="No Imgur Client ID provided") 71 | await self.check_imgur_credits() 72 | 73 | image_id = scrape_item.url.parts[-1] 74 | async with self.request_limiter: 75 | JSON_Obj = await self.client.get_json(self.domain, self.imgur_api / f"image/{image_id}", headers_inc=self.headers) 76 | 77 | date = JSON_Obj["data"]["datetime"] 78 | link = URL(JSON_Obj["data"]["link"]) 79 | new_scrape_item = await self.create_scrape_item(scrape_item, link, "", True, date) 80 | await self.handle_direct(new_scrape_item) 81 | 82 | @error_handling_wrapper 83 | async def handle_direct(self, scrape_item: ScrapeItem) -> None: 84 | """Scrapes an image""" 85 | filename, ext = await get_filename_and_ext(scrape_item.url.name) 86 | if ext.lower() == ".gifv" or ext.lower() == ".mp4": 87 | filename = filename.replace(ext, ".mp4") 88 | ext = ".mp4" 89 | scrape_item.url = URL("https://imgur.com/download") / filename.replace(ext, "") 90 | await self.handle_file(scrape_item.url, scrape_item, filename, ext) 91 | 92 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 93 | 94 | async def check_imgur_credits(self) -> None: 95 | """Checks the remaining credits""" 96 | credits_obj = await self.client.get_json(self.domain, self.imgur_api / "credits", headers_inc=self.headers) 97 | self.imgur_client_remaining = credits_obj["data"]["ClientRemaining"] 98 | if self.imgur_client_remaining < 100: 99 | raise ScrapeFailure(429, "Imgur API rate limit reached") 100 | 
-------------------------------------------------------------------------------- /cyberdrop_dl/ui/progress/scraping_progress.py: -------------------------------------------------------------------------------- 1 | from typing import List, TYPE_CHECKING 2 | 3 | from rich.console import Group 4 | from rich.panel import Panel 5 | from rich.progress import Progress, SpinnerColumn, TaskID 6 | from yarl import URL 7 | 8 | if TYPE_CHECKING: 9 | from cyberdrop_dl.managers.manager import Manager 10 | 11 | 12 | async def adjust_title(s: str, length: int = 40, placeholder: str = "...") -> str: 13 | """Collapse and truncate or pad the given string to fit in the given length""" 14 | return f"{s[:length - len(placeholder)]}{placeholder}" if len(s) >= length else s.ljust(length) 15 | 16 | 17 | class ScrapingProgress: 18 | """Class that manages the download progress of individual files""" 19 | def __init__(self, visible_tasks_limit: int, manager: 'Manager'): 20 | self.manager = manager 21 | 22 | self.progress = Progress(SpinnerColumn(), 23 | "[progress.description]{task.description}") 24 | self.overflow = Progress("[progress.description]{task.description}") 25 | self.queue = Progress("[progress.description]{task.description}") 26 | self.progress_group = Group(self.progress, self.overflow, self.queue) 27 | 28 | self.color = "plum3" 29 | self.type_str = "Files" 30 | self.progress_str = "[{color}]{description}" 31 | self.overflow_str = "[{color}]... And {number} Other Links" 32 | self.queue_str = "[{color}]... And {number} Links In Scrape Queue" 33 | self.overflow_task_id = self.overflow.add_task(self.overflow_str.format(color=self.color, number=0, type_str=self.type_str), visible=False) 34 | self.queue_task_id = self.queue.add_task(self.queue_str.format(color=self.color, number=0, type_str=self.type_str), visible=False) 35 | 36 | self.visible_tasks: List[TaskID] = [] 37 | self.invisible_tasks: List[TaskID] = [] 38 | self.tasks_visibility_limit = visible_tasks_limit 39 | 40 | async def get_progress(self) -> Panel: 41 | """Returns the progress bar""" 42 | return Panel(self.progress_group, title="Scraping", border_style="green", padding=(1, 1)) 43 | 44 | async def get_queue_length(self) -> int: 45 | """Returns the number of tasks in the scraper queue""" 46 | total = 0 47 | 48 | for scraper in self.manager.scrape_mapper.existing_crawlers.values(): 49 | total += scraper.waiting_items 50 | 51 | return total 52 | 53 | async def redraw(self, passed=False) -> None: 54 | """Redraws the progress bar""" 55 | while len(self.visible_tasks) > self.tasks_visibility_limit: 56 | task_id = self.visible_tasks.pop(0) 57 | self.invisible_tasks.append(task_id) 58 | self.progress.update(task_id, visible=False) 59 | while len(self.invisible_tasks) > 0 and len(self.visible_tasks) < self.tasks_visibility_limit: 60 | task_id = self.invisible_tasks.pop(0) 61 | self.visible_tasks.append(task_id) 62 | self.progress.update(task_id, visible=True) 63 | 64 | if len(self.invisible_tasks) > 0: 65 | self.overflow.update(self.overflow_task_id, description=self.overflow_str.format(color=self.color, number=len(self.invisible_tasks), type_str=self.type_str), visible=True) 66 | else: 67 | self.overflow.update(self.overflow_task_id, visible=False) 68 | 69 | queue_length = await self.get_queue_length() 70 | if queue_length > 0: 71 | self.queue.update(self.queue_task_id, description=self.queue_str.format(color=self.color, number=queue_length, type_str=self.type_str), visible=True) 72 | else: 73 | self.queue.update(self.queue_task_id, visible=False) 74 
| 75 | if not passed: 76 | await self.manager.progress_manager.file_progress.redraw(True) 77 | 78 | async def add_task(self, url: URL) -> TaskID: 79 | """Adds a new task to the progress bar""" 80 | if len(self.visible_tasks) >= self.tasks_visibility_limit: 81 | task_id = self.progress.add_task(self.progress_str.format(color=self.color, description=str(url)), visible=False) 82 | self.invisible_tasks.append(task_id) 83 | else: 84 | task_id = self.progress.add_task(self.progress_str.format(color=self.color, description=str(url))) 85 | self.visible_tasks.append(task_id) 86 | await self.redraw() 87 | return task_id 88 | 89 | async def remove_task(self, task_id: TaskID) -> None: 90 | """Removes a task from the progress bar""" 91 | if task_id in self.visible_tasks: 92 | self.visible_tasks.remove(task_id) 93 | self.progress.update(task_id, visible=False) 94 | elif task_id in self.invisible_tasks: 95 | self.invisible_tasks.remove(task_id) 96 | elif task_id == self.overflow_task_id: 97 | self.overflow.update(task_id, visible=False) 98 | else: 99 | raise ValueError("Task ID not found") 100 | await self.redraw() 101 | -------------------------------------------------------------------------------- /cyberdrop_dl/managers/args_manager.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from dataclasses import field 4 | from pathlib import Path 5 | 6 | from cyberdrop_dl.utils.args.args import parse_args 7 | 8 | 9 | class ArgsManager: 10 | def __init__(self): 11 | self.parsed_args = {} 12 | 13 | self.proxy = "" 14 | self.flaresolverr = "" 15 | 16 | self.all_configs = False 17 | self.sort_all_configs = False 18 | self.retry = False 19 | 20 | self.immediate_download = False 21 | self.no_ui = False 22 | self.load_config_from_args = False 23 | self.load_config_name = "" 24 | self.other_links: list = [] 25 | 26 | # Files 27 | self.input_file = None 28 | self.download_dir = None 29 | self.config_file = None 30 | self.appdata_dir = None 31 | self.log_dir = None 32 | 33 | # Sorting 34 | self.sort_downloads = field(init=False) 35 | self.sort_folder = None 36 | 37 | # Logs 38 | self.main_log_filename = None 39 | self.last_forum_post_filename = None 40 | self.unsupported_urls_filename = None 41 | self.download_error_urls_filename = None 42 | self.scrape_error_urls_filename = None 43 | 44 | # UI 45 | self.vi_mode = None 46 | 47 | def startup(self) -> None: 48 | """Parses arguments and sets variables accordingly""" 49 | if self.parsed_args: 50 | return 51 | 52 | self.parsed_args = parse_args().__dict__ 53 | 54 | self.immediate_download = self.parsed_args['download'] 55 | self.load_config_name = self.parsed_args['config'] 56 | self.vi_mode = self.parsed_args['vi_mode'] 57 | 58 | if self.parsed_args['no_ui']: 59 | self.immediate_download = True 60 | self.no_ui = True 61 | 62 | if self.load_config_name: 63 | self.load_config_from_args = True 64 | 65 | if self.parsed_args['download_all_configs']: 66 | self.all_configs = True 67 | self.immediate_download = True 68 | 69 | if self.parsed_args['sort_all_configs']: 70 | self.sort_all_configs = True 71 | self.all_configs = True 72 | self.immediate_download = True 73 | 74 | if self.parsed_args['retry_failed']: 75 | self.retry = True 76 | self.immediate_download = True 77 | 78 | if self.parsed_args['input_file']: 79 | self.input_file = Path(self.parsed_args['input_file']) 80 | if self.parsed_args['output_folder']: 81 | self.download_dir = Path(self.parsed_args['output_folder']) 82 | if 
self.parsed_args['appdata_folder']: 83 | self.appdata_dir = Path(self.parsed_args['appdata_folder']) 84 | if self.parsed_args['config_file']: 85 | self.config_file = Path(self.parsed_args['config_file']) 86 | self.immediate_download = True 87 | if self.parsed_args['log_folder']: 88 | self.log_dir = Path(self.parsed_args['log_folder']) 89 | if self.parsed_args['sort_downloads']: 90 | self.sort_downloads = True 91 | if self.parsed_args['sort_folder']: 92 | self.sort_folder = Path(self.parsed_args['sort_folder']) 93 | 94 | if self.parsed_args['main_log_filename']: 95 | self.main_log_filename = self.parsed_args['main_log_filename'] 96 | if self.parsed_args['last_forum_post_filename']: 97 | self.last_forum_post_filename = self.parsed_args['last_forum_post_filename'] 98 | if self.parsed_args['unsupported_urls_filename']: 99 | self.unsupported_urls_filename = self.parsed_args['unsupported_urls_filename'] 100 | if self.parsed_args['download_error_urls_filename']: 101 | self.download_error_urls_filename = self.parsed_args['download_error_urls_filename'] 102 | if self.parsed_args['scrape_error_urls_filename']: 103 | self.scrape_error_urls_filename = self.parsed_args['scrape_error_urls_filename'] 104 | 105 | if self.parsed_args['proxy']: 106 | self.proxy = self.parsed_args['proxy'] 107 | if self.parsed_args['flaresolverr']: 108 | self.flaresolverr = self.parsed_args['flaresolverr'] 109 | 110 | self.other_links = self.parsed_args['links'] 111 | 112 | del self.parsed_args['download'] 113 | del self.parsed_args['download_all_configs'] 114 | del self.parsed_args['config'] 115 | del self.parsed_args['no_ui'] 116 | del self.parsed_args['retry_failed'] 117 | del self.parsed_args['input_file'] 118 | del self.parsed_args['output_folder'] 119 | del self.parsed_args['appdata_folder'] 120 | del self.parsed_args['config_file'] 121 | del self.parsed_args['log_folder'] 122 | del self.parsed_args['proxy'] 123 | del self.parsed_args['links'] 124 | del self.parsed_args['sort_downloads'] 125 | del self.parsed_args['sort_folder'] 126 | -------------------------------------------------------------------------------- /cyberdrop_dl/utils/args/config_definitions.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Dict 4 | 5 | from cyberdrop_dl.managers.path_manager import APP_STORAGE, DOWNLOAD_STORAGE 6 | 7 | 8 | authentication_settings: Dict = { 9 | "Forums": { 10 | "celebforum_xf_user_cookie": "", 11 | "celebforum_username": "", 12 | "celebforum_password": "", 13 | "f95zone_xf_user_cookie": "", 14 | "f95zone_username": "", 15 | "f95zone_password": "", 16 | "leakedmodels_xf_user_cookie": "", 17 | "leakedmodels_username": "", 18 | "leakedmodels_password": "", 19 | "nudostar_xf_user_cookie": "", 20 | "nudostar_username": "", 21 | "nudostar_password": "", 22 | "simpcity_xf_user_cookie": "", 23 | "simpcity_username": "", 24 | "simpcity_password": "", 25 | "socialmediagirls_xf_user_cookie": "", 26 | "socialmediagirls_username": "", 27 | "socialmediagirls_password": "", 28 | "xbunker_xf_user_cookie": "", 29 | "xbunker_username": "", 30 | "xbunker_password": "", 31 | }, 32 | "GoFile": { 33 | "gofile_api_key": "", 34 | }, 35 | "Imgur": { 36 | "imgur_client_id": "", 37 | }, 38 | "JDownloader": { 39 | "jdownloader_username": "", 40 | "jdownloader_password": "", 41 | "jdownloader_device": "", 42 | }, 43 | "PixelDrain": { 44 | "pixeldrain_api_key": "", 45 | }, 46 | "Reddit": { 47 | "reddit_personal_use_script": "", 48 | "reddit_secret": 
"", 49 | } 50 | } 51 | 52 | 53 | settings: Dict = { 54 | "Download_Options": { 55 | "block_download_sub_folders": False, 56 | "disable_download_attempt_limit": False, 57 | "disable_file_timestamps": False, 58 | "include_album_id_in_folder_name": False, 59 | "include_thread_id_in_folder_name": False, 60 | "remove_domains_from_folder_names": False, 61 | "remove_generated_id_from_filenames": False, 62 | "scrape_single_forum_post": False, 63 | "separate_posts": False, 64 | "skip_download_mark_completed": False, 65 | }, 66 | "Files": { 67 | "input_file": str(APP_STORAGE / "Configs" / "{config}" / "URLs.txt"), 68 | "download_folder": str(DOWNLOAD_STORAGE), 69 | }, 70 | "Logs": { 71 | "log_folder": str(APP_STORAGE / "Configs" / "{config}" / "Logs"), 72 | "main_log_filename": "downloader.log", 73 | "last_forum_post_filename": "Last_Scraped_Forum_Posts.txt", 74 | "unsupported_urls_filename": "Unsupported_URLs.txt", 75 | "download_error_urls_filename": "Download_Error_URLs.csv", 76 | "scrape_error_urls_filename": "Scrape_Error_URLs.csv", 77 | }, 78 | "File_Size_Limits": { 79 | "maximum_image_size": 0, 80 | "maximum_other_size": 0, 81 | "maximum_video_size": 0, 82 | "minimum_image_size": 0, 83 | "minimum_other_size": 0, 84 | "minimum_video_size": 0, 85 | }, 86 | "Ignore_Options": { 87 | "exclude_videos": False, 88 | "exclude_images": False, 89 | "exclude_audio": False, 90 | "exclude_other": False, 91 | "ignore_coomer_ads": False, 92 | "skip_hosts": [], 93 | "only_hosts": [], 94 | }, 95 | "Runtime_Options": { 96 | "ignore_history": False, 97 | "log_level": 10, 98 | "skip_check_for_partial_files": False, 99 | "skip_check_for_empty_folders": False, 100 | "delete_partial_files": False, 101 | "send_unsupported_to_jdownloader": False, 102 | "update_last_forum_post": False, 103 | }, 104 | "Sorting": { 105 | "sort_downloads": False, 106 | "sort_folder": str(DOWNLOAD_STORAGE / "Cyberdrop-DL Sorted Downloads"), 107 | "sort_incremementer_format": " ({i})", 108 | "sorted_audio": "{sort_dir}/{base_dir}/Audio/{filename}{ext}", 109 | "sorted_image": "{sort_dir}/{base_dir}/Images/{filename}{ext}", 110 | "sorted_other": "{sort_dir}/{base_dir}/Other/{filename}{ext}", 111 | "sorted_video": "{sort_dir}/{base_dir}/Videos/{filename}{ext}", 112 | } 113 | } 114 | 115 | 116 | global_settings: Dict = { 117 | "General": { 118 | "allow_insecure_connections": False, 119 | "user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0", 120 | "proxy": "", 121 | "flaresolverr": "", 122 | "max_file_name_length": 95, 123 | "max_folder_name_length": 60, 124 | "required_free_space": 5, 125 | }, 126 | "Rate_Limiting_Options": { 127 | "connection_timeout": 15, 128 | "download_attempts": 10, 129 | "read_timeout": 300, 130 | "rate_limit": 50, 131 | "download_delay": 0.5, 132 | "max_simultaneous_downloads": 15, 133 | "max_simultaneous_downloads_per_domain": 5, 134 | }, 135 | "UI_Options": { 136 | "vi_mode": False, 137 | "refresh_rate": 10, 138 | "scraping_item_limit": 5, 139 | "downloading_item_limit": 5, 140 | } 141 | } 142 | 143 | -------------------------------------------------------------------------------- /cyberdrop_dl/scraper/crawlers/imgbb_crawler.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import calendar 4 | import datetime 5 | import re 6 | from typing import TYPE_CHECKING 7 | 8 | from aiolimiter import AsyncLimiter 9 | from yarl import URL 10 | 11 | from cyberdrop_dl.scraper.crawler import Crawler 12 | from 
cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem 13 | from cyberdrop_dl.utils.utilities import error_handling_wrapper, get_filename_and_ext 14 | 15 | if TYPE_CHECKING: 16 | from cyberdrop_dl.managers.manager import Manager 17 | 18 | 19 | class ImgBBCrawler(Crawler): 20 | def __init__(self, manager: Manager): 21 | super().__init__(manager, "imgbb", "ImgBB") 22 | self.primary_base_domain = URL("https://ibb.co") 23 | self.request_limiter = AsyncLimiter(10, 1) 24 | 25 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 26 | 27 | async def fetch(self, scrape_item: ScrapeItem) -> None: 28 | """Determines where to send the scrape item based on the url""" 29 | task_id = await self.scraping_progress.add_task(scrape_item.url) 30 | 31 | if await self.check_direct_link(scrape_item.url): 32 | image_id = scrape_item.url.parts[1] 33 | scrape_item.url = self.primary_base_domain / image_id 34 | 35 | scrape_item.url = self.primary_base_domain / scrape_item.url.path[1:] 36 | if "album" in scrape_item.url.parts: 37 | await self.album(scrape_item) 38 | else: 39 | await self.image(scrape_item) 40 | 41 | await self.scraping_progress.remove_task(task_id) 42 | 43 | @error_handling_wrapper 44 | async def album(self, scrape_item: ScrapeItem) -> None: 45 | """Scrapes an album""" 46 | async with self.request_limiter: 47 | soup = await self.client.get_BS4(self.domain, scrape_item.url / "sub") 48 | 49 | title = await self.create_title(soup.select_one("a[data-text=album-name]").get_text(), scrape_item.url.parts[2], None) 50 | albums = soup.select("a[class='image-container --media']") 51 | for album in albums: 52 | sub_album_link = URL(album.get('href')) 53 | new_scrape_item = await self.create_scrape_item(scrape_item, sub_album_link, title, True) 54 | self.manager.task_group.create_task(self.run(new_scrape_item)) 55 | 56 | async with self.request_limiter: 57 | soup = await self.client.get_BS4(self.domain, scrape_item.url / "sub") 58 | link_next = URL(soup.select_one("a[id=list-most-recent-link]").get("href")) 59 | 60 | while True: 61 | async with self.request_limiter: 62 | soup = await self.client.get_BS4(self.domain, link_next) 63 | links = soup.select("a[class*=image-container]") 64 | for link in links: 65 | link = URL(link.get('href')) 66 | new_scrape_item = await self.create_scrape_item(scrape_item, link, title, True) 67 | self.manager.task_group.create_task(self.run(new_scrape_item)) 68 | 69 | link_next = soup.select_one('a[data-pagination=next]') 70 | if link_next is not None: 71 | link_next = link_next.get('href') 72 | if link_next is not None: 73 | link_next = URL(link_next) 74 | else: 75 | break 76 | else: 77 | break 78 | 79 | @error_handling_wrapper 80 | async def image(self, scrape_item: ScrapeItem) -> None: 81 | """Scrapes an image""" 82 | if await self.check_complete_from_referer(scrape_item): 83 | return 84 | 85 | async with self.request_limiter: 86 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 87 | 88 | link = URL(soup.select_one("div[id=image-viewer-container] img").get('src')) 89 | date = soup.select_one("p[class*=description-meta] span").get("title") 90 | date = await self.parse_datetime(date) 91 | scrape_item.possible_datetime = date 92 | 93 | filename, ext = await get_filename_and_ext(link.name) 94 | await self.handle_file(link, scrape_item, filename, ext) 95 | 96 | @error_handling_wrapper 97 | async def handle_direct_link(self, scrape_item: ScrapeItem) -> None: 98 | """Handles a direct link""" 99 | 
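        # ImgBB thumbnail and medium-size URLs carry ".th." / ".md." infixes in the filename;
        # stripping them below rewrites the link to what appears to be the full-resolution
        # file before the filename and extension are extracted.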
scrape_item.url = scrape_item.url.with_name(scrape_item.url.name.replace('.md.', '.').replace('.th.', '.')) 100 | filename, ext = await get_filename_and_ext(scrape_item.url.name) 101 | await self.handle_file(scrape_item.url, scrape_item, filename, ext) 102 | 103 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 104 | 105 | async def parse_datetime(self, date: str) -> int: 106 | """Parses a datetime string into a unix timestamp""" 107 | date = datetime.datetime.strptime(date, "%Y-%m-%d %H:%M:%S") 108 | return calendar.timegm(date.timetuple()) 109 | 110 | async def check_direct_link(self, url: URL) -> bool: 111 | """Determines if the url is a direct link or not""" 112 | mapping_direct = [r'i.ibb.co',] 113 | return any(re.search(domain, str(url)) for domain in mapping_direct) 114 | -------------------------------------------------------------------------------- /cyberdrop_dl/scraper/crawlers/imageban_crawler.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import calendar 4 | import datetime 5 | from typing import TYPE_CHECKING 6 | 7 | from aiolimiter import AsyncLimiter 8 | from yarl import URL 9 | 10 | from cyberdrop_dl.scraper.crawler import Crawler 11 | from cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem 12 | from cyberdrop_dl.utils.utilities import get_filename_and_ext, error_handling_wrapper 13 | 14 | if TYPE_CHECKING: 15 | from cyberdrop_dl.managers.manager import Manager 16 | 17 | 18 | class ImageBanCrawler(Crawler): 19 | def __init__(self, manager: Manager): 20 | super().__init__(manager, "imageban", "ImageBan") 21 | self.request_limiter = AsyncLimiter(10, 1) 22 | 23 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 24 | 25 | async def fetch(self, scrape_item: ScrapeItem) -> None: 26 | """Determines where to send the scrape item based on the url""" 27 | task_id = await self.scraping_progress.add_task(scrape_item.url) 28 | 29 | if "a" in scrape_item.url.parts: 30 | await self.album(scrape_item) 31 | elif "c" in scrape_item.url.parts: 32 | await self.compilation(scrape_item) 33 | elif "show" in scrape_item.url.parts: 34 | await self.image(scrape_item) 35 | else: 36 | await self.handle_direct(scrape_item) 37 | 38 | await self.scraping_progress.remove_task(task_id) 39 | 40 | @error_handling_wrapper 41 | async def album(self, scrape_item: ScrapeItem) -> None: 42 | """Scrapes a gallery""" 43 | async with self.request_limiter: 44 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 45 | 46 | title = await self.create_title(soup.select_one("title").get_text().replace("Просмотр альбома: ", ""), scrape_item.url.parts[2], None) 47 | content_block = soup.select_one('div[class="row text-center"]') 48 | images = content_block.select("a") 49 | 50 | for image in images: 51 | link_path = image.get("href") 52 | 53 | if "javascript:void(0)" in link_path: 54 | continue 55 | 56 | if link_path.startswith("/"): 57 | link = URL("https://" + scrape_item.url.host + link_path) 58 | else: 59 | link = URL(link_path) 60 | 61 | new_scrape_item = await self.create_scrape_item(scrape_item, link, title, True) 62 | self.manager.task_group.create_task(self.run(new_scrape_item)) 63 | 64 | next_page = soup.select_one('a[class*="page-link next"]') 65 | if next_page: 66 | link_path = next_page.get("href") 67 | if link_path.startswith("/"): 68 | link = URL("https://" + 
scrape_item.url.host + link_path) 69 | else: 70 | link = URL(link_path) 71 | new_scrape_item = await self.create_scrape_item(scrape_item, link, "", True) 72 | self.manager.task_group.create_task(self.run(new_scrape_item)) 73 | 74 | @error_handling_wrapper 75 | async def compilation(self, scrape_item: ScrapeItem) -> None: 76 | """Scrapes a compilation""" 77 | async with self.request_limiter: 78 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 79 | 80 | title = await self.create_title(soup.select_one("blockquote").get_text(), scrape_item.url.parts[2], None) 81 | await scrape_item.add_to_parent_title(title) 82 | content_block = soup.select("div[class=container-fluid]")[-1] 83 | images = content_block.select("img") 84 | 85 | for image in images: 86 | link = URL(image.get("src")) 87 | date = await self.parse_datetime(f"{(link.parts[2])}-{(link.parts[3])}-{(link.parts[4])}") 88 | scrape_item.possible_datetime = date 89 | filename, ext = await get_filename_and_ext(link.name) 90 | await self.handle_file(link, scrape_item, filename, ext) 91 | 92 | @error_handling_wrapper 93 | async def image(self, scrape_item: ScrapeItem) -> None: 94 | """Scrapes an image""" 95 | if await self.check_complete_from_referer(scrape_item): 96 | return 97 | 98 | async with self.request_limiter: 99 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 100 | 101 | date = await self.parse_datetime(f"{(scrape_item.url.parts[2])}-{(scrape_item.url.parts[3])}-{(scrape_item.url.parts[4])}") 102 | scrape_item.possible_datetime = date 103 | 104 | image = soup.select_one("img[id=img_main]") 105 | if image: 106 | link = URL(image.get("src")) 107 | filename, ext = await get_filename_and_ext(link.name) 108 | await self.handle_file(link, scrape_item, filename, ext) 109 | 110 | @error_handling_wrapper 111 | async def handle_direct(self, scrape_item: ScrapeItem) -> None: 112 | """Scrapes an image""" 113 | filename, ext = await get_filename_and_ext(scrape_item.url.name) 114 | await self.handle_file(scrape_item.url, scrape_item, filename, ext) 115 | 116 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 117 | 118 | async def parse_datetime(self, date: str) -> int: 119 | """Parses a datetime string into a unix timestamp""" 120 | date = datetime.datetime.strptime(date, "%Y-%m-%d") 121 | return calendar.timegm(date.timetuple()) -------------------------------------------------------------------------------- /cyberdrop_dl/scraper/crawlers/rule34vault_crawler.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import calendar 4 | import datetime 5 | from typing import TYPE_CHECKING 6 | 7 | from aiolimiter import AsyncLimiter 8 | from yarl import URL 9 | 10 | from cyberdrop_dl.scraper.crawler import Crawler 11 | from cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem 12 | from cyberdrop_dl.utils.utilities import get_filename_and_ext, error_handling_wrapper 13 | 14 | if TYPE_CHECKING: 15 | from cyberdrop_dl.managers.manager import Manager 16 | 17 | 18 | class Rule34VaultCrawler(Crawler): 19 | def __init__(self, manager: Manager): 20 | super().__init__(manager, "rule34vault", "Rule34Vault") 21 | self.primary_base_url = URL("https://rule34vault.com") 22 | self.request_limiter = AsyncLimiter(10, 1) 23 | 24 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 25 | 26 | async def fetch(self, scrape_item: 
ScrapeItem) -> None: 27 | """Determines where to send the scrape item based on the url""" 28 | task_id = await self.scraping_progress.add_task(scrape_item.url) 29 | 30 | if "post" in scrape_item.url.parts: 31 | await self.file(scrape_item) 32 | elif "playlists" in scrape_item.url.parts: 33 | await self.playlist(scrape_item) 34 | else: 35 | await self.tag(scrape_item) 36 | 37 | await self.scraping_progress.remove_task(task_id) 38 | 39 | @error_handling_wrapper 40 | async def tag(self, scrape_item: ScrapeItem) -> None: 41 | """Scrapes an album""" 42 | async with self.request_limiter: 43 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 44 | 45 | title = await self.create_title(scrape_item.url.parts[1], None, None) 46 | 47 | content_block = soup.select_one('div[class="grid ng-star-inserted"]') 48 | content = content_block.select('a[class="box ng-star-inserted"]') 49 | for file_page in content: 50 | link = file_page.get('href') 51 | if link.startswith("/"): 52 | link = f"{self.primary_base_url}{link}" 53 | link = URL(link) 54 | new_scrape_item = await self.create_scrape_item(scrape_item, link, title, True) 55 | self.manager.task_group.create_task(self.run(new_scrape_item)) 56 | if not content: 57 | return 58 | 59 | if len(scrape_item.url.parts) > 2: 60 | page = int(scrape_item.url.parts[-1]) 61 | next_page = scrape_item.url.with_path(f"/{scrape_item.url.parts[1]}/page/{page + 1}") 62 | else: 63 | next_page = scrape_item.url.with_path(f"/{scrape_item.url.parts[1]}/page/2") 64 | new_scrape_item = await self.create_scrape_item(scrape_item, next_page, "") 65 | self.manager.task_group.create_task(self.run(new_scrape_item)) 66 | 67 | @error_handling_wrapper 68 | async def playlist(self, scrape_item: ScrapeItem) -> None: 69 | """Scrapes a playlist""" 70 | async with self.request_limiter: 71 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 72 | 73 | title_str = soup.select_one('div[class*=title]').text 74 | title = await self.create_title(title_str, scrape_item.url.parts[-1], None) 75 | 76 | content_block = soup.select_one('div[class="grid ng-star-inserted"]') 77 | content = content_block.select('a[class="box ng-star-inserted"]') 78 | for file_page in content: 79 | link = file_page.get('href') 80 | if link.startswith("/"): 81 | link = f"{self.primary_base_url}{link}" 82 | link = URL(link) 83 | new_scrape_item = await self.create_scrape_item(scrape_item, link, title, True) 84 | self.manager.task_group.create_task(self.run(new_scrape_item)) 85 | if not content: 86 | return 87 | 88 | if scrape_item.url.query: 89 | page = scrape_item.url.query.get("page") 90 | next_page = scrape_item.url.with_query({"page": int(page) + 1}) 91 | else: 92 | next_page = scrape_item.url.with_query({"page": 2}) 93 | new_scrape_item = await self.create_scrape_item(scrape_item, next_page, "") 94 | self.manager.task_group.create_task(self.run(new_scrape_item)) 95 | 96 | @error_handling_wrapper 97 | async def file(self, scrape_item: ScrapeItem) -> None: 98 | """Scrapes an image""" 99 | async with self.request_limiter: 100 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 101 | 102 | date = await self.parse_datetime(soup.select_one('div[class="text-primary ng-star-inserted"]').text.split("(")[1].split(")")[0]) 103 | scrape_item.date = date 104 | 105 | image = soup.select_one('img[class*="img ng-star-inserted"]') 106 | if image: 107 | link = image.get('src') 108 | if link.startswith("/"): 109 | link = f"{self.primary_base_url}{link}" 110 | link = URL(link) 111 | filename, ext = await 
get_filename_and_ext(link.name) 112 | await self.handle_file(link, scrape_item, filename, ext) 113 | video = soup.select_one("video source") 114 | if video: 115 | link = video.get('src') 116 | if link.startswith("/"): 117 | link = f"{self.primary_base_url}{link}" 118 | link = URL(link) 119 | filename, ext = await get_filename_and_ext(link.name) 120 | await self.handle_file(link, scrape_item, filename, ext) 121 | 122 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 123 | 124 | async def parse_datetime(self, date: str) -> int: 125 | """Parses a datetime string into a unix timestamp""" 126 | date = datetime.datetime.strptime(date, "%b %d, %Y, %I:%M:%S %p") 127 | return calendar.timegm(date.timetuple()) 128 | -------------------------------------------------------------------------------- /cyberdrop_dl/scraper/crawlers/omegascans_crawler.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import calendar 4 | import datetime 5 | from typing import TYPE_CHECKING 6 | 7 | from aiolimiter import AsyncLimiter 8 | from yarl import URL 9 | 10 | from cyberdrop_dl.clients.errors import ScrapeFailure 11 | from cyberdrop_dl.scraper.crawler import Crawler 12 | from cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem 13 | from cyberdrop_dl.utils.utilities import error_handling_wrapper, get_filename_and_ext, log 14 | 15 | if TYPE_CHECKING: 16 | from cyberdrop_dl.managers.manager import Manager 17 | 18 | 19 | class OmegaScansCrawler(Crawler): 20 | def __init__(self, manager: Manager): 21 | super().__init__(manager, "omegascans", "OmegaScans") 22 | self.primary_base_domain = URL("https://omegascans.org") 23 | self.api_url = "https://api.omegascans.org/chapter/query?page={}&perPage={}&series_id={}" 24 | self.request_limiter = AsyncLimiter(10, 1) 25 | 26 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 27 | 28 | async def fetch(self, scrape_item: ScrapeItem) -> None: 29 | """Determines where to send the scrape item based on the url""" 30 | task_id = await self.scraping_progress.add_task(scrape_item.url) 31 | 32 | if "chapter" in scrape_item.url.name: 33 | await self.chapter(scrape_item) 34 | elif "series" in scrape_item.url.parts: 35 | await self.series(scrape_item) 36 | else: 37 | await self.handle_direct_link(scrape_item) 38 | 39 | await self.scraping_progress.remove_task(task_id) 40 | 41 | @error_handling_wrapper 42 | async def series(self, scrape_item: ScrapeItem) -> None: 43 | """Scrapes an album""" 44 | async with self.request_limiter: 45 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 46 | 47 | scripts = soup.select("script") 48 | for script in scripts: 49 | if "series_id" in script.get_text(): 50 | series_id = script.get_text().split('series_id\\":')[1].split(",")[0] 51 | break 52 | 53 | page_number = 1 54 | number_per_page = 30 55 | while True: 56 | api_url = URL(self.api_url.format(page_number, number_per_page, series_id)) 57 | async with self.request_limiter: 58 | JSON_Obj = await self.client.get_json(self.domain, api_url) 59 | if not JSON_Obj: 60 | break 61 | 62 | for chapter in JSON_Obj['data']: 63 | chapter_url = scrape_item.url / chapter['chapter_slug'] 64 | new_scrape_item = await self.create_scrape_item(scrape_item, chapter_url, "", True) 65 | self.manager.task_group.create_task(self.run(new_scrape_item)) 66 | 67 | if JSON_Obj['meta']['current_page'] == 
JSON_Obj['meta']['last_page']: 68 | break 69 | page_number += 1 70 | 71 | @error_handling_wrapper 72 | async def chapter(self, scrape_item: ScrapeItem) -> None: 73 | """Scrapes an image""" 74 | async with self.request_limiter: 75 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 76 | 77 | if "This chapter is premium" in soup.get_text(): 78 | await log("Scrape Failed: This chapter is premium", 40) 79 | raise ScrapeFailure(401, "This chapter is premium") 80 | 81 | title_parts = soup.select_one("title").get_text().split(" - ") 82 | series_name = title_parts[0] 83 | chapter_title = title_parts[1] 84 | series_title = await self.create_title(series_name, None, None) 85 | await scrape_item.add_to_parent_title(series_title) 86 | await scrape_item.add_to_parent_title(chapter_title) 87 | 88 | date = soup.select('h2[class="font-semibold font-sans text-muted-foreground text-xs"]')[-1].get_text() 89 | try: 90 | date = await self.parse_datetime_standard(date) 91 | except ValueError: 92 | scripts = soup.select("script") 93 | for script in scripts: 94 | if "created" in script.get_text(): 95 | date = script.get_text().split("created_at\\\":\\\"")[1].split(".")[0] 96 | date = await self.parse_datetime_other(date) 97 | break 98 | 99 | scrape_item.possible_datetime = date 100 | scrape_item.part_of_album = True 101 | 102 | images = soup.select("p[class*=flex] img") 103 | for image in images: 104 | link = image.get("src") 105 | if not link: 106 | link = image.get("data-src") 107 | if not link: 108 | continue 109 | link = URL(link) 110 | 111 | filename, ext = await get_filename_and_ext(link.name) 112 | await self.handle_file(link, scrape_item, filename, ext) 113 | 114 | @error_handling_wrapper 115 | async def handle_direct_link(self, scrape_item: ScrapeItem) -> None: 116 | """Handles a direct link""" 117 | scrape_item.url = scrape_item.url.with_name(scrape_item.url.name) 118 | filename, ext = await get_filename_and_ext(scrape_item.url.name) 119 | await self.handle_file(scrape_item.url, scrape_item, filename, ext) 120 | 121 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 122 | 123 | async def parse_datetime_standard(self, date: str) -> int: 124 | """Parses a datetime string into a unix timestamp""" 125 | date = datetime.datetime.strptime(date, "%m/%d/%Y") 126 | return calendar.timegm(date.timetuple()) 127 | 128 | async def parse_datetime_other(self, date: str) -> int: 129 | """Parses a datetime string into a unix timestamp""" 130 | date = datetime.datetime.strptime(date, "%Y-%m-%dT%H:%M:%S") 131 | return calendar.timegm(date.timetuple()) 132 | 133 | -------------------------------------------------------------------------------- /cyberdrop_dl/managers/client_manager.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import asyncio 4 | import ssl 5 | from http import HTTPStatus 6 | from typing import TYPE_CHECKING 7 | 8 | import aiohttp 9 | import certifi 10 | from aiohttp import ClientResponse, ContentTypeError 11 | from aiolimiter import AsyncLimiter 12 | 13 | from cyberdrop_dl.clients.download_client import DownloadClient 14 | from cyberdrop_dl.clients.errors import DownloadFailure, DDOSGuardFailure, ScrapeFailure 15 | from cyberdrop_dl.clients.scraper_client import ScraperClient 16 | from cyberdrop_dl.utils.utilities import CustomHTTPStatus 17 | 18 | if TYPE_CHECKING: 19 | from cyberdrop_dl.managers.manager import Manager 20 | 21 | 22 | class 
ClientManager: 23 | """Creates a 'client' that can be referenced by scraping or download sessions""" 24 | def __init__(self, manager: Manager): 25 | self.manager = manager 26 | 27 | self.connection_timeout = manager.config_manager.global_settings_data['Rate_Limiting_Options']['connection_timeout'] 28 | self.read_timeout = manager.config_manager.global_settings_data['Rate_Limiting_Options']['read_timeout'] 29 | self.rate_limit = manager.config_manager.global_settings_data['Rate_Limiting_Options']['rate_limit'] 30 | self.download_delay = manager.config_manager.global_settings_data['Rate_Limiting_Options']['download_delay'] 31 | self.user_agent = manager.config_manager.global_settings_data['General']['user_agent'] 32 | self.verify_ssl = not manager.config_manager.global_settings_data['General']['allow_insecure_connections'] 33 | self.simultaneous_per_domain = manager.config_manager.global_settings_data['Rate_Limiting_Options']['max_simultaneous_downloads_per_domain'] 34 | 35 | self.ssl_context = ssl.create_default_context(cafile=certifi.where()) if self.verify_ssl else False 36 | self.cookies = aiohttp.CookieJar(quote_cookie=False) 37 | self.proxy = manager.config_manager.global_settings_data['General']['proxy'] if not manager.args_manager.proxy else manager.args_manager.proxy 38 | self.flaresolverr = manager.config_manager.global_settings_data['General']['flaresolverr'] if not manager.args_manager.flaresolverr else manager.args_manager.flaresolverr 39 | 40 | self.domain_rate_limits = { 41 | "bunkrr": AsyncLimiter(5, 1), 42 | "cyberdrop": AsyncLimiter(5, 1), 43 | "coomer": AsyncLimiter(1, 1), 44 | "kemono": AsyncLimiter(1, 1), 45 | "pixeldrain": AsyncLimiter(10, 1), 46 | "other": AsyncLimiter(25, 1) 47 | } 48 | 49 | self.download_spacer = {'bunkr': 0.5, 'bunkrr': 0.5, 'cyberdrop': 0, 'cyberfile': 0, "pixeldrain": 0, "coomer": 0.5, "kemono": 0.5} 50 | 51 | self.global_rate_limiter = AsyncLimiter(self.rate_limit, 1) 52 | self.session_limit = asyncio.Semaphore(50) 53 | self.download_session_limit = asyncio.Semaphore(self.manager.config_manager.global_settings_data['Rate_Limiting_Options']['max_simultaneous_downloads']) 54 | 55 | self.scraper_session = ScraperClient(self) 56 | self.downloader_session = DownloadClient(manager, self) 57 | 58 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 59 | 60 | async def get_downloader_spacer(self, key: str) -> float: 61 | """Returns the download spacer for a domain""" 62 | if key in self.download_spacer: 63 | return self.download_spacer[key] 64 | return 0.1 65 | 66 | async def get_rate_limiter(self, domain: str) -> AsyncLimiter: 67 | """Get a rate limiter for a domain""" 68 | if domain in self.domain_rate_limits: 69 | return self.domain_rate_limits[domain] 70 | return self.domain_rate_limits["other"] 71 | 72 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 73 | 74 | async def check_http_status(self, response: ClientResponse, download: bool = False) -> None: 75 | """Checks the HTTP status code and raises an exception if it's not acceptable""" 76 | status = response.status 77 | headers = response.headers 78 | 79 | if download: 80 | if headers.get('ETag') in ['"eb669b6362e031fa2b0f1215480c4e30"', '"a9e4cee098dc6f1e09ec124299f26b30"']: 81 | raise DownloadFailure(status="Bunkr Maintenance", message="Bunkr under maintenance") 82 | if headers.get('ETag') == '"d835884373f4d6c8f24742ceabe74946"': 83 | raise 
DownloadFailure(status=HTTPStatus.NOT_FOUND, message="Imgur image has been removed") 84 | if headers.get('ETag') == '"65b7753c-528a"': 85 | raise DownloadFailure(status=HTTPStatus.NOT_FOUND, message="SC Scrape Image") 86 | 87 | if HTTPStatus.OK <= status < HTTPStatus.BAD_REQUEST: 88 | return 89 | 90 | if "gofile" in response.url.host.lower(): 91 | try: 92 | JSON_Resp = await response.json() 93 | if "notFound" in JSON_Resp["status"]: 94 | raise ScrapeFailure(404, "Does Not Exist") 95 | except ContentTypeError: 96 | pass 97 | 98 | if "imgur" in response.url.host.lower(): 99 | try: 100 | JSON_Resp = await response.json() 101 | if "status" in JSON_Resp: 102 | raise ScrapeFailure(JSON_Resp['status'], JSON_Resp['data']['error']) 103 | except ContentTypeError: 104 | pass 105 | 106 | try: 107 | phrase = HTTPStatus(status).phrase 108 | except ValueError: 109 | phrase = "Unknown" 110 | 111 | response_text = await response.text() 112 | if "DDoS-Guard" in response_text: 113 | raise DDOSGuardFailure(status="DDOS-Guard", message="DDoS-Guard detected") 114 | 115 | if not headers.get('Content-Type'): 116 | raise DownloadFailure(status=CustomHTTPStatus.IM_A_TEAPOT, message="No content-type in response header") 117 | 118 | raise DownloadFailure(status=status, message=f"HTTP status code {status}: {phrase}") 119 | -------------------------------------------------------------------------------- /cyberdrop_dl/ui/ui.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from pathlib import Path 4 | from typing import TYPE_CHECKING 5 | 6 | from InquirerPy import inquirer 7 | from InquirerPy.validator import PathValidator 8 | from rich.console import Console 9 | 10 | from cyberdrop_dl import __version__ 11 | from cyberdrop_dl.ui.prompts.settings_authentication_prompts import edit_authentication_values_prompt 12 | from cyberdrop_dl.ui.prompts.general_prompts import ( 13 | main_prompt, select_config_prompt, donations_prompt, 14 | import_cyberdrop_v4_items_prompt, manage_configs_prompt) 15 | from cyberdrop_dl.ui.prompts.settings_global_prompts import edit_global_settings_prompt 16 | from cyberdrop_dl.ui.prompts.url_file_prompts import edit_urls_prompt 17 | from cyberdrop_dl.ui.prompts.settings_user_prompts import create_new_config_prompt, edit_config_values_prompt 18 | 19 | console = Console() 20 | 21 | if TYPE_CHECKING: 22 | from cyberdrop_dl.managers.manager import Manager 23 | 24 | 25 | def program_ui(manager: Manager): 26 | """Program UI""" 27 | while True: 28 | console.clear() 29 | console.print(f"[bold]Cyberdrop Downloader (V{str(__version__)})[/bold]") 30 | console.print(f"[bold]Current Config:[/bold] {manager.config_manager.loaded_config}") 31 | 32 | action = main_prompt(manager) 33 | 34 | # Download 35 | if action == 1: 36 | break 37 | 38 | # Download (All Configs) 39 | if action == 2: 40 | manager.args_manager.all_configs = True 41 | break 42 | 43 | # Retry Failed Downloads 44 | elif action == 3: 45 | manager.args_manager.retry = True 46 | break 47 | 48 | # Sort All Configs 49 | elif action == 4: 50 | manager.args_manager.sort_all_configs = True 51 | manager.args_manager.all_configs = True 52 | break 53 | 54 | # Edit URLs 55 | elif action == 5: 56 | input_file = manager.config_manager.settings_data['Files']['input_file'] if not manager.args_manager.input_file else manager.args_manager.input_file 57 | edit_urls_prompt(input_file, manager.vi_mode) 58 | 59 | # Select Config 60 | elif action == 6: 61 | configs = 
manager.config_manager.get_configs() 62 | selected_config = select_config_prompt(manager, configs) 63 | manager.config_manager.change_config(selected_config) 64 | 65 | elif action == 7: 66 | console.clear() 67 | console.print("Editing Input / Output File Paths") 68 | input_file = inquirer.filepath( 69 | message="Enter the input file path:", 70 | default=str(manager.config_manager.settings_data['Files']['input_file']), 71 | validate=PathValidator(is_file=True, message="Input is not a file"), 72 | vi_mode=manager.vi_mode, 73 | ).execute() 74 | download_folder = inquirer.text( 75 | message="Enter the download folder path:", 76 | default=str(manager.config_manager.settings_data['Files']['download_folder']), 77 | validate=PathValidator(is_dir=True, message="Input is not a directory"), 78 | vi_mode=manager.vi_mode, 79 | ).execute() 80 | 81 | manager.config_manager.settings_data['Files']['input_file'] = Path(input_file) 82 | manager.config_manager.settings_data['Files']['download_folder'] = Path(download_folder) 83 | manager.config_manager.write_updated_settings_config() 84 | 85 | # Manage Configs 86 | elif action == 8: 87 | while True: 88 | console.clear() 89 | console.print("[bold]Manage Configs[/bold]") 90 | console.print(f"[bold]Current Config:[/bold] {manager.config_manager.loaded_config}") 91 | 92 | action = manage_configs_prompt(manager) 93 | 94 | # Change Default Config 95 | if action == 1: 96 | configs = manager.config_manager.get_configs() 97 | selected_config = select_config_prompt(manager, configs) 98 | manager.config_manager.change_default_config(selected_config) 99 | 100 | # Create A Config 101 | elif action == 2: 102 | create_new_config_prompt(manager) 103 | 104 | # Delete A Config 105 | elif action == 3: 106 | configs = manager.config_manager.get_configs() 107 | if len(configs) != 1: 108 | selected_config = select_config_prompt(manager, configs) 109 | if selected_config == manager.config_manager.loaded_config: 110 | inquirer.confirm( 111 | message="You cannot delete the currently active config, press enter to continue.", 112 | default=False, 113 | vi_mode=manager.vi_mode, 114 | ).execute() 115 | continue 116 | manager.config_manager.delete_config(selected_config) 117 | else: 118 | inquirer.confirm( 119 | message="There is only one config, press enter to continue.", 120 | default=False, 121 | vi_mode=manager.vi_mode, 122 | ).execute() 123 | 124 | # Edit Config 125 | elif action == 4: 126 | edit_config_values_prompt(manager) 127 | 128 | # Edit Authentication Values 129 | elif action == 5: 130 | edit_authentication_values_prompt(manager) 131 | 132 | # Edit Global Settings 133 | elif action == 6: 134 | edit_global_settings_prompt(manager) 135 | 136 | # Done 137 | elif action == 7: 138 | break 139 | 140 | # Import Cyberdrop_V4 Items 141 | elif action == 9: 142 | import_cyberdrop_v4_items_prompt(manager) 143 | 144 | # Exit 145 | elif action == 10: 146 | exit(0) 147 | -------------------------------------------------------------------------------- /cyberdrop_dl/ui/prompts/general_prompts.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import os 4 | from typing import TYPE_CHECKING 5 | 6 | from InquirerPy import inquirer 7 | from InquirerPy.base.control import Choice 8 | from InquirerPy.separator import Separator 9 | from InquirerPy.validator import EmptyInputValidator, PathValidator 10 | from rich.console import Console 11 | 12 | from cyberdrop_dl.utils.transfer.transfer_v4_config import 
transfer_v4_config 13 | from cyberdrop_dl.utils.transfer.transfer_v4_db import transfer_v4_db 14 | 15 | if TYPE_CHECKING: 16 | from typing import List 17 | 18 | from cyberdrop_dl.managers.manager import Manager 19 | 20 | 21 | console = Console() 22 | 23 | 24 | def main_prompt(manager: Manager) -> int: 25 | """Main prompt for the program""" 26 | action = inquirer.select( 27 | message="What would you like to do?", 28 | choices=[ 29 | Choice(1, "Download"), 30 | Choice(2, "Download (All Configs)"), 31 | Choice(3, "Retry Failed Downloads"), 32 | Choice(4, "Sort All Configs"), 33 | Choice(5, "Edit URLs"), 34 | Separator(), 35 | Choice(6, f"Select Config (Current: {manager.config_manager.loaded_config})"), 36 | Choice(7, "Change URLs.txt file and Download Location"), 37 | Choice(8, "Edit Configs"), 38 | Separator(), 39 | Choice(9, "Import Cyberdrop_V4 Items"), 40 | Choice(10, "Exit"), 41 | ], long_instruction="ARROW KEYS: Navigate | ENTER: Select", 42 | vi_mode=manager.vi_mode, 43 | ).execute() 44 | 45 | return action 46 | 47 | 48 | def manage_configs_prompt(manager: Manager) -> int: 49 | """Manage Configs Prompt""" 50 | console.clear() 51 | action = inquirer.select( 52 | message="What would you like to do?", 53 | choices=[ 54 | Choice(1, "Change Default Config"), 55 | Choice(2, "Create A New Config"), 56 | Choice(3, "Delete A Config"), 57 | Separator(), 58 | Choice(4, "Edit Config"), 59 | Choice(5, "Edit Authentication Values"), 60 | Choice(6, "Edit Global Values"), 61 | Choice(7, "Done"), 62 | ], long_instruction="ARROW KEYS: Navigate | ENTER: Select", 63 | vi_mode=manager.vi_mode, 64 | ).execute() 65 | 66 | return action 67 | 68 | 69 | def select_config_prompt(manager: Manager, configs: List) -> str: 70 | """Select a config file from a list of configs""" 71 | choice = inquirer.fuzzy( 72 | choices=configs, 73 | multiselect=False, 74 | validate=lambda result: len(result) > 0, 75 | invalid_message="Need to select a config.", 76 | message="Select a config file:", 77 | long_instruction="ARROW KEYS: Navigate | TYPE: Filter | TAB: select, ENTER: Finish Selection", 78 | vi_mode=manager.vi_mode, 79 | ).execute() 80 | 81 | return choice 82 | 83 | 84 | def import_cyberdrop_v4_items_prompt(manager: Manager) -> None: 85 | """Import Cyberdrop_V4 Items""" 86 | while True: 87 | console.clear() 88 | console.print("Editing Config Values") 89 | action = inquirer.select( 90 | message="What would you like to do?", 91 | choices=[ 92 | Choice(1, "Import Config"), 93 | Choice(2, "Import download_history.sql"), 94 | Choice(3, "Done"), 95 | ], long_instruction="ARROW KEYS: Navigate | ENTER: Select", 96 | vi_mode=manager.vi_mode, 97 | ).execute() 98 | 99 | # Import Config 100 | if action == 1: 101 | new_config_name = inquirer.text( 102 | message="What should this config be called?", 103 | validate=EmptyInputValidator("Input should not be empty"), 104 | vi_mode=manager.vi_mode, 105 | ).execute() 106 | 107 | if (manager.path_manager.config_dir / new_config_name).is_dir(): 108 | console.print(f"Config with name '{new_config_name}' already exists!") 109 | inquirer.confirm(message="Press enter to return to the import menu.").execute() 110 | continue 111 | 112 | home_path = "~/" if os.name == "posix" else "C:\\" 113 | import_config_path = inquirer.filepath( 114 | message="Select the config file to import", 115 | default=home_path, 116 | validate=PathValidator(is_file=True, message="Input is not a file"), 117 | ).execute() 118 | 119 | transfer_v4_config(manager, import_config_path, new_config_name) 120 | 121 | # Import 
download_history.sql 122 | elif action == 2: 123 | home_path = "~/" if os.name == "posix" else "C:\\" 124 | import_download_history_path = inquirer.filepath( 125 | message="Select the download_history.sql file to import", 126 | default=home_path, 127 | validate=PathValidator(is_file=True, message="Input is not a file"), 128 | vi_mode=manager.vi_mode, 129 | ).execute() 130 | 131 | transfer_v4_db(import_download_history_path, manager.path_manager.history_db) 132 | 133 | # Done 134 | elif action == 3: 135 | break 136 | 137 | 138 | def donations_prompt(manager: Manager) -> None: 139 | """Donations prompt""" 140 | console.clear() 141 | console.print("[bold]Donations[/bold]") 142 | console.print("") 143 | console.print("I started making this program around three years ago." 144 | "\nIt has grown larger than I could've imagined and I'm very proud of it." 145 | "\nI have put a lot of time and effort into this program and I'm glad that people are using it." 146 | "\nThanks to everyone who has supported me; " 147 | "it keeps me motivated to continue working on this program.") 148 | console.print("") 149 | console.print("If you'd like to support me and my work, you can donate to me via the following methods:") 150 | console.print("BuyMeACoffee: https://www.buymeacoffee.com/juleswinnft") 151 | console.print("GitHub Sponsors: https://github.com/sponsors/Jules-WinnfieldX") 152 | 153 | console.print("") 154 | console.print("Thank you for your support!") 155 | console.print("") 156 | inquirer.confirm(message="Press enter to return to the main menu.", vi_mode=manager.vi_mode).execute() 157 | -------------------------------------------------------------------------------- /cyberdrop_dl/scraper/crawlers/gofile_crawler.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import http 4 | import re 5 | from copy import deepcopy 6 | from typing import TYPE_CHECKING 7 | 8 | from aiolimiter import AsyncLimiter 9 | from yarl import URL 10 | 11 | from cyberdrop_dl.clients.errors import ScrapeFailure, DownloadFailure, PasswordProtected, NoExtensionFailure 12 | from cyberdrop_dl.scraper.crawler import Crawler 13 | from cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem 14 | from cyberdrop_dl.utils.utilities import get_filename_and_ext, error_handling_wrapper, log 15 | 16 | if TYPE_CHECKING: 17 | from cyberdrop_dl.clients.scraper_client import ScraperClient 18 | from cyberdrop_dl.managers.manager import Manager 19 | 20 | 21 | class GoFileCrawler(Crawler): 22 | def __init__(self, manager: Manager): 23 | super().__init__(manager, "gofile", "GoFile") 24 | self.api_address = URL("https://api.gofile.io") 25 | self.js_address = URL("https://gofile.io/dist/js/alljs.js") 26 | self.primary_base_domain = URL("https://gofile.io") 27 | self.token = "" 28 | self.websiteToken = "" 29 | self.headers = {} 30 | self.request_limiter = AsyncLimiter(10, 1) 31 | 32 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 33 | 34 | async def fetch(self, scrape_item: ScrapeItem) -> None: 35 | """Determines where to send the scrape item based on the url""" 36 | task_id = await self.scraping_progress.add_task(scrape_item.url) 37 | 38 | await self.get_token(self.api_address / "accounts", self.client) 39 | await self.get_website_token(self.js_address, self.client) 40 | 41 | await self.album(scrape_item) 42 | 43 | await self.scraping_progress.remove_task(task_id) 44 | 45 | 
@error_handling_wrapper 46 | async def album(self, scrape_item: ScrapeItem) -> None: 47 | """Scrapes an album""" 48 | content_id = scrape_item.url.name 49 | 50 | try: 51 | async with self.request_limiter: 52 | JSON_Resp = await self.client.get_json(self.domain, (self.api_address / "contents" / content_id).with_query({"wt": self.websiteToken}), headers_inc=self.headers) 53 | except DownloadFailure as e: 54 | if e.status == http.HTTPStatus.UNAUTHORIZED: 55 | self.websiteToken = "" 56 | self.manager.cache_manager.remove("gofile_website_token") 57 | await self.get_website_token(self.js_address, self.client) 58 | async with self.request_limiter: 59 | JSON_Resp = await self.client.get_json(self.domain, (self.api_address / "contents" / content_id).with_query({"wt": self.websiteToken}), headers_inc=self.headers) 60 | else: 61 | raise ScrapeFailure(e.status, e.message) 62 | 63 | if JSON_Resp["status"] == "error-notFound": 64 | raise ScrapeFailure(404, "Album not found") 65 | 66 | JSON_Resp = JSON_Resp['data'] 67 | 68 | if "password" in JSON_Resp: 69 | raise PasswordProtected() 70 | 71 | title = await self.create_title(JSON_Resp["name"], content_id, None) 72 | 73 | contents = JSON_Resp["children"] 74 | for content_id in contents: 75 | content = contents[content_id] 76 | if content["type"] == "folder": 77 | new_scrape_item = await self.create_scrape_item(scrape_item, self.primary_base_domain / "d" / content["code"], title, True) 78 | self.manager.task_group.create_task(self.run(new_scrape_item)) 79 | continue 80 | if content["link"] == "overloaded": 81 | link = URL(content["directLink"]) 82 | else: 83 | link = URL(content["link"]) 84 | try: 85 | filename, ext = await get_filename_and_ext(link.name) 86 | except NoExtensionFailure: 87 | await log(f"Scrape Failed: {link} (No File Extension)", 40) 88 | await self.manager.log_manager.write_scrape_error_log(link, " No File Extension") 89 | await self.manager.progress_manager.scrape_stats_progress.add_failure("No File Extension") 90 | continue 91 | duplicate_scrape_item = deepcopy(scrape_item) 92 | duplicate_scrape_item.possible_datetime = content["createTime"] 93 | duplicate_scrape_item.part_of_album = True 94 | await duplicate_scrape_item.add_to_parent_title(title) 95 | await self.handle_file(link, duplicate_scrape_item, filename, ext) 96 | 97 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 98 | 99 | @error_handling_wrapper 100 | async def get_token(self, create_acct_address: URL, session: ScraperClient) -> None: 101 | """Get the token for the API""" 102 | if self.token: 103 | self.headers["Authorization"] = f"Bearer {self.token}" 104 | return 105 | 106 | api_token = self.manager.config_manager.authentication_data["GoFile"]["gofile_api_key"] 107 | if api_token: 108 | self.token = api_token 109 | self.headers["Authorization"] = f"Bearer {self.token}" 110 | await self.set_cookie(session) 111 | return 112 | 113 | async with self.request_limiter: 114 | # POST to the accounts endpoint to create an anonymous account and obtain an API token 115 | JSON_Resp = await session.post_data(self.domain, create_acct_address, data={}) 116 | if JSON_Resp["status"] == "ok": 117 | self.token = JSON_Resp["data"]["token"] 118 | self.headers["Authorization"] = f"Bearer {self.token}" 119 | await self.set_cookie(session) 120 | else: 121 | raise ScrapeFailure(403, "Couldn't generate GoFile token") 122 | 123 | @error_handling_wrapper 124 | async def get_website_token(self, js_address: URL, session: ScraperClient) -> None: 125 | """Fetches the gofile website token (wt) from 
alljs.js""" 126 | if self.websiteToken: 127 | return 128 | 129 | website_token = self.manager.cache_manager.get("gofile_website_token") 130 | if website_token: 131 | self.websiteToken = website_token 132 | return 133 | 134 | async with self.request_limiter: 135 | text = await session.get_text(self.domain, js_address) 136 | text = str(text) 137 | website_token_match = re.search(r'fetchData\s=\s\{\swt:\s"(.*?)"', text) 138 | if not website_token_match: 139 | raise ScrapeFailure(403, "Couldn't generate GoFile websiteToken") 140 | self.websiteToken = website_token_match.group(1) 141 | self.manager.cache_manager.save("gofile_website_token", self.websiteToken) 142 | async def set_cookie(self, session: ScraperClient) -> None: 143 | """Sets the given token as a cookie into the session (and client)""" 144 | client_token = self.token 145 | morsel: http.cookies.Morsel = http.cookies.Morsel() 146 | morsel['domain'] = 'gofile.io' 147 | morsel.set('accountToken', client_token, client_token) 148 | session.client_manager.cookies.update_cookies({'gofile.io': morsel}) 149 | -------------------------------------------------------------------------------- /cyberdrop_dl/scraper/crawlers/jpgchurch_crawler.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import calendar 4 | import datetime 5 | import re 6 | from typing import TYPE_CHECKING 7 | 8 | from aiolimiter import AsyncLimiter 9 | from yarl import URL 10 | 11 | from cyberdrop_dl.scraper.crawler import Crawler 12 | from cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem 13 | from cyberdrop_dl.utils.utilities import error_handling_wrapper, get_filename_and_ext 14 | 15 | if TYPE_CHECKING: 16 | from cyberdrop_dl.managers.manager import Manager 17 | 18 | 19 | class JPGChurchCrawler(Crawler): 20 | def __init__(self, manager: Manager): 21 | super().__init__(manager, "jpg.church", "JPGChurch") 22 | self.primary_base_domain = URL("https://jpg4.su") 23 | self.request_limiter = AsyncLimiter(10, 1) 24 | 25 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 26 | 27 | async def fetch(self, scrape_item: ScrapeItem) -> None: 28 | """Determines where to send the scrape item based on the url""" 29 | task_id = await self.scraping_progress.add_task(scrape_item.url) 30 | 31 | if await self.check_direct_link(scrape_item.url): 32 | await self.handle_direct_link(scrape_item) 33 | else: 34 | scrape_item.url = self.primary_base_domain / scrape_item.url.path[1:] 35 | if "a" in scrape_item.url.parts or "album" in scrape_item.url.parts: 36 | await self.album(scrape_item) 37 | elif 'image' in scrape_item.url.parts or 'img' in scrape_item.url.parts or 'images' in scrape_item.url.parts: 38 | await self.image(scrape_item) 39 | else: 40 | await self.profile(scrape_item) 41 | 42 | await self.scraping_progress.remove_task(task_id) 43 | 44 | @error_handling_wrapper 45 | async def profile(self, scrape_item: ScrapeItem) -> None: 46 | """Scrapes a user profile""" 47 | async with self.request_limiter: 48 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 49 | 50 | title = await self.create_title(soup.select_one('meta[property="og:title"]').get("content"), None, None) 51 | link_next = URL(soup.select_one("a[id=list-most-recent-link]").get("href")) 52 | 53 | while True: 54 | async with self.request_limiter: 55 | soup = await self.client.get_BS4(self.domain, link_next) 56 | links = soup.select("a[href*=img]") 57 | for link in links: 58 | link = URL(link.get('href')) 59 | 
new_scrape_item = await self.create_scrape_item(scrape_item, link, title, True) 60 | self.manager.task_group.create_task(self.run(new_scrape_item)) 61 | 62 | link_next = soup.select_one('a[data-pagination=next]') 63 | if link_next is not None: 64 | link_next = link_next.get('href') 65 | if link_next is not None: 66 | link_next = URL(link_next) 67 | else: 68 | break 69 | else: 70 | break 71 | 72 | @error_handling_wrapper 73 | async def album(self, scrape_item: ScrapeItem) -> None: 74 | """Scrapes an album""" 75 | album_id = scrape_item.url.parts[2] 76 | results = await self.get_album_results(album_id) 77 | 78 | async with self.request_limiter: 79 | soup = await self.client.get_BS4(self.domain, scrape_item.url / "sub") 80 | 81 | title = await self.create_title(soup.select_one("a[data-text=album-name]").get_text(), scrape_item.url.parts[2], None) 82 | albums = soup.select("a[class='image-container --media']") 83 | for album in albums: 84 | sub_album_link = URL(album.get('href')) 85 | new_scrape_item = await self.create_scrape_item(scrape_item, sub_album_link, title, True) 86 | self.manager.task_group.create_task(self.run(new_scrape_item)) 87 | 88 | async with self.request_limiter: 89 | soup = await self.client.get_BS4(self.domain, scrape_item.url / "sub") 90 | link_next = URL(soup.select_one("a[id=list-most-recent-link]").get("href")) 91 | 92 | while True: 93 | async with self.request_limiter: 94 | soup = await self.client.get_BS4(self.domain, link_next) 95 | links = soup.select("a[href*=img] img") 96 | for link in links: 97 | link = URL(link.get('src')) 98 | new_scrape_item = await self.create_scrape_item(scrape_item, link, title, True, album_id) 99 | if not await self.check_album_results(link, results): 100 | await self.handle_direct_link(new_scrape_item) 101 | 102 | link_next = soup.select_one('a[data-pagination=next]') 103 | if link_next is not None: 104 | link_next = link_next.get('href') 105 | if link_next is not None: 106 | link_next = URL(link_next) 107 | else: 108 | break 109 | else: 110 | break 111 | 112 | @error_handling_wrapper 113 | async def image(self, scrape_item: ScrapeItem) -> None: 114 | """Scrapes an image""" 115 | if await self.check_complete_from_referer(scrape_item): 116 | return 117 | 118 | async with self.request_limiter: 119 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 120 | 121 | link = URL(soup.select_one("div[id=image-viewer-container] img").get('src')) 122 | link = link.with_name(link.name.replace('.md.', '.').replace('.th.', '.')) 123 | date = soup.select_one("p[class*=description-meta] span").get("title") 124 | date = await self.parse_datetime(date) 125 | scrape_item.possible_datetime = date 126 | 127 | filename, ext = await get_filename_and_ext(link.name) 128 | await self.handle_file(link, scrape_item, filename, ext) 129 | 130 | @error_handling_wrapper 131 | async def handle_direct_link(self, scrape_item: ScrapeItem) -> None: 132 | """Handles a direct link""" 133 | scrape_item.url = scrape_item.url.with_name(scrape_item.url.name.replace('.md.', '.').replace('.th.', '.')) 134 | pattern = r"(jpg\.fish/)|(jpg\.fishing/)|(jpg\.church/)" 135 | scrape_item.url = URL(re.sub(pattern, r'host.church/', str(scrape_item.url))) 136 | filename, ext = await get_filename_and_ext(scrape_item.url.name) 137 | await self.handle_file(scrape_item.url, scrape_item, filename, ext) 138 | 139 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 140 | 141 | async def parse_datetime(self, date: str) -> 
int: 142 | """Parses a datetime string into a unix timestamp""" 143 | date = datetime.datetime.strptime(date, "%Y-%m-%d %H:%M:%S") 144 | return calendar.timegm(date.timetuple()) 145 | 146 | async def check_direct_link(self, url: URL) -> bool: 147 | """Determines if the url is a direct link or not""" 148 | cdn_possibilities = r"^(?:(jpg.church\/images\/...)|(simp..jpg.church)|(jpg.fish\/images\/...)|(simp..jpg.fish)|(jpg.fishing\/images\/...)|(simp..jpg.fishing)|(simp..host.church))" 149 | if not re.match(cdn_possibilities, url.host): 150 | return False 151 | return True 152 | -------------------------------------------------------------------------------- /cyberdrop_dl/scraper/crawlers/coomer_crawler.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import calendar 4 | import datetime 5 | from typing import TYPE_CHECKING, Tuple, Dict 6 | 7 | from aiolimiter import AsyncLimiter 8 | from yarl import URL 9 | 10 | from cyberdrop_dl.scraper.crawler import Crawler 11 | from cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem 12 | from cyberdrop_dl.utils.utilities import get_filename_and_ext, error_handling_wrapper 13 | 14 | if TYPE_CHECKING: 15 | from cyberdrop_dl.managers.manager import Manager 16 | 17 | 18 | class CoomerCrawler(Crawler): 19 | def __init__(self, manager: Manager): 20 | super().__init__(manager, "coomer", "Coomer") 21 | self.primary_base_domain = URL("https://coomer.su") 22 | self.ddos_guard_domain = URL("https://*.coomer.su") 23 | self.api_url = URL("https://coomer.su/api/v1") 24 | self.request_limiter = AsyncLimiter(4, 1) 25 | 26 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 27 | 28 | async def fetch(self, scrape_item: ScrapeItem) -> None: 29 | """Determines where to send the scrape item based on the url""" 30 | task_id = await self.scraping_progress.add_task(scrape_item.url) 31 | 32 | if "thumbnails" in scrape_item.url.parts: 33 | parts = [x for x in scrape_item.url.parts if x not in ("thumbnail", "thumbnails", "/")] 34 | link = URL(f"https://{scrape_item.url.host}/{'/'.join(parts)}") 35 | scrape_item.url = link 36 | await self.handle_direct_link(scrape_item) 37 | elif "post" in scrape_item.url.parts: 38 | await self.post(scrape_item) 39 | elif "onlyfans" in scrape_item.url.parts or "fansly" in scrape_item.url.parts: 40 | await self.profile(scrape_item) 41 | else: 42 | await self.handle_direct_link(scrape_item) 43 | 44 | await self.scraping_progress.remove_task(task_id) 45 | 46 | @error_handling_wrapper 47 | async def profile(self, scrape_item: ScrapeItem) -> None: 48 | """Scrapes a profile""" 49 | offset = 0 50 | service, user = await self.get_service_and_user(scrape_item) 51 | user_str = await self.get_user_str_from_profile(scrape_item) 52 | api_call = self.api_url / service / "user" / user 53 | while True: 54 | async with self.request_limiter: 55 | JSON_Resp = await self.client.get_json(self.domain, api_call.with_query({"o": offset})) 56 | offset += 50 57 | if not JSON_Resp: 58 | break 59 | 60 | for post in JSON_Resp: 61 | await self.handle_post_content(scrape_item, post, user, user_str) 62 | 63 | @error_handling_wrapper 64 | async def post(self, scrape_item: ScrapeItem) -> None: 65 | """Scrapes a post""" 66 | service, user, post_id = await self.get_service_user_and_post(scrape_item) 67 | user_str = await self.get_user_str_from_post(scrape_item) 68 | api_call = self.api_url / service / "user" / user / "post" / post_id 69 | 
async with self.request_limiter: 70 | post = await self.client.get_json(self.domain, api_call) 71 | await self.handle_post_content(scrape_item, post, user, user_str) 72 | 73 | @error_handling_wrapper 74 | async def handle_post_content(self, scrape_item: ScrapeItem, post: Dict, user: str, user_str: str) -> None: 75 | """Handles the content of a post""" 76 | if "#ad" in post['content'] and self.manager.config_manager.settings_data['Ignore_Options']['ignore_coomer_ads']: 77 | return 78 | 79 | date = post["published"].replace("T", " ") 80 | post_id = post["id"] 81 | post_title = post["title"] 82 | if not post_title: 83 | post_title = "Untitled" 84 | 85 | async def handle_file(file_obj): 86 | link = self.primary_base_domain / ("data" + file_obj['path']) 87 | link = link.with_query({"f": file_obj['name']}) 88 | await self.create_new_scrape_item(link, scrape_item, user_str, post_title, post_id, date) 89 | 90 | if post['file']: 91 | await handle_file(post['file']) 92 | 93 | for file in post['attachments']: 94 | await handle_file(file) 95 | 96 | @error_handling_wrapper 97 | async def handle_direct_link(self, scrape_item: ScrapeItem) -> None: 98 | """Handles a direct link""" 99 | try: 100 | filename, ext = await get_filename_and_ext(scrape_item.url.query["f"]) 101 | except KeyError: 102 | filename, ext = await get_filename_and_ext(scrape_item.url.name) 103 | await self.handle_file(scrape_item.url, scrape_item, filename, ext) 104 | 105 | """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" 106 | 107 | async def parse_datetime(self, date: str) -> int: 108 | """Parses a datetime string""" 109 | date = datetime.datetime.strptime(date, "%Y-%m-%d %H:%M:%S") 110 | return calendar.timegm(date.timetuple()) 111 | 112 | async def get_user_str_from_post(self, scrape_item: ScrapeItem) -> str: 113 | """Gets the user string from a scrape item""" 114 | async with self.request_limiter: 115 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 116 | user = soup.select_one("a[class=post__user-name]").text 117 | return user 118 | 119 | async def get_user_str_from_profile(self, scrape_item: ScrapeItem) -> str: 120 | """Gets the user string from a scrape item""" 121 | async with self.request_limiter: 122 | soup = await self.client.get_BS4(self.domain, scrape_item.url) 123 | user = soup.select_one("span[itemprop=name]").text 124 | return user 125 | 126 | async def get_service_and_user(self, scrape_item: ScrapeItem) -> Tuple[str, str]: 127 | """Gets the service and user from a scrape item""" 128 | user = scrape_item.url.parts[3] 129 | service = scrape_item.url.parts[1] 130 | return service, user 131 | 132 | async def get_service_user_and_post(self, scrape_item: ScrapeItem) -> Tuple[str, str, str]: 133 | """Gets the service, user and post id from a scrape item""" 134 | user = scrape_item.url.parts[3] 135 | service = scrape_item.url.parts[1] 136 | post = scrape_item.url.parts[5] 137 | return service, user, post 138 | 139 | async def create_new_scrape_item(self, link: URL, old_scrape_item: ScrapeItem, user: str, title: str, post_id: str, 140 | date: str) -> None: 141 | """Creates a new scrape item with the same parent as the old scrape item""" 142 | post_title = None 143 | if self.manager.config_manager.settings_data['Download_Options']['separate_posts']: 144 | post_title = f"{date} - {title}" 145 | if self.manager.config_manager.settings_data['Download_Options']['include_album_id_in_folder_name']: 146 | post_title = post_id + " - " + post_title 147 | 
148 | new_title = await self.create_title(user, None, None) 149 | new_scrape_item = await self.create_scrape_item(old_scrape_item, link, new_title, True, None, await self.parse_datetime(date)) 150 | await new_scrape_item.add_to_parent_title(post_title) 151 | self.manager.task_group.create_task(self.run(new_scrape_item)) 152 | -------------------------------------------------------------------------------- /cyberdrop_dl/main.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import contextlib 3 | import logging 4 | import os 5 | import sys 6 | import traceback 7 | 8 | from rich.live import Live 9 | 10 | from cyberdrop_dl.managers.manager import Manager 11 | from cyberdrop_dl.scraper.scraper import ScrapeMapper 12 | from cyberdrop_dl.ui.ui import program_ui 13 | from cyberdrop_dl.utils.sorting import Sorter 14 | from cyberdrop_dl.utils.utilities import check_latest_pypi, log_with_color, check_partials_and_empty_folders, log 15 | 16 | 17 | def startup() -> Manager: 18 | """ 19 | Starts the program and returns the manager 20 | This will also run the UI for the program 21 | After this function returns, the manager will be ready to use and scraping / downloading can begin 22 | """ 23 | 24 | try: 25 | manager = Manager() 26 | manager.startup() 27 | 28 | if not manager.args_manager.immediate_download: 29 | program_ui(manager) 30 | 31 | return manager 32 | 33 | except KeyboardInterrupt: 34 | print("\nExiting...") 35 | exit(0) 36 | 37 | 38 | async def runtime(manager: Manager) -> None: 39 | """Main runtime loop for the program, this will run until all scraping and downloading is complete""" 40 | scrape_mapper = ScrapeMapper(manager) 41 | 42 | # NEW CODE 43 | async with asyncio.TaskGroup() as task_group: 44 | manager.task_group = task_group 45 | await scrape_mapper.start() 46 | 47 | 48 | async def director(manager: Manager) -> None: 49 | """Runs the program and handles the UI""" 50 | configs = manager.config_manager.get_configs() 51 | configs_ran = [] 52 | manager.path_manager.startup() 53 | manager.log_manager.startup() 54 | 55 | logger_debug = logging.getLogger("cyberdrop_dl_debug") 56 | import cyberdrop_dl.utils.utilities 57 | if os.getenv("PYCHARM_HOSTED") is not None or manager.config_manager.settings_data['Runtime_Options']['log_level'] == -1: 58 | manager.config_manager.settings_data['Runtime_Options']['log_level'] = 10 59 | cyberdrop_dl.utils.utilities.DEBUG_VAR = True 60 | 61 | if cyberdrop_dl.utils.utilities.DEBUG_VAR: 62 | logger_debug.setLevel(manager.config_manager.settings_data['Runtime_Options']['log_level']) 63 | if os.getenv("PYCHARM_HOSTED") is not None: 64 | file_handler_debug = logging.FileHandler("../cyberdrop_dl_debug.log", mode="w") 65 | else: 66 | file_handler_debug = logging.FileHandler("./cyberdrop_dl_debug.log", mode="w") 67 | file_handler_debug.setLevel(manager.config_manager.settings_data['Runtime_Options']['log_level']) 68 | formatter = logging.Formatter("%(levelname)-8s : %(asctime)s : %(filename)s:%(lineno)d : %(message)s") 69 | file_handler_debug.setFormatter(formatter) 70 | logger_debug.addHandler(file_handler_debug) 71 | 72 | # aiosqlite_log = logging.getLogger("aiosqlite") 73 | # aiosqlite_log.setLevel(manager.config_manager.settings_data['Runtime_Options']['log_level']) 74 | # aiosqlite_log.addHandler(file_handler_debug) 75 | 76 | while True: 77 | logger = logging.getLogger("cyberdrop_dl") 78 | if manager.args_manager.all_configs: 79 | if len(logger.handlers) > 0: 80 | await log("Picking new config...", 20) 
81 | 82 | configs_to_run = list(set(configs) - set(configs_ran)) 83 | configs_to_run.sort() 84 | manager.config_manager.change_config(configs_to_run[0]) 85 | configs_ran.append(configs_to_run[0]) 86 | if len(logger.handlers) > 0: 87 | await log(f"Changing config to {configs_to_run[0]}...", 20) 88 | old_file_handler = logger.handlers[0] 89 | logger.removeHandler(logger.handlers[0]) 90 | old_file_handler.close() 91 | 92 | logger.setLevel(manager.config_manager.settings_data['Runtime_Options']['log_level']) 93 | file_handler = logging.FileHandler(manager.path_manager.main_log, mode="w") 94 | 95 | if cyberdrop_dl.utils.utilities.DEBUG_VAR: 96 | manager.config_manager.settings_data['Runtime_Options']['log_level'] = 10 97 | file_handler.setLevel(manager.config_manager.settings_data['Runtime_Options']['log_level']) 98 | 99 | formatter = logging.Formatter("%(levelname)-8s : %(asctime)s : %(filename)s:%(lineno)d : %(message)s") 100 | file_handler.setFormatter(formatter) 101 | logger.addHandler(file_handler) 102 | 103 | await log("Starting Async Processes...", 20) 104 | await manager.async_startup() 105 | 106 | await log("Starting UI...", 20) 107 | if not manager.args_manager.sort_all_configs: 108 | try: 109 | if not manager.args_manager.no_ui: 110 | with Live(manager.progress_manager.layout, refresh_per_second=manager.config_manager.global_settings_data['UI_Options']['refresh_rate']): 111 | await runtime(manager) 112 | else: 113 | await runtime(manager) 114 | except Exception as e: 115 | print("\nAn error occurred, please report this to the developer") 116 | print(e) 117 | print(traceback.format_exc()) 118 | exit(1) 119 | 120 | clear_screen_proc = await asyncio.create_subprocess_shell('cls' if os.name == 'nt' else 'clear') 121 | await clear_screen_proc.wait() 122 | 123 | await log_with_color(f"Running Post-Download Processes For Config: {manager.config_manager.loaded_config}...", "green", 20) 124 | if isinstance(manager.args_manager.sort_downloads, bool): 125 | if manager.args_manager.sort_downloads: 126 | sorter = Sorter(manager) 127 | await sorter.sort() 128 | elif manager.config_manager.settings_data['Sorting']['sort_downloads'] and not manager.args_manager.retry: 129 | sorter = Sorter(manager) 130 | await sorter.sort() 131 | await check_partials_and_empty_folders(manager) 132 | 133 | if manager.config_manager.settings_data['Runtime_Options']['update_last_forum_post']: 134 | await log("Updating Last Forum Post...", 20) 135 | await manager.log_manager.update_last_forum_post() 136 | 137 | await log("Printing Stats...", 20) 138 | await manager.progress_manager.print_stats() 139 | 140 | await log("Checking for Program End...", 20) 141 | if not manager.args_manager.all_configs or not list(set(configs) - set(configs_ran)): 142 | break 143 | await asyncio.sleep(5) 144 | 145 | await log("Checking for Updates...", 20) 146 | await check_latest_pypi() 147 | 148 | await log("Closing Program...", 20) 149 | await manager.close() 150 | 151 | await log_with_color("\nFinished downloading. 
Enjoy :)", 'green', 20) 152 | 153 | 154 | def main(): 155 | manager = startup() 156 | 157 | loop = asyncio.new_event_loop() 158 | asyncio.set_event_loop(loop) 159 | with contextlib.suppress(RuntimeError): 160 | try: 161 | asyncio.run(director(manager)) 162 | except KeyboardInterrupt: 163 | print("\nTrying to Exit...") 164 | with contextlib.suppress(Exception): 165 | asyncio.run(manager.close()) 166 | exit(1) 167 | loop.close() 168 | sys.exit(0) 169 | 170 | 171 | if __name__ == '__main__': 172 | main() 173 | --------------------------------------------------------------------------------
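
The crawlers listed above all share the same shape: construct with the Manager, set a primary_base_domain and an AsyncLimiter, implement fetch() to route URLs inside the shared scraping-progress tracking, and decorate the concrete scrape methods with error_handling_wrapper before handing results to handle_file. The minimal sketch below illustrates that pattern in isolation; it is not part of the repository. The ExampleCrawler name, the example.com domain, and the CSS selector are hypothetical, and the sketch assumes the Crawler base class and its helpers (scraping_progress, client.get_BS4, get_filename_and_ext, handle_file) behave as they do in the crawlers shown above.

from __future__ import annotations

from typing import TYPE_CHECKING

from aiolimiter import AsyncLimiter
from yarl import URL

from cyberdrop_dl.scraper.crawler import Crawler
from cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem
from cyberdrop_dl.utils.utilities import error_handling_wrapper, get_filename_and_ext

if TYPE_CHECKING:
    from cyberdrop_dl.managers.manager import Manager


class ExampleCrawler(Crawler):
    """Hypothetical crawler used only to illustrate the shared structure."""

    def __init__(self, manager: Manager):
        super().__init__(manager, "example", "Example")
        self.primary_base_domain = URL("https://example.com")
        self.request_limiter = AsyncLimiter(10, 1)

    async def fetch(self, scrape_item: ScrapeItem) -> None:
        """Routes the scrape item, wrapped in the shared progress tracking."""
        task_id = await self.scraping_progress.add_task(scrape_item.url)
        await self.image(scrape_item)
        await self.scraping_progress.remove_task(task_id)

    @error_handling_wrapper
    async def image(self, scrape_item: ScrapeItem) -> None:
        """Scrapes a single image page (the selector below is illustrative only)."""
        async with self.request_limiter:
            soup = await self.client.get_BS4(self.domain, scrape_item.url)
        link = URL(soup.select_one("img[id=main-image]").get("src"))
        filename, ext = await get_filename_and_ext(link.name)
        await self.handle_file(link, scrape_item, filename, ext)

A crawler of this kind would presumably also need to be registered alongside the existing ones (for example in supported_domains and the ScrapeMapper) before fetch() would ever be called; that wiring is outside the scope of this sketch.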