├── test ├── unit │ ├── __init__.py │ ├── download_manager │ │ └── ok_video_ranking_test.py │ └── html_generator │ │ └── html_templates_test.py ├── integration │ ├── __init__.py │ ├── analysis │ │ ├── __init__.py │ │ └── get_author_posts_test.py │ ├── boosty_api │ │ ├── __init__.py │ │ └── boosty_api_test.py │ ├── .env.example │ ├── configuration.py │ └── fixtures.py └── ABOUT_TESTING.md ├── boosty_downloader ├── __init__.py └── src │ ├── __init__.py │ ├── domain │ ├── __init__.py │ ├── post.py │ └── post_data_chunks.py │ ├── application │ ├── __init__.py │ ├── di │ │ ├── __init__.py │ │ ├── download_context.py │ │ └── app_environment.py │ ├── exceptions │ │ ├── __init__.py │ │ └── application_errors.py │ ├── use_cases │ │ ├── __init__.py │ │ ├── check_total_posts.py │ │ ├── download_specific_post.py │ │ └── download_all_posts.py │ ├── mappers │ │ ├── image.py │ │ ├── file.py │ │ ├── external_video.py │ │ ├── __init__.py │ │ ├── ok_boosty_video.py │ │ ├── list.py │ │ ├── html_converter.py │ │ ├── post_mapper.py │ │ └── link_header_text.py │ ├── filtering.py │ └── ok_video_ranking.py │ ├── infrastructure │ ├── __init__.py │ ├── loggers │ │ ├── __init__.py │ │ ├── logger_instances.py │ │ ├── failed_downloads_logger.py │ │ └── base.py │ ├── html_reporter │ │ ├── __init__.py │ │ └── html_reporter.py │ ├── post_caching │ │ ├── __init__.py │ │ └── post_cache.py │ ├── update_checker │ │ ├── __init__.py │ │ └── pypi_checker.py │ ├── boosty_api │ │ ├── core │ │ │ ├── __init__.py │ │ │ ├── endpoints.py │ │ │ └── client.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ └── post │ │ │ │ ├── __init__.py │ │ │ │ ├── post_data_types │ │ │ │ ├── post_data_header.py │ │ │ │ ├── post_data_video.py │ │ │ │ ├── post_data_file.py │ │ │ │ ├── post_data_link.py │ │ │ │ ├── post_data_text.py │ │ │ │ ├── post_data_image.py │ │ │ │ ├── __init__.py │ │ │ │ ├── post_data_list.py │ │ │ │ └── post_data_ok_video.py │ │ │ │ ├── posts_request.py │ │ │ │ ├── extra.py │ │ │ │ ├── post.py │ │ │ │ └── 
base_post_data.py │ │ ├── utils │ │ │ ├── __init__.py │ │ │ ├── filter_none_params.py │ │ │ ├── textual_post_extractor.py │ │ │ └── auth_parsers.py │ │ └── __init__.py │ ├── yaml_configuration │ │ ├── __init__.py │ │ ├── sample_config.py │ │ └── config.py │ ├── external_videos_downloader │ │ ├── __init__.py │ │ └── external_videos_downloader.py │ ├── html_generator │ │ ├── templates │ │ │ ├── image.html │ │ │ ├── video.html │ │ │ ├── list.html │ │ │ ├── text.html │ │ │ └── base.html │ │ ├── __init__.py │ │ ├── models.py │ │ └── renderer.py │ ├── path_sanitizer.py │ ├── human_readable_filesize.py │ └── file_downloader.py │ └── interfaces │ ├── __init__.py │ ├── help_panels.py │ ├── cli_options.py │ └── console_progress_reporter.py ├── assets ├── usage.png ├── example1.png ├── example2.png ├── auth_guide.png ├── config_guide.png ├── screenshot.png ├── total_check.png └── boosty-black-badge.png ├── .github ├── renovate.json ├── pull_request_template.md └── workflows │ ├── any-pr-validation.yaml │ ├── release-pr-validation.yaml │ └── release.yaml ├── pyrightconfig.json ├── ruff.toml ├── LICENSE ├── pyproject.toml ├── CONTRIBUTING.md ├── Makefile ├── CHANGELOG.md ├── README.md └── .gitignore /test/unit/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/integration/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /boosty_downloader/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /boosty_downloader/src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/test/integration/analysis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /boosty_downloader/src/domain/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/integration/boosty_api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /boosty_downloader/src/application/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /boosty_downloader/src/application/di/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /boosty_downloader/src/interfaces/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /boosty_downloader/src/application/exceptions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /boosty_downloader/src/application/use_cases/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/loggers/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/html_reporter/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/post_caching/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/update_checker/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/boosty_api/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/boosty_api/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/boosty_api/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/yaml_configuration/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/boosty_api/models/post/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/external_videos_downloader/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assets/usage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Glitchy-Sheep/boosty-downloader/HEAD/assets/usage.png -------------------------------------------------------------------------------- /assets/example1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Glitchy-Sheep/boosty-downloader/HEAD/assets/example1.png -------------------------------------------------------------------------------- /assets/example2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Glitchy-Sheep/boosty-downloader/HEAD/assets/example2.png -------------------------------------------------------------------------------- /assets/auth_guide.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Glitchy-Sheep/boosty-downloader/HEAD/assets/auth_guide.png -------------------------------------------------------------------------------- /assets/config_guide.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Glitchy-Sheep/boosty-downloader/HEAD/assets/config_guide.png -------------------------------------------------------------------------------- /assets/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Glitchy-Sheep/boosty-downloader/HEAD/assets/screenshot.png -------------------------------------------------------------------------------- 
/assets/total_check.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Glitchy-Sheep/boosty-downloader/HEAD/assets/total_check.png -------------------------------------------------------------------------------- /assets/boosty-black-badge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Glitchy-Sheep/boosty-downloader/HEAD/assets/boosty-black-badge.png -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/html_generator/templates/image.html: -------------------------------------------------------------------------------- 1 | Image -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/boosty_api/__init__.py: -------------------------------------------------------------------------------- 1 | from .core.client import BoostyAPIClient 2 | 3 | __all__ = [ 4 | 'BoostyAPIClient', 5 | ] 6 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/boosty_api/core/endpoints.py: -------------------------------------------------------------------------------- 1 | """All constants for endpoints.""" 2 | 3 | BOOSTY_DEFAULT_BASE_URL = 'https://api.boosty.to/v1/' 4 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/html_generator/templates/video.html: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/loggers/logger_instances.py: -------------------------------------------------------------------------------- 1 | """Module contains loggers for different parts of the app""" 2 | 3 | from boosty_downloader.src.infrastructure.loggers.base 
import RichLogger 4 | 5 | downloader_logger = RichLogger('Boosty_Downloader') 6 | -------------------------------------------------------------------------------- /boosty_downloader/src/interfaces/help_panels.py: -------------------------------------------------------------------------------- 1 | """Defines panels for grouping arguments in the CLI help interface.""" 2 | 3 | from enum import Enum 4 | 5 | 6 | class HelpPanels(str, Enum): 7 | """Panels for grouping arguments in the CLI help.""" 8 | 9 | actions = 'Actions' 10 | filtering = 'Filtering' 11 | network = 'Network' 12 | -------------------------------------------------------------------------------- /.github/renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://docs.renovatebot.com/renovate-schema.json", 3 | "extends": [ 4 | "config:recommended" 5 | ], 6 | 7 | "dependencyDashboard": true, 8 | 9 | "updateLockFiles": true, 10 | 11 | "automerge": false, 12 | "autoApprove": false, 13 | "platformAutomerge": false, 14 | "baseBranches": ["dev"] 15 | } 16 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/yaml_configuration/sample_config.py: -------------------------------------------------------------------------------- 1 | """Helper functions for working with invalid values in the config.""" 2 | 3 | DEFAULT_YAML_CONFIG_VALUE = """ 4 | auth: 5 | # Insert your own cookie and auth header values here 6 | cookie: '' 7 | auth_header: '' 8 | downloading_settings: 9 | target_directory: ./boosty-downloads 10 | """ 11 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/boosty_api/models/post/post_data_types/post_data_header.py: -------------------------------------------------------------------------------- 1 | """Header of the posts""" 2 | 3 | from typing import Literal 4 | 5 | from pydantic import BaseModel 6 | 7 | 
8 | class BoostyPostDataHeaderDTO(BaseModel): 9 | """Header content piece in posts""" 10 | 11 | type: Literal['header'] 12 | content: str 13 | modificator: str 14 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/boosty_api/models/post/post_data_types/post_data_video.py: -------------------------------------------------------------------------------- 1 | """Usual video links (on youtube and other services)""" 2 | 3 | from typing import Literal 4 | 5 | from pydantic import BaseModel 6 | 7 | 8 | class BoostyPostDataExternalVideoDTO(BaseModel): 9 | """Video content piece in posts""" 10 | 11 | type: Literal['video'] 12 | url: str 13 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/boosty_api/utils/filter_none_params.py: -------------------------------------------------------------------------------- 1 | """Just a little helper to make requests""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Any 6 | 7 | 8 | def filter_none_params(kwargs: dict[str, Any | None]) -> dict[str, Any]: 9 | """Remove None values from kwargs""" 10 | return {k: v for k, v in kwargs.items() if v is not None} 11 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/boosty_api/models/post/post_data_types/post_data_file.py: -------------------------------------------------------------------------------- 1 | """The module with file representation of posts data""" 2 | 3 | from typing import Literal 4 | 5 | from pydantic import BaseModel 6 | 7 | 8 | class BoostyPostDataFileDTO(BaseModel): 9 | """File content piece in posts""" 10 | 11 | type: Literal['file'] 12 | url: str 13 | title: str 14 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/boosty_api/models/post/post_data_types/post_data_link.py: 
-------------------------------------------------------------------------------- 1 | """Module with link representation of posts data""" 2 | 3 | from typing import Literal 4 | 5 | from pydantic import BaseModel 6 | 7 | 8 | class BoostyPostDataLinkDTO(BaseModel): 9 | """Link content piece in posts""" 10 | 11 | type: Literal['link'] 12 | url: str 13 | content: str 14 | explicit: bool 15 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/boosty_api/models/post/post_data_types/post_data_text.py: -------------------------------------------------------------------------------- 1 | """The module with textual representation of posts data""" 2 | 3 | from typing import Literal 4 | 5 | from pydantic import BaseModel 6 | 7 | 8 | class BoostyPostDataTextDTO(BaseModel): 9 | """Textual content piece in posts""" 10 | 11 | type: Literal['text'] 12 | 13 | content: str 14 | modificator: str 15 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/path_sanitizer.py: -------------------------------------------------------------------------------- 1 | """The module helps with path sanitization to make it work on different platforms""" 2 | 3 | import re 4 | 5 | 6 | def sanitize_string(string: str) -> str: 7 | """Remove unsafe filesystem characters from a string""" 8 | # Convert path to a string and sanitize it 9 | unsafe_chars = r'[<>:"/\\|?*]' 10 | return re.sub(unsafe_chars, '', str(string)) 11 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/boosty_api/models/post/post_data_types/post_data_image.py: -------------------------------------------------------------------------------- 1 | """The module with image representation of posts data""" 2 | 3 | from typing import Literal 4 | 5 | from pydantic import BaseModel 6 | 7 | 8 | class BoostyPostDataImageDTO(BaseModel): 9 | """Image 
content piece in posts""" 10 | 11 | type: Literal['image'] 12 | url: str 13 | width: int | None = None 14 | height: int | None = None 15 | -------------------------------------------------------------------------------- /pyrightconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "typeCheckingMode": "strict", 3 | "include": [ 4 | "boosty_downloader" 5 | ], 6 | "exclude": [ 7 | "**/node_modules", 8 | "**/__pycache__", 9 | "**/dist", 10 | "**/build" 11 | ], 12 | "defineConstant": { 13 | "DEBUG": true 14 | }, 15 | "reportMissingImports": "error", 16 | "reportMissingTypeStubs": false, 17 | "pythonVersion": "3.10", 18 | "pythonPlatform": "Windows", 19 | } -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/boosty_api/models/post/posts_request.py: -------------------------------------------------------------------------------- 1 | """Models for posts responses to boosty.to""" 2 | 3 | from pydantic import BaseModel 4 | 5 | from boosty_downloader.src.infrastructure.boosty_api.models.post.extra import Extra 6 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post import PostDTO 7 | 8 | 9 | class PostsResponse(BaseModel): 10 | """Model representing a response from a posts request""" 11 | 12 | posts: list[PostDTO] 13 | extra: Extra 14 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/boosty_api/models/post/extra.py: -------------------------------------------------------------------------------- 1 | """Models for meta info about posts or requests to boosty.to""" 2 | 3 | from pydantic import BaseModel, ConfigDict 4 | from pydantic.alias_generators import to_camel 5 | 6 | 7 | class Extra(BaseModel): 8 | """Meta info for posts request, can be used for pagination mainly""" 9 | 10 | is_last: bool 11 | offset: str 12 | 13 | model_config = ConfigDict( 14 | 
alias_generator=to_camel, 15 | populate_by_name=True, 16 | from_attributes=True, 17 | ) 18 | -------------------------------------------------------------------------------- /boosty_downloader/src/application/mappers/image.py: -------------------------------------------------------------------------------- 1 | """Image content mapper module to transform Boosty API DTO to domain model.""" 2 | 3 | from boosty_downloader.src.domain.post import PostDataChunkImage 4 | from boosty_downloader.src.infrastructure.boosty_api.models.post.base_post_data import ( 5 | BoostyPostDataImageDTO, 6 | ) 7 | 8 | 9 | def to_domain_image_chunk(api_image: BoostyPostDataImageDTO) -> PostDataChunkImage: 10 | """Convert API PostDataImage to domain PostDataChunkImage.""" 11 | return PostDataChunkImage( 12 | url=api_image.url, 13 | ) 14 | -------------------------------------------------------------------------------- /boosty_downloader/src/application/mappers/file.py: -------------------------------------------------------------------------------- 1 | """Mapping functions for converting API PostDataFile objects to domain PostDataChunkFile objects.""" 2 | 3 | from boosty_downloader.src.domain.post import PostDataChunkFile 4 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post_data_types import ( 5 | BoostyPostDataFileDTO, 6 | ) 7 | 8 | 9 | def to_domain_file_chunk( 10 | api_file: BoostyPostDataFileDTO, signed_query: str 11 | ) -> PostDataChunkFile: 12 | """Convert API PostDataFile to domain PostDataChunkFile.""" 13 | return PostDataChunkFile( 14 | url=api_file.url + signed_query, 15 | filename=api_file.title, 16 | ) 17 | -------------------------------------------------------------------------------- /test/integration/.env.example: -------------------------------------------------------------------------------- 1 | 2 | # Valid Boosty authentication token 3 | BOOSTY_TOKEN=your_boosty_token_here 4 | 5 | # Cookies for Boosty authentication (if required) 6 | 
BOOSTY_COOKIES=your_boosty_cookies_here 7 | 8 | # URL or ID of a post that is publicly accessible 9 | BOOSTY_AVAILABLE_POST=https://boosty.to/author/posts/12345 10 | 11 | # URL or ID of a post that exists but is behind a paywall or private 12 | BOOSTY_UNAVAILABLE_POST=https://boosty.to/author/posts/67890 13 | 14 | # Username of an author that doesn't exist 15 | BOOSTY_NONEXISTENT_AUTHOR=nonexistent_author_username 16 | 17 | # Username of an existing author with public content 18 | BOOSTY_EXISTING_AUTHOR=existing_author_username 19 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/boosty_api/utils/textual_post_extractor.py: -------------------------------------------------------------------------------- 1 | """Module to extract textual content from a post by its chunks""" 2 | 3 | from __future__ import annotations 4 | 5 | import json 6 | from io import StringIO 7 | 8 | 9 | def extract_textual_content( 10 | content: str, 11 | ) -> str: 12 | """Extract textual content from a post chunk Link/Text""" 13 | buffer = StringIO() 14 | 15 | # Merge all the text and link fragments into one file 16 | try: 17 | json_data: list[str] = json.loads(content) 18 | except json.JSONDecodeError: 19 | return buffer.getvalue() 20 | 21 | if len(json_data) == 0: 22 | return buffer.getvalue() 23 | 24 | clean_text = str(json_data[0]) 25 | 26 | buffer.write(clean_text) 27 | 28 | return buffer.getvalue() 29 | -------------------------------------------------------------------------------- /boosty_downloader/src/application/mappers/external_video.py: -------------------------------------------------------------------------------- 1 | """Mapping functions for converting external video API DTOs to domain objects.""" 2 | 3 | from boosty_downloader.src.domain.post import PostDataChunkExternalVideo 4 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post_data_types import ( 5 | BoostyPostDataExternalVideoDTO, 6 | ) 
7 | 8 | 9 | def to_external_video_content( 10 | api_video_dto: BoostyPostDataExternalVideoDTO, 11 | ) -> PostDataChunkExternalVideo: 12 | """ 13 | Convert API video data to domain external video content object. 14 | 15 | It uses the PostDataVideo DTO to extract the video URL and other metadata 16 | to create a domain external video content object. 17 | """ 18 | return PostDataChunkExternalVideo( 19 | url=api_video_dto.url, 20 | ) 21 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/boosty_api/models/post/post_data_types/__init__.py: -------------------------------------------------------------------------------- 1 | from .post_data_file import BoostyPostDataFileDTO 2 | from .post_data_header import BoostyPostDataHeaderDTO 3 | from .post_data_image import BoostyPostDataImageDTO 4 | from .post_data_link import BoostyPostDataLinkDTO 5 | from .post_data_list import BoostyPostDataListDTO 6 | from .post_data_ok_video import BoostyPostDataOkVideoDTO 7 | from .post_data_text import BoostyPostDataTextDTO 8 | from .post_data_video import BoostyPostDataExternalVideoDTO 9 | 10 | __all__ = [ 11 | 'BoostyPostDataExternalVideoDTO', 12 | 'BoostyPostDataFileDTO', 13 | 'BoostyPostDataHeaderDTO', 14 | 'BoostyPostDataImageDTO', 15 | 'BoostyPostDataLinkDTO', 16 | 'BoostyPostDataListDTO', 17 | 'BoostyPostDataOkVideoDTO', 18 | 'BoostyPostDataTextDTO', 19 | ] 20 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/html_generator/__init__.py: -------------------------------------------------------------------------------- 1 | """HTML generator module for independent HTML generation.""" 2 | 3 | from .models import ( 4 | HtmlGenChunk, 5 | HtmlGenFile, 6 | HtmlGenImage, 7 | HtmlGenList, 8 | HtmlGenText, 9 | HtmlGenVideo, 10 | HtmlListItem, 11 | HtmlListStyle, 12 | HtmlTextFragment, 13 | HtmlTextStyle, 14 | ) 15 | from .renderer import ( 16 | render_html, 
17 | render_html_chunk, 18 | render_html_to_file, 19 | ) 20 | 21 | __all__ = [ 22 | 'HtmlGenChunk', 23 | 'HtmlGenFile', 24 | 'HtmlGenImage', 25 | 'HtmlGenList', 26 | 'HtmlGenText', 27 | 'HtmlGenVideo', 28 | 'HtmlListItem', 29 | 'HtmlListStyle', 30 | 'HtmlTextFragment', 31 | 'HtmlTextStyle', 32 | 'render_html', 33 | 'render_html_chunk', 34 | 'render_html_to_file', 35 | ] 36 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/human_readable_filesize.py: -------------------------------------------------------------------------------- 1 | """Module with functions for human-readable file size representation""" 2 | 3 | from __future__ import annotations 4 | 5 | 6 | def human_readable_size(size: float | None, decimal_places: int = 2) -> str: 7 | """ 8 | Return a human-readable string representing the size of a file. 9 | 10 | Usage example: 11 | path = Path("example.txt") 12 | 13 | file_size = path.stat().st_size # Get file size in bytes 14 | print(human_readable_size(file_size)) 15 | """ 16 | if size is None: 17 | return 'N/A' 18 | 19 | kb_size = 1024 20 | 21 | for unit in ['B', 'KB', 'MB', 'GB', 'TB']: 22 | if size < kb_size: 23 | return f'{size:.{decimal_places}f} {unit}' 24 | size /= kb_size 25 | return f'{size:.{decimal_places}f} PB' 26 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/boosty_api/utils/auth_parsers.py: -------------------------------------------------------------------------------- 1 | """Cookie and authorization parser module for raw-browser-data parsing""" 2 | 3 | from http.cookies import SimpleCookie 4 | 5 | import aiohttp 6 | 7 | 8 | def parse_session_cookie(cookie_string: str) -> aiohttp.CookieJar: 9 | """Parse the session cookie and return a cookie jar with auth data for aiohttp client.""" 10 | if cookie_string.lower().startswith('cookie: '): 11 | cookie_string = cookie_string[8:].strip() 12 | 13 | cookie = 
SimpleCookie() 14 | cookie.load(cookie_string) 15 | 16 | jar = aiohttp.CookieJar() 17 | for key, morsel in cookie.items(): 18 | jar.update_cookies({key: morsel.value}) 19 | 20 | return jar 21 | 22 | 23 | def parse_auth_header(header: str) -> dict[str, str]: 24 | """Parse the authorization header and return a dictionary with auth data.""" 25 | return {'Authorization': header} 26 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/html_generator/templates/list.html: -------------------------------------------------------------------------------- 1 | {% macro render_item(item) -%} 2 |
  • 3 | {% for txt in item.data %} 4 | {{ render_chunk(txt) | safe }} 5 | {% endfor %} 6 | {% if item.nested_items %} 7 | {% if lst.style.value == 'ordered' %} 8 |
      9 | {% else %} 10 |
    17 | {% else %} 18 | 19 | {% endif %} 20 | {% endif %} 21 |
  • 22 | {%- endmacro %} 23 | 24 | {% if lst.style.value == 'ordered' %} 25 |
      26 | {% else %} 27 |
    34 | {% else %} 35 | 36 | {% endif %} -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/boosty_api/models/post/post_data_types/post_data_list.py: -------------------------------------------------------------------------------- 1 | """The module with list representation of posts data""" 2 | 3 | from typing import Literal 4 | 5 | from pydantic import BaseModel 6 | 7 | 8 | class BoostyPostDataListDataItemDTO(BaseModel): 9 | """Represents a single data item in a list of post data chunks.""" 10 | 11 | type: str 12 | modificator: str | None = '' 13 | content: str 14 | 15 | 16 | class BoostyPostDataListItemDTO(BaseModel): 17 | """Represents a single item in a list of post data chunks.""" 18 | 19 | items: list['BoostyPostDataListItemDTO'] = [] 20 | data: list[BoostyPostDataListDataItemDTO] = [] 21 | 22 | 23 | BoostyPostDataListItemDTO.model_rebuild() 24 | 25 | 26 | class BoostyPostDataListDTO(BaseModel): 27 | """Represents a list of post data chunks.""" 28 | 29 | type: Literal['list'] 30 | items: list[BoostyPostDataListItemDTO] 31 | style: Literal['ordered', 'unordered'] | None = None 32 | -------------------------------------------------------------------------------- /test/integration/configuration.py: -------------------------------------------------------------------------------- 1 | from pydantic import Field 2 | from pydantic_settings import BaseSettings, SettingsConfigDict 3 | 4 | 5 | class IntegrationTestConfig(BaseSettings): 6 | """ 7 | Loads and validates integration test config from environment variables. 
8 | """ 9 | 10 | boosty_auth_token: str = Field(..., alias='BOOSTY_TOKEN') 11 | boosty_cookies: str = Field(..., alias='BOOSTY_COOKIES') 12 | 13 | boosty_available_post_url: str = Field(..., alias='BOOSTY_AVAILABLE_POST') 14 | boosty_unavailable_post_url: str = Field(..., alias='BOOSTY_UNAVAILABLE_POST') 15 | boosty_nonexistent_author: str = Field(..., alias='BOOSTY_NONEXISTENT_AUTHOR') 16 | boosty_existing_author: str = Field(..., alias='BOOSTY_EXISTING_AUTHOR') 17 | 18 | model_config = SettingsConfigDict(env_file='.env', extra='ignore') 19 | 20 | def summary(self) -> str: 21 | """ 22 | Return all loaded config fields as a string for debug purposes. 23 | """ 24 | return str(self.model_dump()) 25 | -------------------------------------------------------------------------------- /boosty_downloader/src/application/mappers/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | These modules contain mappers for converting Boosty API -> domain objects. 3 | 4 | This is the main entry point for data transformation. 
5 | """ 6 | 7 | from boosty_downloader.src.application.ok_video_ranking import ( 8 | get_best_video, 9 | get_quality_ranking, 10 | ) 11 | 12 | from .external_video import to_external_video_content 13 | from .file import to_domain_file_chunk 14 | from .image import to_domain_image_chunk 15 | from .link_header_text import to_domain_text_chunk 16 | from .list import to_domain_list_chunk 17 | from .ok_boosty_video import to_ok_boosty_video_content 18 | from .post_mapper import map_post_dto_to_domain 19 | 20 | __all__ = [ 21 | 'get_best_video', 22 | 'get_quality_ranking', 23 | 'map_post_dto_to_domain', 24 | 'to_domain_file_chunk', 25 | 'to_domain_image_chunk', 26 | 'to_domain_list_chunk', 27 | 'to_domain_text_chunk', 28 | 'to_external_video_content', 29 | 'to_ok_boosty_video_content', 30 | ] 31 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/boosty_api/models/post/post.py: -------------------------------------------------------------------------------- 1 | """The module describes the form of a post of a user on boosty.to""" 2 | 3 | from __future__ import annotations 4 | 5 | from datetime import datetime # noqa: TC003 Pydantic should know this type fully 6 | 7 | from pydantic import ConfigDict 8 | from pydantic.alias_generators import to_camel 9 | from pydantic.main import BaseModel 10 | 11 | from boosty_downloader.src.infrastructure.boosty_api.models.post.base_post_data import ( 12 | BasePostData, # noqa: TC001 Pydantic should know this type fully 13 | ) 14 | 15 | 16 | class PostDTO(BaseModel): 17 | """Post on boosty.to which also has data pieces""" 18 | 19 | id: str 20 | title: str 21 | created_at: datetime 22 | updated_at: datetime 23 | has_access: bool 24 | 25 | signed_query: str 26 | 27 | data: list[BasePostData] 28 | 29 | model_config = ConfigDict( 30 | alias_generator=to_camel, 31 | populate_by_name=True, 32 | from_attributes=True, 33 | ) 34 | 
-------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/boosty_api/models/post/base_post_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | The module contains a model for boosty 'post' data. 3 | 4 | Only essentials fields defined for parsing purposes. 5 | """ 6 | 7 | from __future__ import annotations 8 | 9 | from typing import Annotated 10 | 11 | from pydantic import Field 12 | 13 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post_data_types import ( 14 | BoostyPostDataExternalVideoDTO, 15 | BoostyPostDataFileDTO, 16 | BoostyPostDataHeaderDTO, 17 | BoostyPostDataImageDTO, 18 | BoostyPostDataLinkDTO, 19 | BoostyPostDataListDTO, 20 | BoostyPostDataOkVideoDTO, 21 | BoostyPostDataTextDTO, 22 | ) 23 | 24 | BasePostData = Annotated[ 25 | BoostyPostDataTextDTO 26 | | BoostyPostDataImageDTO 27 | | BoostyPostDataLinkDTO 28 | | BoostyPostDataFileDTO 29 | | BoostyPostDataExternalVideoDTO 30 | | BoostyPostDataOkVideoDTO 31 | | BoostyPostDataHeaderDTO 32 | | BoostyPostDataListDTO, 33 | Field( 34 | discriminator='type', 35 | ), 36 | ] 37 | -------------------------------------------------------------------------------- /ruff.toml: -------------------------------------------------------------------------------- 1 | # match black 2 | 3 | line-length = 88 4 | lint.select = [ 5 | "ALL", # include all the rules, including new ones 6 | ] 7 | lint.ignore = [ 8 | "E501", # line too long 9 | "D102", # missing docstring in public method 10 | "D212", # multiline docstring should start at the first line (personal preference) 11 | "D107", # missing docstring in __init__ (lol why) 12 | "D400", # first line should end with a period (sometimes mess with markdown or code blocks) 13 | "D415", # first line should end with a period (same as above but trickier) 14 | "RUF001", # ambiguous unicode character in string (e.g. Cyrillic letters that look like Latin ones) 15 | "G004", # don't log f-strings (personal preference) 16 | "D203", # 
incorrected blank line before class is incompatible with D211 17 | "COM812", # missing trailing comma (formatter conflicts with this) 18 | ] 19 | 20 | [lint.per-file-ignores] 21 | "test/*" = ["D", "ANN201", "S101", "PLR2004", "INP001"] 22 | "__init__.py" = ["D104"] 23 | 24 | [format] 25 | quote-style = "single" 26 | 27 | [lint.flake8-quotes] 28 | inline-quotes = "single" 29 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | # 📌 Task 2 | 3 | ## 📝 Description 4 | 5 | Describe in detail what has changed, why it is needed, and what problems this PR solves. 6 | 7 | ## 🔄 Changelog 8 | 9 | 10 | 11 | - **✨ Added:** … 12 | - **🛠 Fixed:** … 13 | - **🔄 Changed:** … 14 | - **🗑 Removed:** … 15 | 16 | ## 🎯 Related Issue 17 | 18 | 19 | ## 📷 Screenshots (if applicable) 20 | 25 | 26 | ## ✅ Checklist 27 | 28 | - [ ] Locally tested (`make test` and your own judgment) 29 | - [ ] Documentation updated (if necessary) 30 | - [ ] Code follows the project's style guidelines (`make lint && make format`) 31 | 32 | ## ⚠ Notes 33 | 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Roman Berezkin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the 
@dataclass
class Post:
    """
    Post on boosty.to which has different kinds of content (images, text, videos, etc.).

    This is the domain-side representation used for further downloading.
    """

    uuid: str
    title: str
    created_at: datetime
    updated_at: datetime
    has_access: bool

    signed_query: str

    # Content of the post as a list of chunks; every chunk is one of the
    # PostDataChunk* types joined in PostDataAllChunks (image, text, boosty
    # video, external video, file or textual list).
    post_data_chunks: PostDataAllChunksList
-------------------------------------------------------------------------------- 1 | {% for frag in text.text_fragments %} 2 | {% set lvl = frag.header_level|default(0)|int %} 3 | {% if lvl > 0 %} 4 | {% if lvl > 6 %}{% set lvl = 6 %}{% endif %} 5 | {{ frag.text }} 6 | {% else %} 7 | {% if frag.text in ['\n', '\r\n'] %} 8 |
    9 | {% else %} 10 | {% if frag.link_url %} 11 | 12 | {% if frag.style.bold %}{% endif %} 13 | {% if frag.style.italic %}{% endif %} 14 | {% if frag.style.underline %}{% endif %} 15 | {{ frag.text }} 16 | {% if frag.style.underline %}{% endif %} 17 | {% if frag.style.italic %}{% endif %} 18 | {% if frag.style.bold %}{% endif %} 19 | 20 | {% else %} 21 | {% if frag.style.bold %}{% endif %} 22 | {% if frag.style.italic %}{% endif %} 23 | {% if frag.style.underline %}{% endif %} 24 | {{ frag.text }} 25 | {% if frag.style.underline %}{% endif %} 26 | {% if frag.style.italic %}{% endif %} 27 | {% if frag.style.bold %}{% endif %} 28 | {% endif %} 29 | {% endif %} 30 | {% endif %} 31 | {% endfor %} -------------------------------------------------------------------------------- /boosty_downloader/src/application/di/download_context.py: -------------------------------------------------------------------------------- 1 | """Define the DownloadContext dataclass and its dependencies for the download workflow.""" 2 | 3 | from dataclasses import dataclass 4 | 5 | from aiohttp_retry import RetryClient 6 | 7 | from boosty_downloader.src.application.filtering import ( 8 | BoostyOkVideoType, 9 | DownloadContentTypeFilter, 10 | ) 11 | from boosty_downloader.src.infrastructure.external_videos_downloader.external_videos_downloader import ( 12 | ExternalVideosDownloader, 13 | ) 14 | from boosty_downloader.src.infrastructure.loggers.failed_downloads_logger import ( 15 | FailedDownloadsLogger, 16 | ) 17 | from boosty_downloader.src.infrastructure.post_caching.post_cache import SQLitePostCache 18 | from boosty_downloader.src.interfaces.console_progress_reporter import ProgressReporter 19 | 20 | 21 | @dataclass 22 | class DownloadContext: 23 | """Aggregates dependencies and configuration for the download workflow.""" 24 | 25 | author_name: str 26 | downloader_session: RetryClient 27 | external_videos_downloader: ExternalVideosDownloader 28 | post_cache: SQLitePostCache 29 | filters: 
list[DownloadContentTypeFilter] 30 | preferred_video_quality: BoostyOkVideoType 31 | progress_reporter: ProgressReporter 32 | failed_logger: FailedDownloadsLogger 33 | -------------------------------------------------------------------------------- /test/ABOUT_TESTING.md: -------------------------------------------------------------------------------- 1 | # Structure 2 | 3 | The test structure doesn't mirror the application structure, but rather groups tests by their functionality or "domain": 4 | 5 | ``` 6 | test/ 7 | ├── analysis - Tests used ONLY to analyze responses from known endpoints 8 | │ └── ... 9 | │ 10 | ├── unit - Unit tests for the application, grouped by "domains" 11 | │ └── ... 12 | │ 13 | └── integration - Integration tests for the application, grouped by "domains" 14 | ``` 15 | 16 | # Add a new test 17 | 18 | **If you want to add a new test:** 19 | 1. *Decide whether it is a unit test or an integration test.* 20 | - **Integration** tests depend on external services (Boosty) or the network and may require configuration. 21 | - **Unit** tests are isolated and can be run any time without configuration or setup. 22 | 2. *Decide which "domain" it belongs to* 23 | - For example, ok_video_ranking is one of boosty_downloader's domains. 24 | 3. *Create a test file, following the naming convention `<name>_test.py`.* 25 | 4. Test some functionality in a function named `test_<name>`. 26 | - Use `assert` statements to check expected outcomes. 27 | 5. *Run the test using `make test` for unit tests or `make test-integration` for integration tests.* 28 | 6. 
def to_ok_boosty_video_content(
    api_video_dto: BoostyPostDataOkVideoDTO, preferred_quality: BoostyOkVideoType
) -> PostDataChunkBoostyVideo | None:
    """
    Build the domain video chunk for a Boosty-hosted (ok) video.

    The player URLs of the DTO are ranked by ``get_best_video`` against the
    preferred quality. When the ranking yields nothing, ``None`` is returned;
    otherwise the chunk carries the selected URL, the DTO title and the name
    of the quality that was actually selected.
    """
    selection = get_best_video(
        preferred_quality=preferred_quality,
        video_urls=api_video_dto.player_urls,
    )

    if selection is not None:
        selected_video, selected_quality = selection
        return PostDataChunkBoostyVideo(
            url=selected_video.url,
            title=api_video_dto.title,
            quality=selected_quality.name,
        )

    # Nothing suitable among the player URLs.
    return None
-------------------------------------------------------------------------------- /.github/workflows/any-pr-validation.yaml: -------------------------------------------------------------------------------- 1 | # This workflow triggers on any pull request or push to main or dev branches 2 | name: 🔍 PR Code Health Checks (linters / type checks / tests) 3 | 4 | on: 5 | pull_request: 6 | branches: 7 | - main 8 | - dev 9 | push: 10 | branches: 11 | - dev 12 | - 'feature/**' 13 | - 'hotfix/**' 14 | 15 | env: 16 | PACKAGE_NAME: "boosty-downloader" 17 | 18 | jobs: 19 | lint-test-build: 20 | name: 🧪 Code Quality & Build 21 | runs-on: ubuntu-latest 22 | steps: 23 | - uses: actions/checkout@v4 24 | 25 | - name: 🐍 Set up Python 26 | uses: actions/setup-python@v5 27 | with: 28 | python-version: "3.12" 29 | 30 | - name: 📦 Install Poetry if missing 31 | uses: snok/install-poetry@v1 32 | with: 33 | version: 'latest' 34 | 35 | - name: 📥 Install dependencies 36 | run: poetry sync 37 | 38 | - name: 🔍 Run ruff linting 39 | run: make lint-check 40 | 41 | - name: 🎨 Run ruff formatting check 42 | run: make format-check 43 | 44 | - name: 🔎 Run type checking 45 | run: make types 46 | 47 | - name: 🧪 Run tests 48 | run: | 49 | make test-verbose 50 | make test-api-verbose 51 | timeout-minutes: 5 52 | 53 | - name: 🏗️ Build package 54 | run: make build 55 | 56 | - name: ✅ Verify build artifacts 57 | run: | 58 | ls -la dist/ 59 | if [ ! -f dist/*.whl ] || [ ! 
class BoostyOkVideoType(Enum):
    """
    All the types which boosty provides for ok video.

    Every member's value is the same string as its name; ``BoostyOkVideoUrl.type``
    is declared with this enum.
    """

    # NOTE(review): the three groups below look like live-stream formats,
    # streaming formats and plain quality tiers — inferred from the names only,
    # confirm against the Boosty API.
    live_playback_dash = 'live_playback_dash'
    live_playback_hls = 'live_playback_hls'
    live_ondemand_hls = 'live_ondemand_hls'

    live_dash = 'live_dash'
    live_hls = 'live_hls'
    hls = 'hls'
    dash = 'dash'
    dash_uni = 'dash_uni'
    live_cmaf = 'live_cmaf'

    ultra_hd = 'ultra_hd'
    quad_hd = 'quad_hd'
    full_hd = 'full_hd'
    high = 'high'
    medium = 'medium'
    low = 'low'
    tiny = 'tiny'
    lowest = 'lowest'
class ApplicationBaseDownloadError(Exception):
    """
    Base class for all application-level download errors.

    Each error instance is bound to a specific post that triggered it.
    The post uuid is also passed to ``Exception`` so that ``str(error)`` is
    not empty and the instance can be rebuilt from ``error.args``
    (``copy`` / ``pickle``).

    Attributes
    ----------
    post_uuid : str
        Unique identifier of the post related to the error.

    """

    def __init__(self, post_uuid: str) -> None:
        # Keep the uuid in ``args``: an empty ``args`` makes the error print as
        # an empty string and breaks reconstruction via ``cls(*args)``.
        super().__init__(post_uuid)
        self.post_uuid = post_uuid


class ApplicationFailedDownloadError(ApplicationBaseDownloadError):
    """
    Raised when downloading a specific resource from a post fails.

    Causes may include network errors, invalid URLs, or resource unavailability
    (e.g., a YouTube video becoming private).

    Attributes
    ----------
    resource : str
        Identifier or description of the resource that failed to download.
    message : str
        Human-readable details about the failure.

    """

    def __init__(self, post_uuid: str, resource: str, message: str) -> None:
        super().__init__(post_uuid)
        self.resource = resource
        self.message = message
        # Mirror the full constructor signature in ``args`` so the error can be
        # copied / pickled (both call ``cls(*args)``).
        self.args = (post_uuid, resource, message)

    def __str__(self) -> str:
        """Return a one-line description with the post, resource and reason."""
        return f'[{self.post_uuid}] {self.resource}: {self.message}'


class ApplicationCancelledError(ApplicationBaseDownloadError):
    """
    Raised when a download for a specific post is cancelled by the user.

    Typically stops the entire download process.
    """
You checked all other places in which those filters were used before 25 | 26 | boosty_videos = 'boosty_videos' 27 | external_videos = 'external_videos' 28 | post_content = 'post_content' 29 | files = 'files' 30 | 31 | 32 | class VideoQualityOption(str, Enum): 33 | """Preferred video quality option for cli""" 34 | 35 | smallest_size = 'smallest_size' 36 | low = 'low' 37 | medium = 'medium' 38 | high = 'high' 39 | highest = 'highest' 40 | 41 | def to_ok_video_type(self) -> BoostyOkVideoType: 42 | mapping = { 43 | VideoQualityOption.smallest_size: BoostyOkVideoType.lowest, 44 | VideoQualityOption.low: BoostyOkVideoType.low, 45 | VideoQualityOption.medium: BoostyOkVideoType.medium, 46 | VideoQualityOption.high: BoostyOkVideoType.high, 47 | VideoQualityOption.highest: BoostyOkVideoType.ultra_hd, 48 | } 49 | return mapping[self] 50 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/update_checker/pypi_checker.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyPI update checker 3 | 4 | Provides functions and data structures to check for updates of any package on PyPI. 
def get_pypi_latest_version(package_name: str, timeout: float = 10.0) -> str | None:
    """
    Fetch the latest version string of a package from PyPI.

    Parameters
    ----------
    package_name : str
        Name of the package on PyPI. A blank name returns ``None`` without
        any network access.
    timeout : float
        Seconds to wait for PyPI before giving up, so a stalled connection
        cannot block the caller forever.

    Returns
    -------
    str | None
        The version reported by PyPI, or ``None`` when the lookup fails for
        any reason (network error, timeout, unexpected payload).

    """
    from urllib.parse import quote  # local import: only needed for this lookup

    package_name = package_name.strip()
    if not package_name:
        return None

    # Quote the name so unusual characters cannot change the request path.
    quoted_name = quote(package_name, safe='')
    url = f'https://pypi.org/pypi/{quoted_name}/json'

    try:
        with urlopen(url, timeout=timeout) as resp:
            data = json.load(resp)
        latest = data['info']['version']
    except Exception:  # noqa: BLE001 It doesn't matter what exception is raised, we just need to 100% catch it
        return None

    # Guard against a payload whose version field is not a string.
    return latest if isinstance(latest, str) else None
class FailedDownloadsLogger:
    """
    Append-only deduplicating logger keyed by error id.

    Will write to a log file created on demand.
    Each error id is unique and will be written only once.
    """

    # Matches the "[<error_id>]:" prefix of a log line and captures the id.
    # Compiled once for the class instead of on every load.
    _ID_PATTERN = re.compile(r'^\[(?P<id>[^\]]+)\]:')

    def __init__(self, log_file_path: Path) -> None:
        self.file_path = log_file_path
        # The parent directory is created eagerly; the file itself only on first write.
        self.file_path.parent.mkdir(parents=True, exist_ok=True)
        self._seen_ids: set[str] = set()
        self._loaded = False

    async def _ensure_loaded(self) -> None:
        """Read ids already present in the log file (once per instance)."""
        if self._loaded:
            return
        if not self.file_path.exists():
            self._loaded = True
            return

        async with aiofiles.open(self.file_path, encoding='utf-8') as f:
            async for line in f:
                m = self._ID_PATTERN.match(line.strip())
                if m:
                    self._seen_ids.add(m.group('id'))
        self._loaded = True

    async def _write_line(self, line: str) -> None:
        """Append a single line (trailing whitespace removed) to the log file."""
        async with aiofiles.open(self.file_path, 'a', encoding='utf-8') as f:
            await f.write(line.rstrip() + '\n')

    async def add_error(self, error_id: str, message: str) -> None:
        """
        Add a failed download error to the log.

        If the error ID is already logged, the message will be suppressed.
        """
        error_id = error_id.strip()
        message = message.strip()

        await self._ensure_loaded()
        if error_id in self._seen_ids:
            return

        # Reserve the id BEFORE awaiting the write: another task reporting the
        # same id while this write is in flight must be treated as a duplicate.
        self._seen_ids.add(error_id)
        try:
            await self._write_line(f'[{error_id}]: {message}')
        except BaseException:
            # The line never reached the file, so allow a later retry.
            self._seen_ids.discard(error_id)
            raise
@pytest.mark.asyncio
async def test_all_data_chunk_types(
    authorized_retry_client: RetryClient,
    integration_config: IntegrationTestConfig,
) -> None:
    """Fetch posts of an existing author and print one sample chunk per data type."""
    author = integration_config.boosty_existing_author
    endpoint = f'{BOOSTY_DEFAULT_BASE_URL}blog/{author}/post/'

    query = filter_none_params(
        {
            'limit': 25,
        },
    )
    response = await authorized_retry_client.get(endpoint, params=query)
    posts_data = await response.json()

    assert posts_data is not None

    # The first chunk seen for each ``type`` is kept as its sample.
    sample_by_type: Any = {}

    for post in posts_data['data']:
        rich.print(post)
        for chunk in post['data']:
            sample_by_type.setdefault(chunk['type'], chunk)

    rich.print_json(data=sample_by_type)
class ReportTotalPostsCountUseCase:
    """
    Reports the total number of posts and their accessibility for a given Boosty author.

    This use case iterates over all posts for the specified author, counts accessible and inaccessible posts,
    and reports the results through the provided logger.
    """

    def __init__(
        self,
        author_name: str,
        logger: RichLogger,
        boosty_api: BoostyAPIClient,
    ) -> None:
        self.author_name = author_name
        self.logger = logger
        self.boosty_api = boosty_api

    async def execute(self) -> None:
        """
        Walk through every page of the author's posts and log a summary.

        A progress line is logged per page. The final summary lists the titles
        of inaccessible posts only when there is at least one of them.
        """
        current_page = 0
        total_posts = 0
        inaccessible_titles: list[str] = []

        async for page in self.boosty_api.iterate_over_posts(
            self.author_name, posts_per_page=100
        ):
            current_page += 1
            total_posts += len(page.posts)

            self.logger.info(
                f'Processing page [bold]{current_page}[/bold]'
                ' | '
                f'Total posts so far: [bold]{total_posts}[/bold]'
            )

            inaccessible_titles.extend(
                post.title for post in page.posts if not post.has_access
            )

        inaccessible_posts_count = len(inaccessible_titles)
        accessible_posts_count = total_posts - inaccessible_posts_count

        summary = (
            f'Total posts: [bold]{total_posts}[/bold]\n'
            f'Accessible posts: [bold]{accessible_posts_count}[/bold]\n'
            f'Inaccessible posts: [bold]{inaccessible_posts_count}[/bold]'
        )

        # Only mention the subscription hint and the title list when there is
        # something to list; otherwise the summary would end with an empty block.
        if inaccessible_titles:
            inaccessible_titles_str = ''.join(
                f' - {title}\n' for title in inaccessible_titles
            )
            summary += (
                ' (need higher tier subscription) see their titles:\n'
                '\n'
                f'[bold]{inaccessible_titles_str}[/bold]'
            )

        self.logger.success(summary)
def to_domain_list_chunk(post_list: BoostyPostDataListDTO) -> PostDataChunkTextualList:
    """Map a Boosty API list DTO onto the domain's PostDataChunkTextualList."""

    def convert_list_item(
        api_item: BoostyPostDataListItemDTO,
    ) -> PostDataChunkTextualList.ListItem:
        """Translate one API list item, descending into its nested items."""
        # Only entries whose type is 'text' are mapped; any other entry is skipped.
        # Each one is wrapped in a text DTO so the shared text mapper can be reused.
        text_chunks: list[PostDataChunkText] = [
            PostDataChunkText(
                text_fragments=to_domain_text_chunk(
                    BoostyPostDataTextDTO(
                        type='text',
                        content=entry.content,
                        modificator=entry.modificator or '',
                    )
                )
            )
            for entry in api_item.data
            if entry.type == 'text'
        ]

        children = [convert_list_item(child) for child in api_item.items]

        return PostDataChunkTextualList.ListItem(
            data=text_chunks, nested_items=children
        )

    return PostDataChunkTextualList(
        items=[convert_list_item(top_item) for top_item in post_list.items]
    )
def convert_list_to_html(chunk: PostDataChunkTextualList) -> HtmlGenList:
    """Translate a domain textual list into the HTML generator's list model."""

    def convert_list_item(item: PostDataChunkTextualList.ListItem) -> HtmlListItem:
        """Build the HTML list item for one domain item and all of its descendants."""
        return HtmlListItem(
            data=list(map(convert_text_to_html, item.data)),
            nested_items=list(map(convert_list_item, item.nested_items)),
        )

    # No style is read from the domain chunk, so every list is emitted as unordered.
    return HtmlGenList(
        items=list(map(convert_list_item, chunk.items)),
        style=HtmlListStyle.UNORDERED,
    )
    11 | Development Process 12 |
    13 | 14 | ### 🔧 Quick Start 15 | 16 | 1. Fork and clone the repository 17 | 2. Install dependencies: `poetry install` 18 | 3. Create a feature branch and make your changes 19 | 4. Run tests: `poetry run pytest` 20 | 5. Don't forget to bump the version with `poetry version patch` (or minor/major) and update `CHANGELOG.md` 21 | 6. Open a pull request and describe changes and why they are needed 22 | 23 | **Most of the needed/handy commands are available via `make`.** 24 | To see available commands, run: 25 | ```bash 26 | make help 27 | ``` 28 | 29 | ### 🩺 Code Quality 30 | 31 | We use: 32 | - **Ruff** for linting and formatting 33 | - **Pyright** for type checking 34 | - **pytest** for testing 35 | 36 | *Please ensure your IDE is configured to use these tools for a smooth development experience.* 37 | 38 | 39 | ### 📝 Writing Good Commit Messages 40 | 41 | **We use**: 42 | - [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) for commit messages. 43 | - [GitMoji](https://gitmoji.dev/) for visual representation of commit types. (**OPTIONAL**) 44 | - Describe not only the change but also **why** it was made. 45 | 46 | 47 | So a generic commit message would look like this: 48 | ``` 49 | feat: ✨ Add hyperspace drive support 50 | The hyperspace drive allows faster travel between galaxies. 51 | 52 | fix: 🐛 Fix formatting. 53 | ``` 54 | 55 | **To make it even easier for you, use the VS Code extension:** 56 | - [VSCode Conventional Commits](https://marketplace.visualstudio.com/items?itemName=vivaxy.vscode-conventional-commits) - it speeds up writing commit messages in our format. 57 | 58 | 59 | ### ✅ Pull Requests CI Checks 60 | 61 | **The project now uses GitHub Actions to:** 62 | - Check PRs for code quality (linting, type checking, tests) 63 | - Check `dev -> main` PRs for version bump 64 | - Automatically create releases on `main` merge (PyPI and GitHub Releases) 65 | 66 | 67 | ### 🔨 Other HOW TOs: 68 | 69 |
    70 | 🏁 Making a Release 71 | 72 | 1. **Prepare in `dev` branch:** 73 | ```bash 74 | poetry version patch # or minor/major 75 | # Update CHANGELOG.md 76 | git commit -am "chore: bump version to X.Y.Z" 77 | git push origin dev 78 | ``` 79 | 80 | 2. **Create PR:** `dev` → `main` 81 | 82 | 3. **Merge PR** → Automatic release! 🎉 83 |
    84 | 85 |
    86 | 🐛 Hotfix 87 | 88 | 1. **From main:** 89 | ```bash 90 | git checkout -b hotfix/fix-name 91 | poetry version patch 92 | # Fix bug, update changelog 93 | ``` 94 | 95 | 2. **PR:** `hotfix/*` → `main` 96 |
    97 | -------------------------------------------------------------------------------- /boosty_downloader/src/application/mappers/post_mapper.py: -------------------------------------------------------------------------------- 1 | """Mapping logic for converting Boosty API post DTOs to domain Post objects.""" 2 | 3 | from boosty_downloader.src.application import mappers 4 | from boosty_downloader.src.domain.post import Post 5 | from boosty_downloader.src.domain.post_data_chunks import PostDataChunkText 6 | from boosty_downloader.src.infrastructure.boosty_api.models.post.base_post_data import ( 7 | BoostyPostDataExternalVideoDTO, 8 | ) 9 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post import PostDTO 10 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post_data_types import ( 11 | BoostyPostDataFileDTO, 12 | BoostyPostDataHeaderDTO, 13 | BoostyPostDataImageDTO, 14 | BoostyPostDataLinkDTO, 15 | BoostyPostDataListDTO, 16 | BoostyPostDataOkVideoDTO, 17 | BoostyPostDataTextDTO, 18 | ) 19 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post_data_types.post_data_ok_video import ( 20 | BoostyOkVideoType, 21 | ) 22 | 23 | 24 | def map_post_dto_to_domain( 25 | post_dto: PostDTO, preferred_video_quality: BoostyOkVideoType 26 | ) -> Post: 27 | """Convert a Boosty API PostDTO object to a domain Post object, mapping all data chunks to their domain representations.""" 28 | post = Post( 29 | uuid=post_dto.id, 30 | title=post_dto.title, 31 | created_at=post_dto.created_at, 32 | updated_at=post_dto.updated_at, 33 | has_access=post_dto.has_access, 34 | signed_query=post_dto.signed_query, 35 | post_data_chunks=[], 36 | ) 37 | 38 | for data_chunk in post_dto.data: 39 | match data_chunk: 40 | case BoostyPostDataImageDTO(): 41 | post.post_data_chunks.append(mappers.to_domain_image_chunk(data_chunk)) 42 | case ( 43 | BoostyPostDataHeaderDTO() 44 | | BoostyPostDataLinkDTO() 45 | | BoostyPostDataTextDTO() 46 | ): 47 | 
text_fragments = mappers.to_domain_text_chunk(data_chunk) 48 | text_chunk = PostDataChunkText(text_fragments=text_fragments) 49 | post.post_data_chunks.append(text_chunk) 50 | case BoostyPostDataListDTO(): 51 | post.post_data_chunks.append(mappers.to_domain_list_chunk(data_chunk)) 52 | case BoostyPostDataFileDTO(): 53 | post.post_data_chunks.append( 54 | mappers.to_domain_file_chunk(data_chunk, post.signed_query) 55 | ) 56 | case BoostyPostDataOkVideoDTO(): 57 | video_chunk = mappers.to_ok_boosty_video_content( 58 | data_chunk, preferred_quality=preferred_video_quality 59 | ) 60 | if video_chunk is not None: 61 | post.post_data_chunks.append(video_chunk) 62 | case BoostyPostDataExternalVideoDTO(): 63 | post.post_data_chunks.append( 64 | mappers.to_external_video_content(data_chunk) 65 | ) 66 | 67 | return post 68 | -------------------------------------------------------------------------------- /boosty_downloader/src/interfaces/cli_options.py: -------------------------------------------------------------------------------- 1 | """CLI option definitions for Boosty Downloader.""" 2 | 3 | from pathlib import Path 4 | from typing import Annotated 5 | 6 | import typer 7 | 8 | from boosty_downloader.src.application.filtering import ( 9 | DownloadContentTypeFilter, 10 | VideoQualityOption, 11 | ) 12 | from boosty_downloader.src.interfaces.help_panels import HelpPanels 13 | 14 | UsernameOption = Annotated[ 15 | str, 16 | typer.Option( 17 | '--username', 18 | '-u', 19 | help='Username to download posts from.', 20 | ), 21 | ] 22 | 23 | RequestDelaySecondsOption = Annotated[ 24 | float, 25 | typer.Option( 26 | '--request-delay-seconds', 27 | '-d', 28 | help='Delay between requests to the API, in seconds', 29 | min=1, 30 | rich_help_panel=HelpPanels.network, 31 | ), 32 | ] 33 | 34 | 35 | ContentTypeFilterOption = Annotated[ 36 | list[DownloadContentTypeFilter] | None, 37 | typer.Option( 38 | '--content-type-filter', 39 | '-f', 40 | help='Choose what content you want to 
# -q / --preferred-video-quality: a VideoQualityOption value, listed in the
# "filtering" help panel. The metavar text spells out the values shown in help.
PreferredVideoQualityOption = Annotated[
    VideoQualityOption,
    typer.Option(
        '--preferred-video-quality',
        '-q',
        help='Preferred video quality. If not available, the best quality will be used.',
        metavar='Available options:\n- smallest_size\n- low\n- medium\n- high\n- highest',
        rich_help_panel=HelpPanels.filtering,
    ),
]

# -p / --post-url: optional URL of a single post, listed in the "actions"
# help panel; no default is displayed in help.
PostUrlOption = Annotated[
    str | None,
    typer.Option(
        '--post-url',
        '-p',
        help='Download only the specified post if possible',
        metavar='URL',
        show_default=False,
        rich_help_panel=HelpPanels.actions,
    ),
]

# -t / --only-check-total: boolean flag, listed in the "actions" help panel.
CheckTotalCountOption = Annotated[
    bool,
    typer.Option(
        '--only-check-total',
        '-t',
        help='Check total count of accessible/inaccessible(+names) posts and exit, no download',
        rich_help_panel=HelpPanels.actions,
    ),
]

# -c / --clean-cache: boolean flag, listed in the "actions" help panel.
CleanCacheOption = Annotated[
    bool,
    typer.Option(
        '--clean-cache',
        '-c',
        help='Remove posts cache for selected username [italic]completely[/italic], use with caution',
        rich_help_panel=HelpPanels.actions,
    ),
]

# -o / --destination-directory: optional path that must be a directory
# (file_okay=False); typer is asked to resolve it (resolve_path=True).
DestinationDirectoryOption = Annotated[
    Path | None,
    typer.Option(
        '--destination-directory',
        '-o',
        help='Directory to save downloaded posts',
        dir_okay=True,
        file_okay=False,
        resolve_path=True,
        rich_help_panel=HelpPanels.actions,
        show_default=False,
    ),
]
# Print an overview of the available make targets.
# Target names listed here must match the real targets defined below
# (e.g. `posts-example`, with a dash).
help:
	@echo ------------------------- To run locally: ----------------------------
	@echo Run make deps to install dependencies
	@echo And to run current project locally without installation:
	@echo poetry run python -m boosty_downloader.main
	@echo . .
	@echo ------------------------- Available commands: ------------------------
	@echo Building:
	@echo deps - Install project dependencies using poetry
	@echo build - Build the project whl file
	@echo ----------------------------------------------------------------------
	@echo Code Health:
	@echo dev-fix - Try to fix code issues, show problems if any
	@echo ci-check - Run CI checks (linter/formatter/type checks)
	@echo types - Code type checks using pyright
	@echo format-check - Code format check using ruff
	@echo format-fix - Code format using ruff
	@echo lint-check - Code linting (only check)
	@echo lint-fix - Code linting (try to fix)
	@echo ----------------------------------------------------------------------
	@echo Testing:
	@echo test - Run the project unit tests
	@echo test-verbose - Run the project unit tests with verbose output
	@echo test-api - Run the project API integration tests
	@echo test-api-verbose - Run the project API integration tests with verbose output
	@echo ----------------------------------------------------------------------
	@echo Endpoints Analysis (Only work if integration tests config available):
	@echo posts-example - Show posts json for defined author
class DownloadSettings(BaseModel):
    """Settings for the script downloading process"""

    # Defaults to the relative path './boosty-downloads'.
    # NOTE(review): presumably the directory downloads are saved to — confirm
    # against the code that reads this setting.
    target_directory: Path = Path('./boosty-downloads')
def create_sample_config_file() -> None:
    """
    Write the sample configuration to CONFIG_LOCATION.

    Any file already present at that path is overwritten. The file is written
    as UTF-8 explicitly so that it matches the encoding `Config` uses when it
    reads the file back (`yaml_file_encoding='utf-8'`), instead of depending
    on the platform's default text encoding.
    """
    CONFIG_LOCATION.write_text(DEFAULT_YAML_CONFIG_VALUE, encoding='utf-8')
'[bold yellow]Make sure you fill `auth_header` and `cookie` with yours, they are required[/bold yellow]', 92 | ) 93 | logger_instances.downloader_logger.success( 94 | f'Recreated it at [green bold]{CONFIG_LOCATION.absolute()}[/green bold]', 95 | ) 96 | sys.exit(1) 97 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 2.0.1 2 | 3 | - 🐛 Fixed image data so posts download even when width/height is missing 4 | - 🐛 Fixed download process to stop automatically after the chosen post 5 | 6 | ## 2.0.0 7 | 8 | ### ⛔ BREAKING CHANGES ⛔ 9 | 10 | - Because of the new caching system, the cache database changed. 11 | If you have an existing cache, you may need to clean it first to avoid issues. 12 | 13 | The utility will automatically detect cache inconsistencies and prompt you to clean it though. 14 | 15 | I tried to figure out some sort of DB migration, but it is too complex for the current state of the project, so I decided to just make it a breaking change for now. 16 | 17 | If you know how I can keep migrating the cache given the fact that DBs are 18 | scattered across multiple author directories, and possibly even have different versions, 19 | please let me know with an issue! 20 | 21 | - Some options were renamed but their functionality remains the same 22 | 23 | ### 🔔 New Features 24 | 25 | - 🔔 **Automatic Update Checker** 26 | You'll now be notified when a new version is available on PyPI. 27 | 28 | - 📦 **Improved Caching Layer** 29 | - Only the requested parts are cached to avoid unnecessary re-downloads/skips (before this change the post was cached entirely not just the requested parts), so now partial updates are possible. 30 | - Cache is properly **invalidated** if a post is updated by its author (will be re-downloaded). 31 | - More **robust and accurate** caching system: better handling of missing post parts.
32 | 33 | - **HTML Generation Enhancements** 34 | - New **HTML generator engine** with support for **Dark/Light modes**. 🦉 35 | - Added support for **headings and lists** in HTML output. 36 | - Added better support for styling (italic/bold/etc) 37 | - `post_content` now includes both **images AND videos** (offline only). 38 | 39 | - **Improved CLI UX** 40 | - New destination option that allows overriding config values. 41 | - Better help descriptions with logical **option grouping**. 42 | - More informative **post counter**: displays both accessible and inaccessible posts, with names listed for all inaccessible posts. 43 | - Enhanced **logging and error handling** for a more readable and helpful output. 44 | 45 | - **Retry Logic** 46 | - If post download fails, it will be retried up to 5 times with exponential backoff. 47 | - After 5 failed attempts, the post will be skipped and not cached. 48 | 49 | ### 🐛 Fixes 50 | 51 | - Fixed duplication problem [#12](https://github.com/Glitchy-Sheep/boosty-downloader/issues/12) (now posts are cached by UUID and have it as part of the filename, so duplication is no longer an issue) 52 | - Fixed external video downloading for unsupported formats (now format >=720p is preferred, less otherwise). 53 | - Fixed HTML generation for posts with **no content**, now it won't be created. 54 | - Resolved issues with **newline handling** in some HTML outputs. 55 | - Fixed **Ctrl+C interruption** handling with proper cleanup and messaging. 56 | - Prevented creation of **empty directories** for posts with no downloadable content. 57 | Now the utility does the job only if there is something to download. 58 | 59 | ### 🧹 Miscellaneous 60 | 61 | - Internal **project structure refactored** for better maintainability and scalability.
def test_ranking_dict_basic_operations():
    """Stored values are readable by key and pop_max drains them from highest to lowest."""
    scores = {'a': 10, 'b': 20, 'c': 15}

    ranking = RankingDict[str]()
    for key, score in scores.items():
        ranking[key] = score

    for key, score in scores.items():
        assert ranking[key] == score

    # Entries come out ordered by value, highest first.
    for expected_entry in (('b', 20), ('c', 15), ('a', 10)):
        assert ranking.pop_max() == expected_entry

    # Once drained, pop_max reports None.
    assert ranking.pop_max() is None
def test_ranking_dict_with_duplicate_entries():
    """Re-assigning a key replaces its value, and the newer value decides its rank."""
    ranking = RankingDict[str]()
    # The second assignment to 'a' overwrites the first with a higher value.
    for key, score in (('a', 10), ('b', 20), ('a', 30)):
        ranking[key] = score

    for expected_entry in (('a', 30), ('b', 20)):
        assert ranking.pop_max() == expected_entry

    assert ranking.pop_max() is None
import io 4 | import logging 5 | import sys 6 | 7 | from rich.logging import RichHandler 8 | 9 | # Detect if running in a terminal 10 | is_terminal = sys.stdout.isatty() 11 | 12 | # Ensure proper UTF-8 handling in non-interactive environments 13 | if not is_terminal and 'pytest' not in sys.modules: 14 | sys.stdout = io.TextIOWrapper( 15 | sys.stdout.buffer, 16 | encoding='utf-8', 17 | line_buffering=True, 18 | ) 19 | 20 | 21 | class RichLogger: 22 | """Enhanced logger with Rich for colorful output while keeping severity levels.""" 23 | 24 | def __init__(self, prefix: str) -> None: 25 | self.prefix = prefix 26 | 27 | # Avoid adding duplicate handlers 28 | handler = RichHandler( 29 | log_time_format='[%H:%M:%S]', 30 | markup=True, 31 | show_time=True, 32 | rich_tracebacks=True, 33 | show_path=False, 34 | show_level=False, 35 | ) 36 | 37 | self._handler = handler 38 | self._log = logging.getLogger(prefix) 39 | self._log.setLevel(logging.DEBUG) 40 | self._log.addHandler(handler) 41 | self.console = self._handler.console 42 | self.logging_logger_obj = self._log 43 | 44 | def _log_message( 45 | self, 46 | level: int, 47 | msg: str, 48 | *, 49 | highlight: bool = True, 50 | tab_level: int = 0, 51 | ) -> None: 52 | if highlight: 53 | self._log.log(level, '\t' * tab_level + msg) 54 | else: 55 | self._handler.console.log('\t' * tab_level + msg, highlight=False) 56 | 57 | def info(self, msg: str, *, highlight: bool = True, tab_level: int = 0) -> None: 58 | prefix = f'[cyan]{self.prefix}[/cyan][blue].INFO 🔹[/blue]:' 59 | self._log_message( 60 | logging.INFO, 61 | f'{prefix} {msg}', 62 | highlight=highlight, 63 | tab_level=tab_level, 64 | ) 65 | 66 | def success(self, msg: str, *, highlight: bool = True, tab_level: int = 0) -> None: 67 | prefix = f'[cyan]{self.prefix}[/cyan][green].SUCCESS ✔[/green]:' 68 | self._log_message( 69 | logging.INFO, 70 | f'{prefix} {msg}', 71 | highlight=highlight, 72 | tab_level=tab_level, 73 | ) 74 | 75 | def error(self, msg: str, *, highlight: 
KT = TypeVar('KT')


class RankingDict(Generic[KT]):
    """
    A dict which also keeps track of the max value, it's not thread-safe.

    Values are stored in ``data``. ``max_heap`` is a ``heapq`` min-heap of
    ``(-value, key)`` tuples, so its smallest item corresponds to the largest value.
    Heap items are never removed eagerly: overwriting or deleting a key leaves the
    old tuple in the heap, and ``entries`` remembers the single live tuple per key
    so that ``pop_max`` can recognise and discard stale ones.

    Because heap items are plain tuples, two keys holding equal values are ordered
    by comparing the keys themselves.
    """

    def __init__(self) -> None:
        self.data: dict[KT, float] = {}
        self.max_heap: list[tuple[float, KT]] = []
        self.entries: dict[KT, tuple[float, KT]] = {}

    def __getitem__(self, key: KT) -> float:
        """Get the value associated with the key (KeyError if the key is absent)"""
        return self.data[key]

    def __setitem__(self, key: KT, value: float) -> None:
        """Set the value associated with the key, superseding any previous value"""
        # Negate the value: heapq only provides a min-heap.
        entry = (-value, key)
        self.data[key] = value
        self.entries[key] = entry
        heapq.heappush(self.max_heap, entry)

    def __delitem__(self, key: KT) -> None:
        """Remove the key and its value; deleting an unknown key is a no-op"""
        # Dropping the bookkeeping entry is enough to invalidate whatever tuples
        # are still sitting in the heap; no tombstone has to be kept around.
        self.data.pop(key, None)
        self.entries.pop(key, None)

    def pop_max(self) -> tuple[KT, float] | None:
        """
        Pop the maximum value.

        Returns the ``(key, value)`` pair with the largest value and removes it,
        or ``None`` once no live keys remain.
        """
        while self.max_heap:
            entry = heapq.heappop(self.max_heap)
            negated_value, key = entry
            # Skip tuples left behind by overwritten or deleted keys.
            if key in self.data and self.entries.get(key) == entry:
                del self.data[key]
                del self.entries[key]
                return key, -negated_value  # Convert back to positive
        return None
tuple[BoostyOkVideoUrl, BoostyOkVideoType] | None: 81 | """Select the best video format for downloading according to user's preferences""" 82 | quality_ranking: RankingDict[BoostyOkVideoType] = get_quality_ranking() 83 | quality_ranking[preferred_quality] = float('inf') 84 | 85 | video_urls_map = {video.type: video for video in video_urls} 86 | 87 | while highest_rank_video_type := quality_ranking.pop_max(): 88 | highest_rank_video_type = highest_rank_video_type[0] 89 | 90 | video_url = video_urls_map.get(highest_rank_video_type) 91 | if video_url and video_url.url: 92 | return video_url, highest_rank_video_type 93 | 94 | return None 95 | -------------------------------------------------------------------------------- /test/integration/boosty_api/boosty_api_test.py: -------------------------------------------------------------------------------- 1 | """Integration tests for Boosty API client. 2 | 3 | These tests make real requests to the Boosty API and require proper configuration. 4 | 5 | Please see test/ABOUT_TESTING.md for more details. 
@pytest.mark.asyncio
async def test_get_posts_with_pagination(
    authorized_boosty_client: BoostyAPIClient, integration_config: IntegrationTestConfig
) -> None:
    """
    Test pagination functionality for author posts.

    Requests two consecutive pages of two posts each and checks that they do not
    share any post. When the configured author has no second page the test is
    skipped instead of passing without having asserted anything.
    """
    first_page = await authorized_boosty_client.get_author_posts(
        author_name=integration_config.boosty_existing_author, limit=2
    )

    if first_page.extra.is_last or not first_page.extra.offset:
        pytest.skip('Author has no second page of posts, pagination cannot be verified')

    second_page = await authorized_boosty_client.get_author_posts(
        author_name=integration_config.boosty_existing_author,
        limit=2,
        offset=first_page.extra.offset,
    )

    # Posts should be different between pages
    first_page_ids = {post.id for post in first_page.posts}
    second_page_ids = {post.id for post in second_page.posts}
    assert first_page_ids.isdisjoint(second_page_ids), (
        'Pages should contain different posts'
    )

    3 | 4 |

    5 | 6 | # 🖥️ About 7 | 8 | Welcome to the **Boosty Downloader** project! 9 | 10 | This CLI tool allows you to download most of the content from Boosty.to in bulk. 11 | The post content itself is saved in html with a little bit of styling. 12 | 13 | **You can download:** 14 | - Boosty Videos 15 | - External Videos (YouTube, Vimeo) 16 | - Files 17 | - Full Post content (including photos and links) 18 | 19 | ## 📑 Table of Contents 20 | - [🖥️ About](#️-about) 21 | - [📑 Table of Contents](#-table-of-contents) 22 | - [✨ Features](#-features) 23 | - [📸 Screenshots \& Usage](#-screenshots--usage) 24 | - [🛠️ Installation](#️-installation) 25 | - [🚀 Configuration for Usage](#-configuration-for-usage) 26 | - [Step 1: Get the auth cookie and auth header](#step-1-get-the-auth-cookie-and-auth-header) 27 | - [Step 2: Paste the cookie and auth header into the config file](#step-2-paste-the-cookie-and-auth-header-into-the-config-file) 28 | - [Step 3: Run the utility](#step-3-run-the-utility) 29 | - [💖 Contributing](#-contributing) 30 | - [📜 License](#-license) 31 | 32 | 33 | 34 | ## ✨ Features 35 | 36 | - 📦 **Bulk download**: Download all available content from your favorite creator. 37 | - 🔎 **Total checker**: See how many posts are available to you, and which are not. 38 | - 📂 **Content type filters**: Download only the content you need (videos, images, etc), choose what you really want with flags (see below). 39 | - 📄 **Download specific posts**: Download post by url and username. 40 | - 🔃 **Sync content seamlessly**: The utility keeps cache of already downloaded posts, so you can resume your download at any time or get new content after a while. 41 | - 📼 **Choose your video quality**: You can choose preferred video quality to download (for boosty videos) 42 | - 🎨 **Beauty posts preview**: You can see posts content with rendered offline html files with dark/light theme changing. 
43 | - 📊 **Order matters**: Posts have dates in their names, so you can just sort them by name in your file explorer and see them in the correct chronological order. 44 | - 🆙 **App update checker**: If new updates are available, you'll be notified the next time you use the application. 45 | 46 | 47 | ## 📸 Screenshots & Usage 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | ## 🛠️ Installation 57 | 58 | 1. **Install Python**: 59 | - Windows: 60 | ```bash 61 | winget install Python.Python.3.13 62 | ``` 63 | - Linux: 64 | ```bash 65 | sudo apt-get install python3 66 | ``` 67 | - macOS: 68 | ```bash 69 | brew install python 70 | ``` 71 | 72 | 2. **Install the boosty-downloader package:** 73 | ```bash 74 | pip install boosty-downloader 75 | ``` 76 | 77 | 3. **Run the application:** 78 | ```bash 79 | boosty-downloader --help 80 | ``` 81 | 82 | ## 🚀 Configuration for Usage 83 | 84 | ### Step 1: Get the auth cookie and auth header 85 | 86 | 1. Open the [Boosty](https://boosty.to) website. 87 | 2. Click the "Sign in" button and fill in your credentials. 88 | 3. Navigate to any author you have access to and scroll through the posts a little. 89 | 4. Copy the auth token and cookie from the browser's network tab. 90 | 91 | 92 | 93 | ### Step 2: Paste the cookie and auth header into the config file 94 | 95 | This config will be created during the first run of the app in the current working directory. 96 | 97 | 98 | 99 | ### Step 3: Run the utility 100 | 101 | Now you can just download your content with the following command: 102 | 103 | ```bash 104 | boosty-downloader --username YOUR_CREATOR_NAME 105 | ``` 106 | 107 | ## 💖 Contributing 108 | 109 | If you want to contribute to this project, please see the [CONTRIBUTING.md](CONTRIBUTING.md). 110 | 111 | ## 📜 License 112 | 113 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
114 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/python 2 | # Edit at https://www.toptal.com/developers/gitignore?templates=python 3 | 4 | test/data 5 | 6 | # ------------- USER DEFINED --------------- # 7 | lab/ 8 | 9 | # For local downloading tests 10 | boosty-downloads/ 11 | 12 | # Credentials 13 | config.yaml 14 | 15 | 16 | ### Python ### 17 | # Byte-compiled / optimized / DLL files 18 | __pycache__/ 19 | *.py[cod] 20 | *$py.class 21 | 22 | # C extensions 23 | *.so 24 | 25 | # Distribution / packaging 26 | .Python 27 | build/ 28 | develop-eggs/ 29 | dist/ 30 | downloads/ 31 | eggs/ 32 | .eggs/ 33 | lib/ 34 | lib64/ 35 | parts/ 36 | sdist/ 37 | var/ 38 | wheels/ 39 | share/python-wheels/ 40 | *.egg-info/ 41 | .installed.cfg 42 | *.egg 43 | MANIFEST 44 | 45 | # PyInstaller 46 | # Usually these files are written by a python script from a template 47 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
48 | *.manifest 49 | *.spec 50 | 51 | # Installer logs 52 | pip-log.txt 53 | pip-delete-this-directory.txt 54 | 55 | # Unit test / coverage reports 56 | htmlcov/ 57 | .tox/ 58 | .nox/ 59 | .coverage 60 | .coverage.* 61 | .cache 62 | nosetests.xml 63 | coverage.xml 64 | *.cover 65 | *.py,cover 66 | .hypothesis/ 67 | .pytest_cache/ 68 | cover/ 69 | 70 | # Translations 71 | *.mo 72 | *.pot 73 | 74 | # Django stuff: 75 | *.log 76 | local_settings.py 77 | db.sqlite3 78 | db.sqlite3-journal 79 | 80 | # Flask stuff: 81 | instance/ 82 | .webassets-cache 83 | 84 | # Scrapy stuff: 85 | .scrapy 86 | 87 | # Sphinx documentation 88 | docs/_build/ 89 | 90 | # PyBuilder 91 | .pybuilder/ 92 | target/ 93 | 94 | # Jupyter Notebook 95 | .ipynb_checkpoints 96 | 97 | # IPython 98 | profile_default/ 99 | ipython_config.py 100 | 101 | # pyenv 102 | # For a library or package, you might want to ignore these files since the code is 103 | # intended to run in multiple environments; otherwise, check them in: 104 | # .python-version 105 | 106 | # pipenv 107 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 108 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 109 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 110 | # install all needed dependencies. 111 | #Pipfile.lock 112 | 113 | # poetry 114 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 115 | # This is especially recommended for binary packages to ensure reproducibility, and is more 116 | # commonly ignored for libraries. 117 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 118 | #poetry.lock 119 | 120 | # pdm 121 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
122 | #pdm.lock 123 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 124 | # in version control. 125 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 126 | .pdm.toml 127 | .pdm-python 128 | .pdm-build/ 129 | 130 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 131 | __pypackages__/ 132 | 133 | # Celery stuff 134 | celerybeat-schedule 135 | celerybeat.pid 136 | 137 | # SageMath parsed files 138 | *.sage.py 139 | 140 | # Environments 141 | .env 142 | .venv 143 | env/ 144 | venv/ 145 | ENV/ 146 | env.bak/ 147 | venv.bak/ 148 | 149 | # Spyder project settings 150 | .spyderproject 151 | .spyproject 152 | 153 | # Rope project settings 154 | .ropeproject 155 | 156 | # mkdocs documentation 157 | /site 158 | 159 | # mypy 160 | .mypy_cache/ 161 | .dmypy.json 162 | dmypy.json 163 | 164 | # Pyre type checker 165 | .pyre/ 166 | 167 | # pytype static type analyzer 168 | .pytype/ 169 | 170 | # Cython debug symbols 171 | cython_debug/ 172 | 173 | # PyCharm 174 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 175 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 176 | # and can be added to the global gitignore or merged into this file. For a more nuclear 177 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
class AppEnvironment:
    """
    Manages the application's resource initialization and cleanup, providing an async context for dependency injection.

    Usage: ``async with AppEnvironment(config) as env: ...`` where ``env`` is an
    ``AppEnvironment.Environment`` holding the ready-to-use resources. Everything
    opened on entry is registered on one ``AsyncExitStack`` and released on exit.
    """

    @dataclass
    class Environment:
        """Holds initialized application resources for use within the app context."""

        boosty_api_client: BoostyAPIClient
        downloading_retry_client: RetryClient
        progress_reporter: ProgressReporter
        destination_directory: Path
        post_cache: SQLitePostCache

    @dataclass
    class AppConfig:
        """Configuration for the application environment."""

        author_name: str
        target_directory: Path
        boosty_headers: LooseHeaders
        boosty_cookies_jar: aiohttp.CookieJar
        retry_options: RetryOptionsBase
        request_delay_seconds: float
        logger: RichLogger

    def __init__(
        self,
        config: AppConfig,
    ) -> None:
        self.author_name = config.author_name
        self.target_directory = config.target_directory
        self.boosty_headers = config.boosty_headers
        self.boosty_cookies_jar = config.boosty_cookies_jar
        self.logger = config.logger
        self.retry_options = config.retry_options
        self._request_delay_seconds = config.request_delay_seconds

    async def __aenter__(self) -> 'Environment':
        """
        Enter the async context and initialize resources.

        If any resource fails to initialize, the ones that were already opened
        are released before the exception propagates: ``__aexit__`` is not
        invoked when ``__aenter__`` raises, so nothing else would close them.
        """
        self._exit_stack = AsyncExitStack()
        await self._exit_stack.__aenter__()

        try:
            return await self._initialize_resources()
        except BaseException:
            await self._exit_stack.aclose()
            raise

    async def _initialize_resources(self) -> 'AppEnvironment.Environment':
        """Open every resource, registering its cleanup on the exit stack as it goes."""
        authorized_boosty_session = await self._exit_stack.enter_async_context(
            # Don't: set BASE_URL here, the BoostyAPIClient will handle it internally.
            # Why: this session will be used for both downloading and API requests with different bases.
            aiohttp.ClientSession(
                headers=self.boosty_headers,
                cookie_jar=self.boosty_cookies_jar,
                timeout=aiohttp.ClientTimeout(total=None),
                trust_env=True,
            )
        )

        progress_reporter = await self._exit_stack.enter_async_context(
            use_reporter(
                reporter=ProgressReporter(
                    logger=self.logger.logging_logger_obj,
                    console=self.logger.console,
                )
            )
        )

        # The retry client wraps the session opened above; API calls and file
        # downloads both go through it.
        authorized_retry_client = RetryClient(
            authorized_boosty_session, retry_options=self.retry_options
        )

        boosty_api_client = BoostyAPIClient(
            authorized_retry_client,
            request_delay_seconds=self._request_delay_seconds,
        )

        destination_directory = self.target_directory / self.author_name

        post_cache = SQLitePostCache(
            destination=destination_directory,
            logger=self.logger,
        )
        post_cache.__enter__()  # sync context manager
        self._exit_stack.callback(post_cache.__exit__, None, None, None)

        return self.Environment(
            boosty_api_client=boosty_api_client,
            downloading_retry_client=authorized_retry_client,
            progress_reporter=progress_reporter,
            destination_directory=destination_directory,
            post_cache=post_cache,
        )

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        """Exit the async context and clean up resources"""
        await self._exit_stack.__aexit__(exc_type, exc_val, exc_tb)
@dataclass
class PostDataChunkText:
    """
    Represent a textual data chunk within a post.

    It can contain multiple text fragments, each with optional styling and links.

    For example:
        PostDataChunkText(
            text_fragments=[
                PostDataChunkText.TextFragment(
                    text="Hello, world!",
                    style=PostDataChunkText.TextFragment.TextStyle(bold=True),
                ),
                PostDataChunkText.TextFragment(
                    text="Visit Boosty",
                    link_url="https://boosty.com",
                    header_level=1,
                ),
                PostDataChunkText.TextFragment(text="This is a normal text."),
                PostDataChunkText.TextFragment(text=""),
            ]
        )
    """

    @dataclass
    class TextFragment:
        """
        Represent a text fragment within a post with possibly additional styling.

        It also can contain a link to external resources (if link_url is None - it's just a text).
        """

        @dataclass
        class TextStyle:
            """Represent text styling options."""

            bold: bool = False
            italic: bool = False
            underline: bool = False

        text: str
        link_url: str | None = None
        header_level: int = 0  # Header level (0-6), 0 means no header
        # default_factory gives every fragment its own TextStyle instance
        style: TextStyle = field(default_factory=TextStyle)

    text_fragments: list[TextFragment]
@dataclass
class PostDataChunkTextualList:
    """
    Represent a list of text items within a post.

    Each item can be a simple text or a more complex structure with optional styling.

    Lists can be nested: every item carries its own ``nested_items``, and each
    level of nesting is just another list of items.

    For example this (``text(s)`` is shorthand for
    ``PostDataChunkText(text_fragments=[PostDataChunkText.TextFragment(text=s)])``):

        PostDataChunkTextualList(
            items=[
                PostDataChunkTextualList.ListItem(
                    data=[text("Item 1")],
                    nested_items=[]
                ),
                PostDataChunkTextualList.ListItem(
                    data=[text("Nested list:")],
                    nested_items=[
                        PostDataChunkTextualList.ListItem(
                            data=[text("Item 2")],
                            nested_items=[]
                        ),
                        PostDataChunkTextualList.ListItem(
                            data=[text("Item 3")],
                            nested_items=[]
                        )
                    ]
                )
            ]
        )

    Becomes this:

        - Item 1
        - Nested list:
            - Item 2
            - Item 3
    """

    @dataclass
    class ListItem:
        """Represent a single item in a textual list."""

        # Text chunks that make up this item's own content
        data: list['PostDataChunkText']
        # Child items rendered one level deeper; empty for a leaf item
        nested_items: list['PostDataChunkTextualList.ListItem']

    class ListStyle(Enum):
        """Style of the list, can be ordered or unordered."""

        ordered = 'ordered'
        unordered = 'unordered'

    items: list[ListItem]
class DownloadPostByUrlUseCase:
    """
    Handles downloading a specific Boosty post given its URL.

    Right now it just iterates over the post and downloads it if UUID matches.
    Because I can't find a way to get post by URL directly at this moment.

    If you know how to do it, please open an issue on GitHub or PR with this functionality.
    """

    def __init__(
        self,
        post_url: str,
        boosty_api: BoostyAPIClient,
        destination: Path,
        download_context: DownloadContext,
    ) -> None:
        self.post_url = post_url
        self.boosty_api = boosty_api
        self.destination = destination
        self.context = download_context

    def extract_author_and_uuid_from_url(self) -> tuple[str | None, str | None]:
        """
        Parse Boosty post URL and returns (author_name, post_uuid) if possible.

        Expects URLs like: https://boosty.to/author_name/posts/post_uuid
        A trailing query string or fragment is stripped from the UUID.
        Returns (None, None) if parsing fails or URL is not Boosty; the reason is
        reported through the progress reporter.
        """
        url = self.post_url
        if 'boosty.to' not in url:
            self.context.progress_reporter.error(
                "Provided URL doesn't match Boosty format (https://boosty.to/...)"
            )
            return None, None

        # 'https://boosty.to/author/posts/uuid' splits into
        # ['https:', '', 'boosty.to', author, 'posts', uuid]
        parts = url.split('/')
        try:
            author = parts[3]
            post_uuid = parts[5].split('?')[0].split('#')[0]
        except IndexError:
            self.context.progress_reporter.error(
                'Failed to parse author or post UUID from the provided URL. '
            )
            return None, None

        return author, post_uuid

    async def execute(self) -> None:
        """
        Find the post referenced by the URL and download it.

        The author's posts are scanned page by page until one with the UUID from
        the URL is found. The scan stops as soon as that post has been handled,
        whether the download succeeded, failed or was cancelled by the user.
        """
        author_name, post_uuid = self.extract_author_and_uuid_from_url()
        if not author_name or not post_uuid:
            self.context.progress_reporter.error(
                'Failed to extract author and UUID from the provided URL, aborting...'
            )
            return

        current_page = 0

        async for page in self.boosty_api.iterate_over_posts(
            author_name=author_name, posts_per_page=100
        ):
            current_page += 1
            self.context.progress_reporter.info(
                f'[Page({current_page})] Searching for the post with UUID: {post_uuid}... '
            )
            for post in page.posts:
                if post.id != post_uuid:
                    continue

                self.context.progress_reporter.success(
                    f'Found post with UUID: {post_uuid}, starting download...'
                )

                post_name = f'{post.created_at.date()} - {post.title}'
                post_name = sanitize_string(post_name).replace('.', '').strip()

                try:
                    await DownloadSinglePostUseCase(
                        post_dto=post,
                        destination=self.destination / post_name,
                        download_context=self.context,
                    ).execute()
                except ApplicationCancelledError:
                    self.context.progress_reporter.warn(
                        'Download cancelled by user. Bye!'
                    )
                except ApplicationFailedDownloadError as e:
                    self.context.progress_reporter.error(
                        f'Failed to download post: {e.message}, RESOURCE: ({e.resource})'
                    )

                # The post has been located and its outcome reported; requesting
                # further pages could not find anything else to download.
                return

        self.context.progress_reporter.error(
            'Failed to find and download the specified post.'
        )
    139 | {{ content | safe }} 140 |
    141 | 142 | 158 | 159 | 160 | -------------------------------------------------------------------------------- /boosty_downloader/src/application/use_cases/download_all_posts.py: -------------------------------------------------------------------------------- 1 | """Implements the use case for downloading all posts from a Boosty author, applying filters and caching as needed.""" 2 | 3 | import asyncio 4 | from pathlib import Path 5 | 6 | from boosty_downloader.src.application.di.download_context import DownloadContext 7 | from boosty_downloader.src.application.exceptions.application_errors import ( 8 | ApplicationCancelledError, 9 | ApplicationFailedDownloadError, 10 | ) 11 | from boosty_downloader.src.application.use_cases.download_single_post import ( 12 | DownloadSinglePostUseCase, 13 | ) 14 | from boosty_downloader.src.infrastructure.boosty_api.core.client import BoostyAPIClient 15 | from boosty_downloader.src.infrastructure.path_sanitizer import ( 16 | sanitize_string, 17 | ) 18 | 19 | 20 | class DownloadAllPostUseCase: 21 | """ 22 | Use case for downloading all user's posts. 23 | 24 | This class encapsulates the logic required to download all posts from a source. 25 | Initialize the use case and call its methods to perform the download operation. 26 | 27 | All the downloaded content parts will be saved under the specified destination path. 
28 | """ 29 | 30 | def __init__( 31 | self, 32 | author_name: str, 33 | boosty_api: BoostyAPIClient, 34 | destination: Path, 35 | download_context: DownloadContext, 36 | ) -> None: 37 | self.author_name = author_name 38 | 39 | self.boosty_api = boosty_api 40 | self.destination = destination 41 | self.context = download_context 42 | 43 | async def execute(self) -> None: 44 | posts_iterator = self.boosty_api.iterate_over_posts( 45 | author_name=self.author_name 46 | ) 47 | 48 | current_page = 0 49 | 50 | async for page in posts_iterator: 51 | count = len(page.posts) 52 | current_page += 1 53 | 54 | page_task_id = self.context.progress_reporter.create_task( 55 | f'Got new posts: [{count}]', 56 | total=count, 57 | indent_level=0, # Each page prints without indentation 58 | ) 59 | 60 | for post_dto in page.posts: 61 | if not post_dto.has_access: 62 | self.context.progress_reporter.warn( 63 | f'Skip post ([red]no access to content[/red]): {post_dto.title}' 64 | ) 65 | continue 66 | 67 | # For empty titles use post ID as a fallback (first 8 chars) 68 | if len(post_dto.title) == 0: 69 | post_dto.title = f'Not title (id_{post_dto.id[:8]})' 70 | 71 | post_dto.title = ( 72 | sanitize_string(post_dto.title).replace('.', '').strip() 73 | ) 74 | 75 | # date - TITLE (UUID_PART) for deduplication in case of same names with different posts 76 | full_post_title = f'{post_dto.created_at.date()} - {post_dto.title} ({post_dto.id[:8]})' 77 | 78 | single_post_use_case = DownloadSinglePostUseCase( 79 | destination=self.destination / full_post_title, 80 | post_dto=post_dto, 81 | download_context=self.context, 82 | ) 83 | 84 | self.context.progress_reporter.update_task( 85 | page_task_id, 86 | advance=1, 87 | description=f'Processing page [bold]{current_page}[/bold]', 88 | ) 89 | 90 | max_attempts = 5 91 | delay = 1.0 92 | for attempt in range(1, max_attempts + 1): 93 | try: 94 | await single_post_use_case.execute() 95 | break 96 | except ApplicationCancelledError: 97 | raise 98 | except 
ApplicationFailedDownloadError as e: 99 | if attempt == max_attempts: 100 | self.context.progress_reporter.error( 101 | f'Skip post after {attempt} failed attempts: {full_post_title} ({e.message})' 102 | ) 103 | else: 104 | self.context.progress_reporter.warn( 105 | f'Attempt {attempt} failed for post: {full_post_title} ({e.message}), RESOURCE: ({e.resource})' 106 | ) 107 | self.context.progress_reporter.warn( 108 | f'Retrying in {delay:.1f}s... ({e.message})' 109 | ) 110 | await asyncio.sleep(delay) 111 | delay = min(delay * 1.5, 10.0) 112 | 113 | self.context.progress_reporter.complete_task(page_task_id) 114 | self.context.progress_reporter.success( 115 | f'--- Finished page {current_page} ---' 116 | ) 117 | -------------------------------------------------------------------------------- /.github/workflows/release-pr-validation.yaml: -------------------------------------------------------------------------------- 1 | # This workflow runs only for dev -> main PRs to ensure that: 2 | # - CHANGELOG updated 3 | # - pyproject.toml version updated 4 | # - Version is higher than the one on PyPI 5 | name: 🔍 Release PR Validation (version checks) 6 | 7 | on: 8 | pull_request: 9 | branches: 10 | - main 11 | 12 | env: 13 | PACKAGE_NAME: "boosty-downloader" 14 | 15 | jobs: 16 | # About Inter-step Communication: 17 | # Steps share data (versions) using GitHub Actions outputs mechanism: 18 | # 19 | # Creating output: echo "key=value" >> "$GITHUB_OUTPUT" 20 | # Using output: ${{ steps.STEP_ID.outputs.key }} 21 | # 22 | version-validation: 23 | name: 📋 Version Validation (Main Branch PRs) 24 | runs-on: ubuntu-latest 25 | # if: github.event_name == 'pull_request' && github.base_ref == 'main' 26 | steps: 27 | - uses: actions/checkout@v4 28 | with: 29 | fetch-depth: 0 30 | # ref: ${{ github.event.pull_request.head.sha }} 31 | 32 | - name: 🐍 Set up Python 33 | uses: actions/setup-python@v5 34 | with: 35 | python-version: "3.12" 36 | 37 | - name: 📦 Install Poetry if missing 38 | 
uses: snok/install-poetry@v1 39 | with: 40 | version: 'latest' 41 | 42 | - name: Get project versions (base and head) 43 | id: get_poetry_versions 44 | run: | 45 | HEAD_VERSION=$(poetry version --short) 46 | echo "head_version=$HEAD_VERSION" >> "$GITHUB_OUTPUT" 47 | echo "Current version: $HEAD_VERSION at $(git rev-parse --short HEAD)" 48 | 49 | git switch main 50 | BASE_VERSION=$(poetry version --short) 51 | echo "base_version=$BASE_VERSION" >> "$GITHUB_OUTPUT" 52 | echo "Base version: $BASE_VERSION at $(git rev-parse --short HEAD)" 53 | 54 | git switch - -d 55 | 56 | 57 | - name: ✅ Validate version bump in pyproject.toml 58 | run: | 59 | CURRENT_VERSION="${{ steps.get_poetry_versions.outputs.head_version }}" 60 | BASE_VERSION="${{ steps.get_poetry_versions.outputs.base_version }}" 61 | 62 | if [ "$CURRENT_VERSION" == "$BASE_VERSION" ]; then 63 | echo "❌ Version not updated! Please update version in pyproject.toml" 64 | echo "Current: $CURRENT_VERSION" 65 | echo "Base: $BASE_VERSION" 66 | exit 1 67 | fi 68 | 69 | if [ "$(printf '%s\n' "$BASE_VERSION" "$CURRENT_VERSION" | sort -rV | head -n 1)" != "$CURRENT_VERSION" ]; then 70 | echo "❌ Version should be higher than base version!" 71 | echo "Current: $CURRENT_VERSION" 72 | echo "Base: $BASE_VERSION" 73 | exit 1 74 | fi 75 | 76 | echo "✅ Version correctly updated: $BASE_VERSION → $CURRENT_VERSION" 77 | 78 | - name: 📝 Check for version in CHANGELOG.md 79 | run: | 80 | if [ ! -f CHANGELOG.md ]; then 81 | echo "❌ CHANGELOG.md not found! Please create it." 82 | exit 1 83 | fi 84 | VERSION="${{ steps.get_poetry_versions.outputs.head_version }}" 85 | if ! 
grep -q "$VERSION" CHANGELOG.md; then 86 | echo "at $(git rev-parse --short HEAD)" 87 | echo "❌ Version $VERSION not found in CHANGELOG.md" 88 | echo "Please add changelog entry for version $VERSION" 89 | exit 1 90 | fi 91 | echo "✅ Version $VERSION found in CHANGELOG.md" 92 | 93 | - name: 🩺 Check PyPi release version compatibility 94 | run: | 95 | # Assign both variables before logging them: CURRENT_VERSION is unset until the assignment below runs 96 | PACKAGE_NAME="${{ env.PACKAGE_NAME }}" 97 | CURRENT_VERSION="${{ steps.get_poetry_versions.outputs.head_version }}" 98 | echo "Checking package: $PACKAGE_NAME" 99 | echo "Current version: $CURRENT_VERSION" 100 | 101 | response=$(curl -s "https://pypi.org/pypi/$PACKAGE_NAME/json" || echo "{}") 102 | 103 | pypi_version=$(echo "$response" | jq --raw-output "select(.releases != null) | .releases | keys_unsorted | last // empty") 104 | 105 | if [ -z "$pypi_version" ] || [ "$pypi_version" = "null" ]; then 106 | echo "Package not found on PyPI or no releases available." 107 | pypi_version="0.0.0" 108 | fi 109 | 110 | echo "Latest version on PyPI: $pypi_version" 111 | echo "pypi_version=$pypi_version" >> "$GITHUB_OUTPUT" 112 | 113 | # Compare versions using sort -rV 114 | if [ "$CURRENT_VERSION" = "$pypi_version" ]; then 115 | echo "❌ Current version equals PyPI version ($CURRENT_VERSION)" 116 | echo "is_newer=false" >> "$GITHUB_OUTPUT" 117 | exit 1 118 | elif [ "$(printf '%s\n' "$pypi_version" "$CURRENT_VERSION" | sort -rV | head -n 1)" = "$CURRENT_VERSION" ]; then 119 | echo "✅ Current version ($CURRENT_VERSION) is newer than PyPI version ($pypi_version)" 120 | echo "is_newer=true" >> "$GITHUB_OUTPUT" 121 | else 122 | echo "❌ Current version ($CURRENT_VERSION) is older than PyPI version ($pypi_version)" 123 | echo "is_newer=false" >> "$GITHUB_OUTPUT" 124 | exit 1 125 | fi 126 | -------------------------------------------------------------------------------- /boosty_downloader/src/application/mappers/link_header_text.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mapper for
converting textual Boosty API post data chunks to domain text object. 3 | 4 | If the API responses change, this mapper may need to be updated accordingly. 5 | """ 6 | 7 | import json 8 | 9 | from boosty_downloader.src.domain.post_data_chunks import PostDataChunkText 10 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post_data_types import ( 11 | BoostyPostDataHeaderDTO, 12 | BoostyPostDataLinkDTO, 13 | BoostyPostDataTextDTO, 14 | ) 15 | 16 | 17 | def _parse_header(style_definition: str) -> int: 18 | r""" 19 | Parse header level (h1/h2/h3...) from the style definition. 20 | 21 | Style definition usually comes as a 2nd field in the "content" field of PostDataText. 22 | 23 | ``` 24 | "content": "[\"Hello, world!\", \"unstyled\", <---- [[0, 0, 13]]" 25 | ``` 26 | """ 27 | # These values were reverse engineered from Boosty API responses. 28 | header_possible_values = { 29 | 'unstyled': 0, 30 | 'header-one': 1, 31 | 'header-two': 2, 32 | 'header-three': 3, 33 | 'header-four': 4, 34 | 'header-five': 5, 35 | 'header-six': 6, 36 | } 37 | 38 | # by default (and in other cases) have no header 39 | return header_possible_values.get(style_definition, 0) 40 | 41 | 42 | def _create_style_bitmap( 43 | text_length: int, style_array: list[list[int]] 44 | ) -> list[set[int]]: 45 | """Create bitmap of styles for each character position.""" 46 | bitmap: list[set[int]] = [set() for _ in range(text_length)] 47 | 48 | for style_desc in style_array: 49 | style_id, start_idx, end_idx = style_desc 50 | for i in range(start_idx, min(end_idx, text_length)): 51 | bitmap[i].add(style_id) 52 | 53 | return bitmap 54 | 55 | 56 | def _create_text_fragments( 57 | text: str, style_bitmap: list[set[int]], header_level: int 58 | ) -> list[PostDataChunkText.TextFragment]: 59 | """Create text fragments based on style bitmap.""" 60 | if not text: 61 | return [] 62 | 63 | fragments: list[PostDataChunkText.TextFragment] = [] 64 | current_fragment_start = 0 65 | current_styles: set[int] = 
style_bitmap[0] if style_bitmap else set() 66 | 67 | for i in range(1, len(text)): 68 | if i >= len(style_bitmap) or style_bitmap[i] != current_styles: 69 | fragment_text = text[current_fragment_start:i] 70 | fragment = PostDataChunkText.TextFragment(fragment_text) 71 | fragment.header_level = header_level 72 | fragment.style = _convert_style_set_to_text_style(current_styles) 73 | fragments.append(fragment) 74 | 75 | current_fragment_start = i 76 | current_styles = style_bitmap[i] if i < len(style_bitmap) else set() 77 | 78 | # Add the last fragment 79 | fragment_text = text[current_fragment_start:] 80 | fragment = PostDataChunkText.TextFragment(fragment_text) 81 | fragment.header_level = header_level 82 | fragment.style = _convert_style_set_to_text_style(current_styles) 83 | fragments.append(fragment) 84 | 85 | return fragments 86 | 87 | 88 | def _convert_style_set_to_text_style( 89 | style_set: set[int], 90 | ) -> PostDataChunkText.TextFragment.TextStyle: 91 | """Convert set of style IDs to TextStyle object.""" 92 | bold = 0 93 | italic = 2 94 | underline = 4 95 | 96 | text_style = PostDataChunkText.TextFragment.TextStyle() 97 | text_style.bold = bold in style_set 98 | text_style.italic = italic in style_set 99 | text_style.underline = underline in style_set 100 | 101 | return text_style 102 | 103 | 104 | def _parse_content_field( 105 | content: str, modificator: str = '' 106 | ) -> list[PostDataChunkText.TextFragment]: 107 | def _extract_content_field(content: str) -> tuple[str, str, list[list[int]]]: 108 | r""" 109 | Extract text, style info, and style array from the content field. 110 | 111 | Boosty API returns "content" as a JSON-encoded string like this: 112 | "[\"Hello, world!\", \"unstyled\", [[0, 0, 13]]" 113 | 114 | The first part is just a text string, the other two parts are style information: 115 | - you can read about them in the _parse_style_array and _parse_header functions above. 
116 | """ 117 | try: 118 | parsed = json.loads(content) 119 | text = parsed[0] 120 | style_info = parsed[1] 121 | style_array = parsed[2] 122 | except json.JSONDecodeError: 123 | return content, '', [] 124 | else: 125 | return text, style_info, style_array 126 | 127 | text, style_info, styles_array = _extract_content_field(content) 128 | 129 | if modificator == 'BLOCK_END': 130 | text += '\n' 131 | 132 | header_level = _parse_header(style_info) 133 | style_bitmap = _create_style_bitmap(len(text), styles_array) 134 | return _create_text_fragments(text, style_bitmap, header_level) 135 | 136 | 137 | def to_domain_text_chunk( 138 | api_textual_dto: BoostyPostDataTextDTO 139 | | BoostyPostDataHeaderDTO 140 | | BoostyPostDataLinkDTO, 141 | ) -> list[PostDataChunkText.TextFragment]: 142 | """ 143 | Convert API textual data chunks to domain text fragments. 144 | 145 | It uses the PostDataText, PostDataHeader, or PostDataLink DTOs 146 | to extract the content and convert it to a list of domain text fragments. 
147 | """ 148 | modificator = getattr(api_textual_dto, 'modificator', '') 149 | text_fragments = _parse_content_field(api_textual_dto.content, modificator) 150 | 151 | # Attach link information to the text fragments if any is present 152 | if isinstance(api_textual_dto, BoostyPostDataLinkDTO): 153 | for fragment in text_fragments: 154 | fragment.link_url = api_textual_dto.url 155 | 156 | return text_fragments 157 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/html_reporter/html_reporter.py: -------------------------------------------------------------------------------- 1 | """HTML Reporter for generating HTML documents""" 2 | 3 | from __future__ import annotations 4 | 5 | from dataclasses import dataclass 6 | from typing import TYPE_CHECKING, TypedDict 7 | 8 | from jinja2 import Template 9 | 10 | if TYPE_CHECKING: 11 | from pathlib import Path 12 | 13 | 14 | @dataclass 15 | class NormalText: 16 | """Textual element, which can be added to the html document""" 17 | 18 | text: str 19 | 20 | 21 | @dataclass 22 | class HyperlinkText: 23 | """Hyperlink element, which can be added to the html document""" 24 | 25 | text: str 26 | url: str 27 | 28 | 29 | class TextElement(TypedDict): 30 | """Text element, which can be added to the html document""" 31 | 32 | type: str 33 | content: str 34 | 35 | 36 | class ImageElement(TypedDict): 37 | """Image element, which can be added to the html document""" 38 | 39 | type: str 40 | content: str 41 | width: int 42 | 43 | 44 | class LinkElement(TypedDict): 45 | """Link element, which can be added to the html document""" 46 | 47 | type: str 48 | content: str 49 | url: str 50 | 51 | 52 | class HTMLReport: 53 | """ 54 | Representation of the document, which can be saved as an HTML file. 55 | 56 | You can add text/links/images to the document, they will be added one after another. 
57 | """ 58 | 59 | def __init__(self, filename: Path) -> None: 60 | self.filename = filename 61 | self.elements: list[TextElement | ImageElement | LinkElement] = [] 62 | 63 | def _render_template(self) -> str: 64 | """Render the HTML document using Jinja2""" 65 | template = """ 66 | 67 | 68 | HTML Report 69 | 115 | 116 | 117 |
    118 | {% for element in elements %} 119 | {% if element.type == 'text' %} 120 |

    {{ element.content }}

    121 | {% elif element.type == 'image' %} 122 |
    123 | 124 |
    125 | {% elif element.type == 'link' %} 126 | {{ element.content }} 127 | {% endif %} 128 | {% endfor %} 129 |
    130 | 131 | 132 | """ 133 | jinja_template = Template(template) 134 | return jinja_template.render(elements=self.elements) 135 | 136 | def new_paragraph(self) -> None: 137 | """Add an empty line between elements""" 138 | # Append a new paragraph using a proper TextElement type 139 | self.elements.append(TextElement(type='text', content='
    ')) 140 | 141 | def add_text(self, text: NormalText) -> None: 142 | """Add a text to the report right after the last added element""" 143 | # Append text content using TextElement 144 | self.elements.append(TextElement(type='text', content=text.text)) 145 | 146 | def add_image(self, image_path: str, width: int = 600) -> None: 147 | """ 148 | Add an image to the report right after the last added element 149 | 150 | - width 600 is usually enough for most HTML pages 151 | """ 152 | # Append image content using ImageElement 153 | self.elements.append( 154 | ImageElement(type='image', content=image_path, width=width), 155 | ) 156 | 157 | def add_link(self, text: NormalText, url: str) -> None: 158 | """Add a link to the report right after the last added element""" 159 | # Append link content using LinkElement 160 | self.elements.append(LinkElement(type='link', content=text.text, url=url)) 161 | 162 | def save(self) -> None: 163 | """Save the whole document to the file""" 164 | html_content = self._render_template() 165 | with self.filename.open('w', encoding='utf-8') as file: 166 | file.write(html_content) 167 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/file_downloader.py: -------------------------------------------------------------------------------- 1 | """Module to download files with reporting process mechanisms""" 2 | 3 | from __future__ import annotations 4 | 5 | import http 6 | import mimetypes 7 | from asyncio import CancelledError 8 | from dataclasses import dataclass 9 | from typing import TYPE_CHECKING 10 | 11 | import aiofiles 12 | from aiohttp import ClientConnectionError 13 | 14 | from boosty_downloader.src.infrastructure.path_sanitizer import ( 15 | sanitize_string, 16 | ) 17 | 18 | if TYPE_CHECKING: 19 | from collections.abc import Callable 20 | from pathlib import Path 21 | 22 | from aiohttp_retry import RetryClient 23 | 24 | 25 | @dataclass 26 | class DownloadingStatus: 27 
| """ 28 | Model for status of the download. 29 | 30 | Can be used in status update callbacks. 31 | """ 32 | 33 | name: str 34 | total_bytes: int | None 35 | total_downloaded_bytes: int 36 | downloaded_bytes: int = 0 37 | 38 | 39 | @dataclass 40 | class DownloadFileConfig: 41 | """General configuration for the file download""" 42 | 43 | session: RetryClient 44 | url: str 45 | 46 | filename: str 47 | destination: Path 48 | on_status_update: Callable[[DownloadingStatus], None] = lambda _: None 49 | 50 | guess_extension: bool = True 51 | chunk_size_bytes: int = 524288 # 512 KiB 52 | 53 | 54 | class DownloadError(Exception): 55 | """Exception raised when the download failed for any reason""" 56 | 57 | message: str 58 | file: Path | None 59 | resource_url: str 60 | 61 | def __init__(self, message: str, file: Path | None, resource_url: str) -> None: 62 | super().__init__(message) 63 | self.file = file 64 | self.resource_url = resource_url 65 | 66 | 67 | class DownloadCancelledError(DownloadError): 68 | """Exception raised when the download was cancelled by the user""" 69 | 70 | def __init__(self, resource_url: str, file: Path | None = None) -> None: 71 | super().__init__('Download cancelled by user', file, resource_url=resource_url) 72 | 73 | 74 | class DownloadTimeoutError(DownloadError): 75 | """Exception raised when the download timed out""" 76 | 77 | def __init__(self, resource_url: str, file: Path | None = None) -> None: 78 | super().__init__( 79 | 'Download timed out for the destination server', 80 | file, 81 | resource_url=resource_url, 82 | ) 83 | 84 | 85 | class DownloadConnectionError(DownloadError): 86 | """Exception raised when there was a connection error during the download""" 87 | 88 | def __init__(self, resource_url: str, file: Path | None = None) -> None: 89 | super().__init__( 90 | 'Connection error during the download', file, resource_url=resource_url 91 | ) 92 | 93 | 94 | class DownloadIOFailureError(DownloadError): 95 | """Exception raised when there 
was an IOError during the download""" 96 | 97 | def __init__(self, resource_url: str, file: Path | None = None) -> None: 98 | super().__init__('Failed during I/O operation', file, resource_url=resource_url) 99 | 100 | 101 | class DownloadUnexpectedStatusError(DownloadError): 102 | """Exception raised when the server returned an unexpected status code""" 103 | 104 | status_code: int 105 | response_message: str 106 | 107 | def __init__(self, status: int, response_message: str, resource_url: str) -> None: 108 | super().__init__( 109 | f'Unexpected status code: {status}', file=None, resource_url=resource_url 110 | ) 111 | self.status_code = status 112 | self.response_message = response_message 113 | 114 | 115 | async def download_file( 116 | dl_config: DownloadFileConfig, 117 | ) -> Path: 118 | """Download files and report the downloading process via callback""" 119 | async with dl_config.session.get(dl_config.url) as response: 120 | if response.status != http.HTTPStatus.OK: 121 | raise DownloadUnexpectedStatusError( 122 | resource_url=dl_config.url, 123 | status=response.status, 124 | response_message=response.reason or 'No reason provided', 125 | ) 126 | 127 | filename = sanitize_string(dl_config.filename) 128 | file_path = dl_config.destination / filename 129 | 130 | content_type = response.content_type 131 | if content_type and dl_config.guess_extension: 132 | ext = mimetypes.guess_extension(content_type) 133 | if ext is not None: 134 | file_path = file_path.with_suffix(ext) 135 | 136 | total_downloaded = 0 137 | 138 | async with aiofiles.open(file_path, mode='wb') as file: 139 | total_size = response.content_length 140 | 141 | try: 142 | async for chunk in response.content.iter_chunked( 143 | dl_config.chunk_size_bytes 144 | ): 145 | total_downloaded += len(chunk) 146 | dl_config.on_status_update( 147 | DownloadingStatus( 148 | name=filename, 149 | total_bytes=total_size, 150 | total_downloaded_bytes=total_downloaded, 151 | downloaded_bytes=len(chunk), 152 | ), 
153 | ) 154 | await file.write(chunk) 155 | except (CancelledError, KeyboardInterrupt) as e: 156 | raise DownloadCancelledError( 157 | file=file_path, resource_url=dl_config.url 158 | ) from e 159 | except DownloadTimeoutError as e: 160 | raise DownloadTimeoutError( 161 | file=file_path, resource_url=dl_config.url 162 | ) from e 163 | except (ConnectionResetError, BrokenPipeError, ClientConnectionError) as e: 164 | raise DownloadConnectionError( 165 | file=file_path, resource_url=dl_config.url 166 | ) from e 167 | except OSError as e: 168 | raise DownloadIOFailureError( 169 | file=file_path, resource_url=dl_config.url 170 | ) from e 171 | 172 | return file_path 173 | -------------------------------------------------------------------------------- /test/integration/fixtures.py: -------------------------------------------------------------------------------- 1 | """Shared fixtures for Boosty API integration tests.""" 2 | 3 | import logging 4 | from collections.abc import AsyncGenerator 5 | 6 | import pytest 7 | import pytest_asyncio 8 | from aiohttp import ClientSession, CookieJar 9 | from aiohttp.typedefs import LooseHeaders 10 | from aiohttp_retry import ExponentialRetry, RetryClient 11 | from pydantic import ValidationError 12 | 13 | from boosty_downloader.src.infrastructure.boosty_api.core.client import BoostyAPIClient 14 | from boosty_downloader.src.infrastructure.boosty_api.utils.auth_parsers import ( 15 | parse_session_cookie, 16 | ) 17 | from integration.configuration import IntegrationTestConfig 18 | 19 | logger = logging.getLogger(__name__) 20 | 21 | # ------------------------------------------------------------------------------ 22 | # Utilities for further fixtures 23 | 24 | 25 | @pytest.fixture(scope='session') 26 | def integration_config() -> IntegrationTestConfig: 27 | """ 28 | Provides configuration for integration tests. 29 | 30 | It loads the configuration from the environment or a configuration file. 
31 | If the configuration is invalid, it logs the errors and skips the tests. 32 | """ 33 | try: 34 | return IntegrationTestConfig() # pyright: ignore[reportCallIssue] : will be loaded automatically by pydantic_settings 35 | 36 | except ValidationError as e: 37 | logger.exception('❌ Failed to load integration test config:') 38 | for err in e.errors(): 39 | loc = '.'.join(map(str, err['loc'])) 40 | msg = err['msg'] 41 | logger.exception(f' - {loc}: {msg}') 42 | pytest.skip('Integration tests require valid configuration') 43 | 44 | 45 | @pytest.fixture 46 | def boosty_headers(integration_config: IntegrationTestConfig) -> LooseHeaders: 47 | """Returns headers with authorization token for Boosty API requests.""" 48 | return { 49 | 'Authorization': integration_config.boosty_auth_token, 50 | 'Content-Type': 'application/json', 51 | } 52 | 53 | 54 | @pytest_asyncio.fixture 55 | async def boosty_cookies_jar_async( 56 | integration_config: IntegrationTestConfig, 57 | ) -> CookieJar: 58 | # This avoids 'no running event loop' error by ensuring the jar is created in an async context 59 | return parse_session_cookie(integration_config.boosty_cookies) 60 | 61 | 62 | # ------------------------------------------------------------------------------ 63 | # Different session setups 64 | 65 | 66 | @pytest_asyncio.fixture 67 | async def authorized_http_session( 68 | boosty_headers: LooseHeaders, 69 | boosty_cookies_jar_async: CookieJar, 70 | ) -> AsyncGenerator[ClientSession, None]: 71 | """Creates an HTTP session for making requests.""" 72 | session = ClientSession( 73 | headers=boosty_headers, 74 | cookie_jar=boosty_cookies_jar_async, 75 | ) 76 | yield session 77 | await session.close() 78 | 79 | 80 | @pytest_asyncio.fixture 81 | async def unauthorized_http_session() -> AsyncGenerator[ClientSession, None]: 82 | """Creates an HTTP session without authorization headers.""" 83 | session = ClientSession() 84 | yield session 85 | await session.close() 86 | 87 | 88 | 
@pytest_asyncio.fixture 89 | async def invalid_auth_http_session() -> AsyncGenerator[ClientSession, None]: 90 | session = ClientSession( 91 | headers={ 92 | 'Authorization': 'Bearer ' 93 | + 'a' * 64, # Looks valid (64 hex chars), but not actually valid 94 | }, 95 | ) 96 | yield session 97 | await session.close() 98 | 99 | 100 | # ------------------------------------------------------------------------------ 101 | # Clients for Boosty API 102 | 103 | 104 | @pytest_asyncio.fixture 105 | async def authorized_retry_client( 106 | authorized_http_session: ClientSession, 107 | ) -> AsyncGenerator[RetryClient, None]: 108 | """Creates a retry client for handling transient failures.""" 109 | retry_options = ExponentialRetry(attempts=3, start_timeout=1.0) 110 | client = RetryClient( 111 | client_session=authorized_http_session, 112 | retry_options=retry_options, 113 | ) 114 | yield client 115 | await client.close() 116 | 117 | 118 | @pytest_asyncio.fixture 119 | async def unauthorized_retry_client( 120 | unauthorized_http_session: ClientSession, 121 | ) -> AsyncGenerator[RetryClient, None]: 122 | """Creates a retry client without authentication for testing unauthorized scenarios.""" 123 | retry_options = ExponentialRetry(attempts=3, start_timeout=1.0) 124 | client = RetryClient( 125 | client_session=unauthorized_http_session, 126 | retry_options=retry_options, 127 | ) 128 | yield client 129 | await client.close() 130 | 131 | 132 | @pytest_asyncio.fixture 133 | async def invalid_auth_retry_client( 134 | invalid_auth_http_session: ClientSession, 135 | ) -> AsyncGenerator[RetryClient, None]: 136 | """Creates a retry client with invalid authentication for testing error handling.""" 137 | retry_options = ExponentialRetry(attempts=3, start_timeout=1.0) 138 | client = RetryClient( 139 | client_session=invalid_auth_http_session, 140 | retry_options=retry_options, 141 | ) 142 | yield client 143 | await client.close() 144 | 145 | 146 | # 
------------------------------------------------------------------------------ 147 | # Clients for Boosty API 148 | 149 | 150 | @pytest_asyncio.fixture 151 | async def authorized_boosty_client( 152 | authorized_retry_client: RetryClient, 153 | ) -> BoostyAPIClient: 154 | """Creates a Boosty API client configured with authentication.""" 155 | return BoostyAPIClient(session=authorized_retry_client) 156 | 157 | 158 | @pytest_asyncio.fixture 159 | async def unauthorized_boosty_client( 160 | unauthorized_retry_client: RetryClient, 161 | ) -> BoostyAPIClient: 162 | """Creates a Boosty API client without authentication for testing unauthorized scenarios.""" 163 | return BoostyAPIClient(session=unauthorized_retry_client, request_delay_seconds=1) 164 | 165 | 166 | @pytest_asyncio.fixture 167 | async def invalid_auth_boosty_client( 168 | invalid_auth_retry_client: RetryClient, 169 | ) -> BoostyAPIClient: 170 | """Creates a Boosty API client with invalid authentication for testing error handling.""" 171 | return BoostyAPIClient(session=invalid_auth_retry_client, request_delay_seconds=1) 172 | -------------------------------------------------------------------------------- /test/unit/html_generator/html_templates_test.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from boosty_downloader.src.infrastructure.html_generator.models import ( 4 | HtmlGenChunk, 5 | HtmlGenImage, 6 | HtmlGenList, 7 | HtmlGenText, 8 | HtmlGenVideo, 9 | HtmlListItem, 10 | HtmlTextFragment, 11 | HtmlTextStyle, 12 | ) 13 | from boosty_downloader.src.infrastructure.html_generator.renderer import ( 14 | render_html, 15 | render_html_to_file, 16 | ) 17 | 18 | 19 | def test_html_generator_templates(): 20 | chunks: list[HtmlGenChunk] = [ 21 | HtmlGenText( 22 | text_fragments=[ 23 | HtmlTextFragment(text='Welcome to my Boosty!', header_level=1), 24 | HtmlTextFragment( 25 | text='This post includes various elements: text, media, and 
lists.', 26 | ), 27 | HtmlTextFragment(text=''), 28 | HtmlTextFragment( 29 | text="Let's dive in below:", 30 | style=HtmlTextStyle(italic=True), 31 | ), 32 | ] 33 | ), 34 | HtmlGenText( 35 | text_fragments=[ 36 | HtmlTextFragment(text='Highlights', header_level=2), 37 | HtmlTextFragment( 38 | text='This paragraph contains a mix of ', 39 | ), 40 | HtmlTextFragment( 41 | text='bold', 42 | style=HtmlTextStyle(bold=True), 43 | ), 44 | HtmlTextFragment(text=', '), 45 | HtmlTextFragment( 46 | text='italic', 47 | style=HtmlTextStyle(italic=True), 48 | ), 49 | HtmlTextFragment(text=', and '), 50 | HtmlTextFragment( 51 | text='underlined', 52 | style=HtmlTextStyle(underline=True), 53 | ), 54 | HtmlTextFragment(text=' text. You can '), 55 | HtmlTextFragment( 56 | text='click here', 57 | link_url='https://boosty.to/example', 58 | style=HtmlTextStyle(underline=True), 59 | ), 60 | HtmlTextFragment(text=' to support me.'), 61 | ] 62 | ), 63 | HtmlGenList( 64 | items=[ 65 | HtmlListItem( 66 | data=[ 67 | HtmlGenText( 68 | text_fragments=[ 69 | HtmlTextFragment(text="📌 What you'll get inside:") 70 | ] 71 | ) 72 | ], 73 | nested_items=[ 74 | HtmlListItem( 75 | data=[ 76 | HtmlGenText( 77 | text_fragments=[ 78 | HtmlTextFragment(text='High-quality images') 79 | ] 80 | ) 81 | ], 82 | nested_items=[], 83 | ), 84 | HtmlListItem( 85 | data=[ 86 | HtmlGenText( 87 | text_fragments=[ 88 | HtmlTextFragment(text='Source files (PSD, RAW)') 89 | ] 90 | ) 91 | ], 92 | nested_items=[], 93 | ), 94 | HtmlListItem( 95 | data=[ 96 | HtmlGenText( 97 | text_fragments=[ 98 | HtmlTextFragment(text='Bonus video content') 99 | ] 100 | ) 101 | ], 102 | nested_items=[ 103 | HtmlListItem( 104 | data=[ 105 | HtmlGenText( 106 | text_fragments=[ 107 | HtmlTextFragment( 108 | text='Behind the scenes' 109 | ) 110 | ] 111 | ) 112 | ], 113 | nested_items=[], 114 | ), 115 | HtmlListItem( 116 | data=[ 117 | HtmlGenText( 118 | text_fragments=[ 119 | HtmlTextFragment( 120 | text='Unreleased footage' 121 | ) 122 | ] 123 
| ) 124 | ], 125 | nested_items=[], 126 | ), 127 | ], 128 | ), 129 | ], 130 | ) 131 | ] 132 | ), 133 | HtmlGenImage(url='https://example.com/banner.jpg'), 134 | HtmlGenVideo( 135 | title='Exclusive Behind the Scenes', 136 | url='https://example.com/video.mp4', 137 | ), 138 | HtmlGenVideo(url='https://www.youtube.com/watch?v=dQw4w9WgXcQ'), 139 | HtmlGenText( 140 | text_fragments=[ 141 | HtmlTextFragment(text=''), 142 | HtmlTextFragment(text='Thanks for reading!', header_level=2), 143 | HtmlTextFragment( 144 | text='Feel free to leave a comment or suggestion below.', 145 | ), 146 | ] 147 | ), 148 | ] 149 | 150 | data = render_html(chunks) 151 | 152 | test_output_file = Path('test_output.html') 153 | 154 | render_html_to_file(chunks, test_output_file) 155 | 156 | assert test_output_file.exists() 157 | assert test_output_file.read_text(encoding='utf-8') == data 158 | assert len(data) > 0 159 | 160 | test_output_file.unlink(missing_ok=True) 161 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: release 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | workflow_dispatch: 8 | 9 | env: 10 | PACKAGE_NAME: "boosty-downloader" 11 | OWNER: "Glitchy-Sheep" 12 | 13 | jobs: 14 | extract_base_project_version: 15 | name: "📋 Extract pyproject.toml version from main" 16 | runs-on: ubuntu-latest 17 | outputs: 18 | version: ${{ steps.extract_version.outputs.version }} 19 | steps: 20 | # Ensure that we are on the main branch to get latest stable version 21 | - uses: actions/checkout@v4 22 | with: 23 | fetch-depth: 0 24 | ref: main 25 | 26 | - name: Set up Python 27 | uses: actions/setup-python@v5 28 | with: 29 | python-version: "3.12" 30 | 31 | - name: Install Poetry 32 | uses: snok/install-poetry@v1 33 | 34 | - name: Extract base version 35 | id: extract_version 36 | run: | 37 | VERSION=$(poetry version --short) 38 | echo
"version=$VERSION" >> "$GITHUB_OUTPUT" 39 | 40 | extract_pushed_version: 41 | name: "📦 Extract pushed pyproject.toml version" 42 | runs-on: ubuntu-latest 43 | outputs: 44 | version: ${{ steps.extract_version.outputs.version }} 45 | steps: 46 | - uses: actions/checkout@v4 47 | with: 48 | fetch-depth: 0 # so that poetry can read pyproject.toml in any case 49 | 50 | - name: Set up Python 51 | uses: actions/setup-python@v5 52 | with: 53 | python-version: "3.12" 54 | 55 | - name: Install Poetry 56 | uses: snok/install-poetry@v1 57 | 58 | - name: Extract pushed version 59 | id: extract_version 60 | run: | 61 | VERSION=$(poetry version --short) 62 | echo "version=$VERSION" >> "$GITHUB_OUTPUT" 63 | 64 | 65 | 66 | # Check if new version is greater than the latest version on PyPI 67 | check_pypi: 68 | name: "🔍 Validate version against PyPI" 69 | needs: extract_pushed_version 70 | runs-on: ubuntu-latest 71 | steps: 72 | - uses: actions/checkout@v4 73 | - name: 🩺 Check PyPi release version compatibility 74 | run: | 75 | # Assign both variables before logging them: CURRENT_VERSION is unset until the assignment below runs 76 | PACKAGE_NAME="${{ env.PACKAGE_NAME }}" 77 | CURRENT_VERSION="${{ needs.extract_pushed_version.outputs.version }}" 78 | echo "Checking package: $PACKAGE_NAME" 79 | echo "Current version: $CURRENT_VERSION" 80 | 81 | response=$(curl -s "https://pypi.org/pypi/$PACKAGE_NAME/json" || echo "{}") 82 | 83 | pypi_version=$(echo "$response" | jq --raw-output "select(.releases != null) | .releases | keys_unsorted | last // empty") 84 | 85 | if [ -z "$pypi_version" ] || [ "$pypi_version" = "null" ]; then 86 | echo "Package not found on PyPI or no releases available."
87 | pypi_version="0.0.0" 88 | fi 89 | 90 | echo "Latest version on PyPI: $pypi_version" 91 | echo "pypi_version=$pypi_version" >> "$GITHUB_OUTPUT" 92 | 93 | # Compare versions using sort -rV 94 | if [ "$CURRENT_VERSION" = "$pypi_version" ]; then 95 | echo "❌ Current version equals PyPI version ($CURRENT_VERSION)" 96 | echo "is_newer=false" >> "$GITHUB_OUTPUT" 97 | exit 1 98 | elif [ "$(printf '%s\n' "$pypi_version" "$CURRENT_VERSION" | sort -rV | head -n 1)" = "$CURRENT_VERSION" ]; then 99 | echo "✅ Current version ($CURRENT_VERSION) is newer than PyPI version ($pypi_version)" 100 | echo "is_newer=true" >> "$GITHUB_OUTPUT" 101 | else 102 | echo "❌ Current version ($CURRENT_VERSION) is older than PyPI version ($pypi_version)" 103 | echo "is_newer=false" >> "$GITHUB_OUTPUT" 104 | exit 1 105 | fi 106 | 107 | 108 | # Install dependencies, run tests, and build distribution packages 109 | setup_and_build: 110 | name: "🔨 Build distribution packages" 111 | needs: [extract_pushed_version, extract_base_project_version, check_pypi] 112 | runs-on: ubuntu-latest 113 | steps: 114 | # Checkout the code on MAIN to get current latest stable version 115 | - uses: actions/checkout@v4 116 | with: 117 | fetch-depth: 0 118 | 119 | - name: Set up Python 120 | uses: actions/setup-python@v5 121 | with: 122 | python-version: "3.12" 123 | 124 | - name: Install Poetry 125 | uses: snok/install-poetry@v1 126 | 127 | - name: Install dependencies 128 | run: make deps 129 | timeout-minutes: 10 130 | 131 | - name: Build source and wheel distribution 132 | run: make build 133 | 134 | - name: Upload artifacts 135 | uses: actions/upload-artifact@v4 136 | with: 137 | name: dist 138 | path: dist/ 139 | 140 | create_tag: 141 | name: "🏷️ Create release tag" 142 | needs: [extract_pushed_version, setup_and_build] 143 | runs-on: ubuntu-latest 144 | permissions: 145 | contents: write 146 | steps: 147 | - uses: actions/checkout@v4 148 | 149 | - name: Create and push tag 150 | run: | 151 | VERSION=${{ 
needs.extract_pushed_version.outputs.version }} 152 | git config user.name "github-actions[bot]" 153 | git config user.email "github-actions[bot]@users.noreply.github.com" 154 | git tag -a "v$VERSION" -m "Release v$VERSION" 155 | git push origin "v$VERSION" 156 | 157 | 158 | pypi_publish: 159 | name: "📦 Upload release to PyPI" 160 | needs: [setup_and_build, extract_pushed_version, create_tag] 161 | runs-on: ubuntu-latest 162 | environment: 163 | name: release 164 | permissions: 165 | id-token: write 166 | steps: 167 | - name: Download artifacts 168 | uses: actions/download-artifact@v5 169 | with: 170 | name: dist 171 | path: dist/ 172 | 173 | - name: Publish distribution to PyPI 174 | uses: pypa/gh-action-pypi-publish@release/v1 175 | 176 | 177 | github_release: 178 | name: "🚀 Create GitHub Release" 179 | needs: [setup_and_build, extract_pushed_version, create_tag] 180 | runs-on: ubuntu-latest 181 | permissions: 182 | contents: write 183 | steps: 184 | - name: Checkout Code 185 | uses: actions/checkout@v4 186 | with: 187 | fetch-depth: 0 188 | 189 | - name: Download artifacts 190 | uses: actions/download-artifact@v5 191 | with: 192 | name: dist 193 | path: dist/ 194 | 195 | - name: Create GitHub Release 196 | env: 197 | GH_TOKEN: ${{ github.token }} 198 | run: | 199 | VERSION=${{ needs.extract_pushed_version.outputs.version }} 200 | gh release create "v$VERSION" dist/* --title "v$VERSION" --generate-notes 201 | 202 | -------------------------------------------------------------------------------- /boosty_downloader/src/infrastructure/boosty_api/core/client.py: -------------------------------------------------------------------------------- 1 | """Boosty API client for accessing content.""" 2 | 3 | from __future__ import annotations 4 | 5 | from http import HTTPStatus 6 | from typing import TYPE_CHECKING 7 | 8 | from aiolimiter import AsyncLimiter 9 | from pydantic import ValidationError 10 | from yarl import URL 11 | 12 | from 
class BoostyAPIError(Exception):
    """Base class for all Boosty API related errors."""


class BoostyAPINoUsernameError(BoostyAPIError):
    """
    Raised when the requested author (username) is not found on Boosty.

    The offending name is kept in ``username`` so callers can report it.
    """

    username: str

    def __init__(self, username: str) -> None:
        super().__init__(f'Username not found: {username}')
        self.username = username


class BoostyAPIUnauthorizedError(BoostyAPIError):
    """Raised when an authorization error occurs, e.g. when credentials are invalid."""


class BoostyAPIUnknownError(BoostyAPIError):
    """
    Raised when Boosty returns an unexpected error.

    Both the HTTP status code and the textual details are exposed as attributes,
    so callers can branch on the status without parsing the message.
    """

    status_code: int
    details: str

    def __init__(self, status_code: int, details: str) -> None:
        super().__init__(f'Boosty returned unknown error[{status_code}]: {details}')
        # Previously only embedded in the message; keep it accessible programmatically.
        self.status_code = status_code
        self.details = details
66 | """ 67 | 68 | errors: list[ErrorDetails] 69 | 70 | def __init__(self, errors: list[ErrorDetails]) -> None: 71 | super().__init__('Boosty API response validation error') 72 | self.errors = errors 73 | 74 | 75 | def _create_limiter(request_delay_seconds: float) -> AsyncLimiter | None: 76 | # aiolimiter expects max_rate and time_period to be positive. 77 | # For delays <1s, we use a 1-second window and scale the rate to avoid exceptions and ensure correct throttling. 78 | # For delays >=1s, we allow 1 request per delay period, matching the intended throttle. 79 | # Without this logic, certain values (e.g. delay=0.5) would cause aiolimiter to raise or throttle incorrectly. 80 | if request_delay_seconds > 0: 81 | if request_delay_seconds < 1: 82 | max_rate = 1 / request_delay_seconds 83 | time_period = 1 84 | else: 85 | max_rate = 1 86 | time_period = request_delay_seconds 87 | return AsyncLimiter(max_rate=max_rate, time_period=time_period) 88 | return None 89 | 90 | 91 | class BoostyAPIClient: 92 | """ 93 | Main client class for the Boosty API. 94 | 95 | The session you provide to this class MUST NOT CONTAIN BASE URL. 96 | It should only contain headers and cookies. Base url is set internally. 97 | 98 | It handles the connection and makes requests to the API. 99 | To work with private/paid posts you need to provide valid authentication token and cookies in the session. 
100 | """ 101 | 102 | def __init__( 103 | self, 104 | session: RetryClient, 105 | request_delay_seconds: float = 0.0, 106 | base_url: URL | None = None, 107 | ) -> None: 108 | self._base_url = base_url or BOOSTY_DEFAULT_BASE_URL 109 | self.session = session 110 | self._limiter = _create_limiter(request_delay_seconds) 111 | 112 | async def _throttled_get( 113 | self, 114 | endpoint: str, 115 | params: Mapping[str, str] | None = None, 116 | headers: Mapping[str, str] | None = None, 117 | ) -> ClientResponse: 118 | url = URL(self._base_url) / endpoint.lstrip('/') 119 | 120 | if self._limiter: 121 | async with self._limiter: 122 | return await self.session.get(url, params=params, headers=headers) 123 | return await self.session.get(url, params=params, headers=headers) 124 | 125 | async def get_author_posts( 126 | self, 127 | author_name: str, 128 | limit: int, 129 | offset: str | None = None, 130 | ) -> PostsResponse: 131 | """ 132 | Request to get posts from the specified author. 133 | 134 | The request supports pagination, so the response contains meta info. 135 | If you want to get all posts, you need to repeat the request with the offset of previous response 136 | until the 'is_last' field becomes True. 
137 | """ 138 | endpoint = f'blog/{author_name}/post/' 139 | 140 | posts_raw = await self._throttled_get( 141 | endpoint, 142 | params=filter_none_params( 143 | { 144 | 'offset': offset, 145 | 'limit': limit, 146 | }, 147 | ), 148 | ) 149 | posts_data = await posts_raw.json() 150 | 151 | if posts_raw.status == HTTPStatus.NOT_FOUND: 152 | raise BoostyAPINoUsernameError(author_name) 153 | 154 | # This will be returned if the user has creds but they're invalid/expired 155 | if posts_raw.status == HTTPStatus.UNAUTHORIZED: 156 | raise BoostyAPIUnauthorizedError 157 | 158 | if posts_raw.status != HTTPStatus.OK: 159 | raise BoostyAPIUnknownError( 160 | posts_raw.status, f'Unexpected status code: {posts_raw.status}' 161 | ) 162 | 163 | try: 164 | posts: list[PostDTO] = [ 165 | PostDTO.model_validate(post) for post in posts_data['data'] 166 | ] 167 | extra: Extra = Extra.model_validate(posts_data['extra']) 168 | except ValidationError as e: 169 | raise BoostyAPIValidationError(errors=e.errors()) from e 170 | 171 | return PostsResponse( 172 | posts=posts, 173 | extra=extra, 174 | ) 175 | 176 | async def iterate_over_posts( 177 | self, 178 | author_name: str, 179 | posts_per_page: int = 5, 180 | ) -> AsyncGenerator[PostsResponse, None]: 181 | """ 182 | Infinite generator iterating over posts of the specified author. 183 | 184 | The generator will yield all posts of the author, paginating internally. 
class ExtVideoError(Exception):
    """Root of the exception hierarchy for external video downloads."""


class ExtVideoInfoError(ExtVideoError):
    """Signals that video metadata (e.g. the title) could not be extracted."""

    def __init__(self, url: str) -> None:
        # The base initializer is not invoked here; the exception still records
        # the URL in ``args`` because BaseException captures constructor arguments.
        self.video_url: str = url


class ExtVideoDownloadError(ExtVideoError):
    """Signals that downloading the video itself failed."""

    def __init__(self, url: str) -> None:
        # Same convention as ExtVideoInfoError: only remember the failing URL.
        self.video_url: str = url


class ExtVideoInterruptedByUserError(ExtVideoError):
    """Signals that the user aborted the download (Ctrl+C)."""
class ExternalVideosDownloader:
    """Manager for downloading external videos (YouTube, Vimeo) with a 720p preference."""

    # Format selector, in order of preference: exactly 720p, then the best stream
    # above 720p, then the best video+audio pair, and finally the best single file.
    _default_ydl_options: ClassVar[YtDlOptions] = {
        'format': 'bv*[height=720]+ba/bv*[height>720]+ba/bv*+ba/b',
        'quiet': True,
        'no_warnings': True,
        'no_color': True,
        'noprogress': True,  # Use progress hook instead
        'logger': _SilentLogger(),  # Suppress noisy error logging
    }

    def download_video(
        self,
        url: str,
        destination_directory: Path,
        progress_hook: ExternalVideoDownloadProgressHook | None = None,
    ) -> Path:
        """
        Download video using yt-dlp and repeatedly report progress via progress_hook callback until completion.

        Returns the path of the downloaded file inside ``destination_directory``.

        Raises:
            ExtVideoInfoError: metadata could not be extracted or has no usable title.
            ExtVideoDownloadError: yt-dlp reported a failure while downloading.
            ExtVideoInterruptedByUserError: the user pressed Ctrl+C during the download.

        """
        info = self._probe_video(url)
        title = info.get('title')
        if not isinstance(title, str) or not title.strip():
            raise ExtVideoInfoError(url)

        clean_title = self._sanitize_title(title)
        if not clean_title.strip():
            # A title made only of punctuation/emoji sanitizes to nothing, which would
            # produce a file literally named ".<ext>". Fall back to the video id.
            clean_title = self._sanitize_title(str(info.get('id') or '')) or 'video'

        destination_directory.mkdir(parents=True, exist_ok=True)

        outtmpl = self._build_outtmpl(destination_directory, clean_title)

        state = _HookState()
        internal_hook = self._make_progress_hook(outtmpl, progress_hook, state)

        options: YtDlOptions = self._default_ydl_options.copy()
        options['outtmpl'] = outtmpl
        options['progress_hooks'] = [internal_hook]

        try:
            with YoutubeDL(params=options) as ydl:
                try:
                    # yt-dlp isn't typed; cast to Any and coerce to int
                    errors: int = int(cast('Any', ydl).download([url]))
                except KeyboardInterrupt as e:
                    raise ExtVideoInterruptedByUserError from e

            if errors != 0:
                raise ExtVideoDownloadError(url)

        except DownloadError as e:
            # Raise the specific download error (still an ExtVideoError) so that both
            # failure paths of the download look the same to callers.
            raise ExtVideoDownloadError(url) from e

        ext = info.get('ext')
        guessed_ext = ext if isinstance(ext, str) and ext else 'mp4'
        guessed_path = destination_directory / f'{clean_title}.{guessed_ext}'

        reported_path = state.final_filename
        if reported_path is None:
            return guessed_path

        # NOTE(review): with "video+audio" formats the hook reports per-stream files
        # that yt-dlp presumably removes after merging; prefer the merged file when
        # the reported one is gone and the guessed one exists.
        if not reported_path.exists() and guessed_path.exists():
            return guessed_path
        return reported_path

    def _probe_video(self, url: str) -> dict[str, Any]:
        """
        Extract metadata without downloading, to validate the URL and fetch title/ext.

        Raises ExtVideoInfoError when yt-dlp fails or returns something that is not a dict.
        """
        try:
            with YoutubeDL({**self._default_ydl_options, 'skip_download': True}) as ydl:
                raw = cast('Any', ydl).extract_info(url, download=False)
        except DownloadError as e:
            raise ExtVideoInfoError(url) from e

        if not isinstance(raw, dict):
            raise ExtVideoInfoError(url)
        return cast('dict[str, Any]', raw)

    @staticmethod
    def _sanitize_title(text: str) -> str:
        """Keep only alphanumeric characters and spaces (cross-platform safe subset)."""
        return ''.join(ch for ch in text if ch.isalnum() or ch == ' ')

    @staticmethod
    def _build_outtmpl(destination_directory: Path, title: str) -> str:
        """Build the yt-dlp output template; yt-dlp substitutes ``%(ext)s`` itself."""
        return str(destination_directory / f'{title}.%(ext)s')

    def _make_progress_hook(
        self,
        outtmpl: str,
        user_hook: ExternalVideoDownloadProgressHook | None,
        state: _HookState,
    ) -> Callable[[dict[str, Any]], None]:
        """
        Create the yt-dlp progress hook that feeds ``user_hook`` and updates ``state``.

        The hook converts yt-dlp's raw status dict into an ExternalVideoDownloadStatus,
        tracks the byte delta since the previous call and remembers the reported
        file name once a download is finished.
        """

        def _hook(d: dict[str, Any]) -> None:
            filename = d.get('filename') or d.get('tmpfilename') or outtmpl
            name = Path(str(filename)).name

            total = d.get('total_bytes') or d.get('total_bytes_estimate')
            downloaded = d.get('downloaded_bytes')
            speed = d.get('speed')

            total_i = int(total) if isinstance(total, (int, float)) else None
            downloaded_i = (
                int(downloaded) if isinstance(downloaded, (int, float)) else None
            )
            speed_f = float(speed) if isinstance(speed, (int, float)) else None

            if total_i is not None and total_i > 0 and downloaded_i is not None:
                percentage = (downloaded_i / total_i) * 100.0
            else:
                percentage = 0.0

            if downloaded_i is None:
                delta = 0
            else:
                previous = state.last_downloaded
                # A counter lower than the previous one means a new stream started
                # (e.g. audio after video); count its bytes from zero instead of
                # reporting a negative delta.
                delta = (
                    downloaded_i - previous if downloaded_i >= previous else downloaded_i
                )
                state.last_downloaded = downloaded_i

            status_payload = ExternalVideoDownloadStatus(
                name=name,
                total_bytes=total_i,
                downloaded_bytes=downloaded_i,
                speed=speed_f,
                percentage=percentage,
                delta_bytes=delta,
            )

            if user_hook is not None:
                # Deliberately best-effort: a failing UI callback must not abort the download.
                with contextlib.suppress(Exception):
                    user_hook(status_payload)

            if d.get('status') in {'finished', 'postprocessing'}:
                f = d.get('filename')
                if isinstance(f, str):
                    state.final_filename = Path(f)

        return _hook
class _PostCacheEntryModel(Base):
    """
    Internal SQLite table structure of the caching layer.

    One row per post (keyed by its UUID) holding a "downloaded" flag for each
    content category plus the post's last-update timestamp.
    """

    __tablename__ = 'post_cache'
    # Alias used purely to make the timestamp column's annotation self-describing.
    _Iso8601Datetime = str

    post_uuid: Mapped[str] = mapped_column(String, primary_key=True)

    # Flags to see which parts of the posts were downloaded and which are not.
    # Every flag defaults to False and may not be NULL.
    files_downloaded: Mapped[bool] = mapped_column(default=False, nullable=False)
    post_content_downloaded: Mapped[bool] = mapped_column(default=False, nullable=False)
    external_videos_downloaded: Mapped[bool] = mapped_column(
        default=False, nullable=False
    )
    boosty_videos_downloaded: Mapped[bool] = mapped_column(
        default=False, nullable=False
    )

    # Timestamp of the last update of the post.
    # Useful to determine if the post is outdated and needs to be re-downloaded even if some parts were downloaded before.
    #
    # Should be in ISO 8601 format (e.g., "2023-10-01T12:00:00Z"),
    # stored as text because SQLite does not have a native tz-aware datetime type.
    last_updated_timestamp: Mapped[_Iso8601Datetime] = mapped_column(
        String, nullable=False
    )
52 | 53 | Caches posts in a local SQLite database under a given directory. 54 | Automatically reinitializes the database if it's missing or corrupted. 55 | 56 | Caching mechanism is smart enough to determine which specific parts are up-to-date 57 | and which are not. 58 | """ 59 | 60 | DEFAULT_CACHE_FILENAME = 'post_cache.db' 61 | 62 | def __enter__(self) -> 'SQLitePostCache': 63 | """Create a context manager for the SQLitePostCache.""" 64 | return self 65 | 66 | def __exit__( 67 | self, 68 | exc_type: type[BaseException] | None, 69 | exc_value: BaseException | None, 70 | exc_tb: TracebackType | None, 71 | ) -> None: 72 | """Ensure that the database connection is closed when exiting the context.""" 73 | self.close() 74 | 75 | def __init__(self, destination: Path, logger: RichLogger) -> None: 76 | """Make a connection with the SQLite database and create/init it if necessary.""" 77 | self.logger = logger 78 | 79 | self.destination = destination 80 | self.db_file: Path = self.destination / self.DEFAULT_CACHE_FILENAME 81 | self.db_file.parent.mkdir(parents=True, exist_ok=True) 82 | 83 | self.engine = create_engine(f'sqlite:///{self.db_file}') 84 | Base.metadata.create_all(self.engine) 85 | 86 | self.Session = sessionmaker(bind=self.engine, expire_on_commit=False) 87 | self.session: Session = self.Session() 88 | self._dirty = False 89 | 90 | def _check_db_integrity(self) -> bool: 91 | """Check if post_cache table is available and the db itself is accessible.""" 92 | try: 93 | # Ping the database to check if it's accessible 94 | self.session.execute(text('SELECT 1 FROM post_cache LIMIT 1')) 95 | # Ensure the expected schema (column names) is present; reinit if legacy schema is detected 96 | self.session.execute(text('SELECT post_uuid FROM post_cache LIMIT 1')) 97 | except (OperationalError, DatabaseError): 98 | return False 99 | else: 100 | return True 101 | 102 | def _reinitialize_db(self) -> None: 103 | """Reinitialize the database (recreate it from scratch) and 
recreate session.""" 104 | self.session.close() 105 | self.engine.dispose() 106 | 107 | if self.db_file.exists(): 108 | self.db_file.unlink() # Remove the corrupted file 109 | 110 | self.engine = create_engine(f'sqlite:///{self.db_file}') 111 | Base.metadata.create_all(self.engine) 112 | self.session = self.Session() 113 | 114 | def _ensure_valid(self) -> None: 115 | """Maintenance method to ensure the database is valid before use.""" 116 | if not self._check_db_integrity(): 117 | self.logger.error( 118 | 'Post cache database is corrupted or inaccessible. Reinitializing...' 119 | ) 120 | self._reinitialize_db() 121 | 122 | def commit(self) -> None: 123 | """ 124 | Commit any pending changes to the database if there are modifications. 125 | 126 | This method should be called after making changes to the database (e.g., adding, 127 | updating, or deleting records) to ensure that the changes are persisted. 128 | The `_dirty` flag is used to track whether there are uncommitted changes. 129 | """ 130 | if self._dirty: 131 | self.session.commit() 132 | self._dirty = False 133 | 134 | def cache( 135 | self, 136 | post_uuid: str, 137 | updated_at: datetime, 138 | was_downloaded: list[DownloadContentTypeFilter], 139 | ) -> None: 140 | """Cache a post by its UUID and updated_at timestamp.""" 141 | self._ensure_valid() 142 | 143 | entry = self.session.get(_PostCacheEntryModel, post_uuid) 144 | 145 | files_downloaded = DownloadContentTypeFilter.files in was_downloaded 146 | boosty_videos_downloaded = ( 147 | DownloadContentTypeFilter.boosty_videos in was_downloaded 148 | ) 149 | post_content_downloaded = ( 150 | DownloadContentTypeFilter.post_content in was_downloaded 151 | ) 152 | external_videos_downloaded = ( 153 | DownloadContentTypeFilter.external_videos in was_downloaded 154 | ) 155 | 156 | # If post already existed - just update False fields to True. 
157 | if entry: 158 | entry.last_updated_timestamp = updated_at.isoformat() 159 | entry.files_downloaded = files_downloaded or entry.files_downloaded 160 | entry.boosty_videos_downloaded = ( 161 | boosty_videos_downloaded or entry.boosty_videos_downloaded 162 | ) 163 | entry.post_content_downloaded = ( 164 | post_content_downloaded or entry.post_content_downloaded 165 | ) 166 | entry.external_videos_downloaded = ( 167 | external_videos_downloaded or entry.external_videos_downloaded 168 | ) 169 | else: 170 | entry = _PostCacheEntryModel( 171 | post_uuid=post_uuid, 172 | last_updated_timestamp=updated_at.isoformat(), 173 | files_downloaded=files_downloaded, 174 | boosty_videos_downloaded=boosty_videos_downloaded, 175 | post_content_downloaded=post_content_downloaded, 176 | external_videos_downloaded=external_videos_downloaded, 177 | ) 178 | self.session.add(entry) 179 | 180 | self._dirty = True 181 | 182 | def get_missing_parts( 183 | self, 184 | post_uuid: str, 185 | updated_at: datetime, 186 | required: list[DownloadContentTypeFilter], 187 | ) -> list[DownloadContentTypeFilter]: 188 | """ 189 | Determine which parts of the post still need to be downloaded. 190 | 191 | Returns all required parts if the post is missing or outdated; otherwise, returns only those parts that haven't been 192 | downloaded yet based on the current cache state. 193 | """ 194 | self._ensure_valid() 195 | post = self.session.get(_PostCacheEntryModel, post_uuid) 196 | if not post: 197 | return required 198 | 199 | # If cached post is outdated in general, just mark all required parts as missing. 
200 | if datetime.fromisoformat(post.last_updated_timestamp) < updated_at: 201 | return required 202 | 203 | missing: list[DownloadContentTypeFilter] = [ 204 | part 205 | for part in required 206 | if ( 207 | (part is DownloadContentTypeFilter.files and not post.files_downloaded) 208 | or ( 209 | part is DownloadContentTypeFilter.boosty_videos 210 | and not post.boosty_videos_downloaded 211 | ) 212 | or ( 213 | part is DownloadContentTypeFilter.external_videos 214 | and not post.external_videos_downloaded 215 | ) 216 | or ( 217 | part is DownloadContentTypeFilter.post_content 218 | and not post.post_content_downloaded 219 | ) 220 | ) 221 | ] 222 | 223 | return missing 224 | 225 | def remove_cache_completely(self) -> None: 226 | """Reinitialize the cache completely in case if user wants to start fresh.""" 227 | self._reinitialize_db() 228 | 229 | def close(self) -> None: 230 | """Save and close the database connection.""" 231 | self.commit() 232 | self.session.close() 233 | self.engine.dispose() 234 | -------------------------------------------------------------------------------- /boosty_downloader/src/interfaces/console_progress_reporter.py: -------------------------------------------------------------------------------- 1 | """ 2 | Progress reporting and logging utilities for console-based Boosty downloader interface. 3 | 4 | Includes a ProgressReporter class for rich progress bars and logging, and a FakeDownloader for demonstration/testing. 
5 | """ 6 | 7 | import asyncio 8 | import logging 9 | import secrets 10 | import uuid 11 | from collections.abc import AsyncGenerator, Sequence 12 | from contextlib import asynccontextmanager 13 | 14 | from rich.console import Console 15 | from rich.logging import RichHandler 16 | from rich.progress import ( 17 | BarColumn, 18 | Progress, 19 | SpinnerColumn, 20 | TaskID, 21 | TaskProgressColumn, 22 | TimeElapsedColumn, 23 | ) 24 | 25 | from boosty_downloader.src.infrastructure.loggers.base import RichLogger 26 | 27 | 28 | class ProgressReporter: 29 | """ 30 | Provides progress bar management and rich logging for console-based interfaces using the Rich library. 31 | 32 | Tasks are identified by UUIDs and can be nested using `level` to visually indent sub-tasks. 33 | """ 34 | 35 | def __init__( 36 | self, 37 | console: Console | None = None, 38 | logger: logging.Logger | None = None, 39 | ) -> None: 40 | self.console = console or Console() 41 | self.progress = Progress( 42 | SpinnerColumn(), 43 | '[progress.description]{task.description}', 44 | BarColumn(), 45 | TaskProgressColumn(), 46 | TimeElapsedColumn(), 47 | console=self.console, 48 | refresh_per_second=29, 49 | transient=True, 50 | ) 51 | self._logger = logger or self._create_default_logger() 52 | self._uuid_to_task_id: dict[uuid.UUID, TaskID] = {} 53 | self._uuid_to_level: dict[uuid.UUID, int] = {} 54 | self._uuid_to_name: dict[uuid.UUID, str] = {} 55 | 56 | def _create_default_logger(self) -> logging.Logger: 57 | logger = logging.getLogger('ProgressLogger') 58 | logger.setLevel(logging.INFO) 59 | logger.addHandler( 60 | RichHandler( 61 | console=self.console, show_time=True, markup=True, show_path=False 62 | ) 63 | ) 64 | return logger 65 | 66 | def _format_description(self, name: str, level: int) -> str: 67 | indent = ' ' * level 68 | max_length = 80 69 | available = max_length - len(indent) 70 | 71 | if len(name) > available: 72 | name = name[: available - 1] + '…' # use ellipsis 73 | 74 | return 
f'{indent}{name}' 75 | 76 | def start(self) -> None: 77 | self.progress.start() 78 | 79 | def stop(self) -> None: 80 | self.progress.stop() 81 | 82 | def create_task( 83 | self, name: str, total: int | None = None, indent_level: int = 0 84 | ) -> uuid.UUID: 85 | task_id = self.progress.add_task( 86 | self._format_description(name, indent_level), total=total 87 | ) 88 | task_uuid = uuid.uuid4() 89 | self._uuid_to_task_id[task_uuid] = task_id 90 | self._uuid_to_level[task_uuid] = indent_level 91 | self._uuid_to_name[task_uuid] = name 92 | return task_uuid 93 | 94 | def update_task( 95 | self, 96 | task_uuid: uuid.UUID, 97 | advance: int = 1, 98 | total: int | None = None, 99 | description: str | None = None, 100 | ) -> None: 101 | task_id = self._uuid_to_task_id.get(task_uuid) 102 | if task_id is not None and task_id in self.progress.task_ids: 103 | level = self._uuid_to_level.get(task_uuid, 0) 104 | base_name = description or self._uuid_to_name.get(task_uuid, '') 105 | formatted_description = self._format_description(base_name, level) 106 | self.progress.update( 107 | task_id, 108 | advance=advance, 109 | total=total, 110 | description=formatted_description, 111 | ) 112 | 113 | def complete_task(self, task_uuid: uuid.UUID) -> None: 114 | task_id = self._uuid_to_task_id.get(task_uuid) 115 | if task_id is not None and task_id in self.progress.task_ids: 116 | total = self.progress.tasks[task_id].total 117 | self.progress.update(task_id, completed=total, visible=False) 118 | self._uuid_to_task_id.pop(task_uuid, None) 119 | self._uuid_to_level.pop(task_uuid, None) 120 | self._uuid_to_name.pop(task_uuid, None) 121 | 122 | def newline(self, count: int = 1) -> None: 123 | for _ in range(count): 124 | self.console.print() 125 | 126 | def headline_rule(self) -> None: 127 | self.console.rule() 128 | 129 | def info(self, message: str) -> None: 130 | self._logger.info(message) 131 | 132 | def success(self, message: str) -> None: 133 | self._logger.info(f'[bold green]✔ 
@asynccontextmanager
async def use_reporter(
    reporter: ProgressReporter,
) -> AsyncGenerator[ProgressReporter, None]:
    """
    Keep the given reporter started for the duration of an ``async with`` body.

    The same reporter instance is yielded back to the caller. ``stop()`` runs on
    exit whether the body (or ``start()`` itself) succeeded or raised.
    """
    try:
        reporter.start()
        # Hand control to the ``async with`` body while the reporter is live.
        yield reporter
    finally:
        reporter.stop()
- 1) // chunk_size 193 | download_task_id = self.reporter.create_task(task_name, total=total_chunks) 194 | 195 | for chunk in range(total_chunks): 196 | # Simulate delay proportional to chunk size 197 | await asyncio.sleep(secrets.randbelow(11) / 100 + 0.05) 198 | self.reporter.update_task( 199 | download_task_id, 200 | advance=1, 201 | description=f'{task_name} [{min((chunk + 1) * chunk_size, size_kb)} KB / {size_kb} KB]', 202 | ) 203 | self.reporter.complete_task(download_task_id) 204 | 205 | async def download_all_posts(self, username: str) -> None: 206 | """Simulate downloading all posts for a user with progress reporting""" 207 | self.reporter.notice(f'Starting download for user: {username}') 208 | self.reporter.headline_rule() 209 | 210 | total_posts = None 211 | download_task_id = self.reporter.create_task('posts', total=total_posts) 212 | 213 | downloaded_posts = 0 214 | 215 | async for posts in self.iterate_pages(): 216 | self.reporter.info(f'Loaded new page with {len(posts)} posts') 217 | 218 | for post_title in posts: 219 | self.reporter.info(f'Processing post: {post_title}') 220 | 221 | if secrets.randbelow(10) == 0: 222 | self.reporter.warn(f'Skipping inaccessible post: {post_title}') 223 | self.reporter.update_task(download_task_id, advance=1) 224 | continue 225 | 226 | files = { 227 | 'image_1': secrets.randbelow(201) + 100, # 100-300 KB 228 | 'video_1': secrets.randbelow(1501) + 1000, # 1-2.5 MB 229 | 'attachment_1': secrets.randbelow(301) + 200, # 200-500 KB 230 | } 231 | 232 | for fname, size_kb in files.items(): 233 | task_name = f'{post_title}::{fname}' 234 | await self.download_file(task_name, size_kb) 235 | self.reporter.success(f'Finished {fname} of {post_title}') 236 | 237 | downloaded_posts += 1 238 | self.reporter.update_task(download_task_id, advance=1) 239 | 240 | self.reporter.headline_rule() 241 | 242 | self.reporter.success(f'✅ Finished downloading {downloaded_posts} posts.') 243 | 244 | async def main() -> None: 245 | """Run a 
demonstration of the FakeDownloader with progress reporting.""" 246 | logger = RichLogger('dumb') 247 | 248 | reporter = ProgressReporter( 249 | logger=logger.logging_logger_obj, 250 | console=logger.console, 251 | ) 252 | async with use_reporter(reporter): 253 | downloader = FakeDownloader(reporter) 254 | await downloader.download_all_posts('demo_user') 255 | 256 | asyncio.run(main()) 257 | --------------------------------------------------------------------------------