├── test
├── unit
│ ├── __init__.py
│ ├── download_manager
│ │ └── ok_video_ranking_test.py
│ └── html_generator
│ │ └── html_templates_test.py
├── integration
│ ├── __init__.py
│ ├── analysis
│ │ ├── __init__.py
│ │ └── get_author_posts_test.py
│ ├── boosty_api
│ │ ├── __init__.py
│ │ └── boosty_api_test.py
│ ├── .env.example
│ ├── configuration.py
│ └── fixtures.py
└── ABOUT_TESTING.md
├── boosty_downloader
├── __init__.py
└── src
│ ├── __init__.py
│ ├── domain
│ ├── __init__.py
│ ├── post.py
│ └── post_data_chunks.py
│ ├── application
│ ├── __init__.py
│ ├── di
│ │ ├── __init__.py
│ │ ├── download_context.py
│ │ └── app_environment.py
│ ├── exceptions
│ │ ├── __init__.py
│ │ └── application_errors.py
│ ├── use_cases
│ │ ├── __init__.py
│ │ ├── check_total_posts.py
│ │ ├── download_specific_post.py
│ │ └── download_all_posts.py
│ ├── mappers
│ │ ├── image.py
│ │ ├── file.py
│ │ ├── external_video.py
│ │ ├── __init__.py
│ │ ├── ok_boosty_video.py
│ │ ├── list.py
│ │ ├── html_converter.py
│ │ ├── post_mapper.py
│ │ └── link_header_text.py
│ ├── filtering.py
│ └── ok_video_ranking.py
│ ├── infrastructure
│ ├── __init__.py
│ ├── loggers
│ │ ├── __init__.py
│ │ ├── logger_instances.py
│ │ ├── failed_downloads_logger.py
│ │ └── base.py
│ ├── html_reporter
│ │ ├── __init__.py
│ │ └── html_reporter.py
│ ├── post_caching
│ │ ├── __init__.py
│ │ └── post_cache.py
│ ├── update_checker
│ │ ├── __init__.py
│ │ └── pypi_checker.py
│ ├── boosty_api
│ │ ├── core
│ │ │ ├── __init__.py
│ │ │ ├── endpoints.py
│ │ │ └── client.py
│ │ ├── models
│ │ │ ├── __init__.py
│ │ │ └── post
│ │ │ │ ├── __init__.py
│ │ │ │ ├── post_data_types
│ │ │ │ ├── post_data_header.py
│ │ │ │ ├── post_data_video.py
│ │ │ │ ├── post_data_file.py
│ │ │ │ ├── post_data_link.py
│ │ │ │ ├── post_data_text.py
│ │ │ │ ├── post_data_image.py
│ │ │ │ ├── __init__.py
│ │ │ │ ├── post_data_list.py
│ │ │ │ └── post_data_ok_video.py
│ │ │ │ ├── posts_request.py
│ │ │ │ ├── extra.py
│ │ │ │ ├── post.py
│ │ │ │ └── base_post_data.py
│ │ ├── utils
│ │ │ ├── __init__.py
│ │ │ ├── filter_none_params.py
│ │ │ ├── textual_post_extractor.py
│ │ │ └── auth_parsers.py
│ │ └── __init__.py
│ ├── yaml_configuration
│ │ ├── __init__.py
│ │ ├── sample_config.py
│ │ └── config.py
│ ├── external_videos_downloader
│ │ ├── __init__.py
│ │ └── external_videos_downloader.py
│ ├── html_generator
│ │ ├── templates
│ │ │ ├── image.html
│ │ │ ├── video.html
│ │ │ ├── list.html
│ │ │ ├── text.html
│ │ │ └── base.html
│ │ ├── __init__.py
│ │ ├── models.py
│ │ └── renderer.py
│ ├── path_sanitizer.py
│ ├── human_readable_filesize.py
│ └── file_downloader.py
│ └── interfaces
│ ├── __init__.py
│ ├── help_panels.py
│ ├── cli_options.py
│ └── console_progress_reporter.py
├── assets
├── usage.png
├── example1.png
├── example2.png
├── auth_guide.png
├── config_guide.png
├── screenshot.png
├── total_check.png
└── boosty-black-badge.png
├── .github
├── renovate.json
├── pull_request_template.md
└── workflows
│ ├── any-pr-validation.yaml
│ ├── release-pr-validation.yaml
│ └── release.yaml
├── pyrightconfig.json
├── ruff.toml
├── LICENSE
├── pyproject.toml
├── CONTRIBUTING.md
├── Makefile
├── CHANGELOG.md
├── README.md
└── .gitignore
/test/unit/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/test/integration/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/boosty_downloader/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/boosty_downloader/src/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/test/integration/analysis/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/boosty_downloader/src/domain/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/test/integration/boosty_api/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/boosty_downloader/src/application/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/boosty_downloader/src/application/di/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/boosty_downloader/src/interfaces/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/boosty_downloader/src/application/exceptions/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/boosty_downloader/src/application/use_cases/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/loggers/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/html_reporter/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/post_caching/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/update_checker/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/core/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/models/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/yaml_configuration/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/models/post/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/external_videos_downloader/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/assets/usage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Glitchy-Sheep/boosty-downloader/HEAD/assets/usage.png
--------------------------------------------------------------------------------
/assets/example1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Glitchy-Sheep/boosty-downloader/HEAD/assets/example1.png
--------------------------------------------------------------------------------
/assets/example2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Glitchy-Sheep/boosty-downloader/HEAD/assets/example2.png
--------------------------------------------------------------------------------
/assets/auth_guide.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Glitchy-Sheep/boosty-downloader/HEAD/assets/auth_guide.png
--------------------------------------------------------------------------------
/assets/config_guide.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Glitchy-Sheep/boosty-downloader/HEAD/assets/config_guide.png
--------------------------------------------------------------------------------
/assets/screenshot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Glitchy-Sheep/boosty-downloader/HEAD/assets/screenshot.png
--------------------------------------------------------------------------------
/assets/total_check.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Glitchy-Sheep/boosty-downloader/HEAD/assets/total_check.png
--------------------------------------------------------------------------------
/assets/boosty-black-badge.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Glitchy-Sheep/boosty-downloader/HEAD/assets/boosty-black-badge.png
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/html_generator/templates/image.html:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/__init__.py:
--------------------------------------------------------------------------------
1 | from .core.client import BoostyAPIClient
2 |
3 | __all__ = [
4 | 'BoostyAPIClient',
5 | ]
6 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/core/endpoints.py:
--------------------------------------------------------------------------------
1 | """All constants for endpoints."""
2 |
3 | BOOSTY_DEFAULT_BASE_URL = 'https://api.boosty.to/v1/'
4 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/html_generator/templates/video.html:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/loggers/logger_instances.py:
--------------------------------------------------------------------------------
1 | """Module contains loggers for different parts of the app"""
2 |
3 | from boosty_downloader.src.infrastructure.loggers.base import RichLogger
4 |
5 | downloader_logger = RichLogger('Boosty_Downloader')
6 |
--------------------------------------------------------------------------------
/boosty_downloader/src/interfaces/help_panels.py:
--------------------------------------------------------------------------------
1 | """Defines panels for grouping arguments in the CLI help interface."""
2 |
3 | from enum import Enum
4 |
5 |
class HelpPanels(str, Enum):
    """
    Titles of the help panels used to group CLI arguments.

    Every member is also a plain ``str`` (through the ``str`` mixin), so a
    member compares equal to its title text.
    """

    # Declaration order is the Enum iteration order; keep it stable.
    actions = 'Actions'
    filtering = 'Filtering'
    network = 'Network'
12 |
--------------------------------------------------------------------------------
/.github/renovate.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "https://docs.renovatebot.com/renovate-schema.json",
3 | "extends": [
4 | "config:recommended"
5 | ],
6 |
7 | "dependencyDashboard": true,
8 |
9 | "updateLockFiles": true,
10 |
11 | "automerge": false,
12 | "autoApprove": false,
13 | "platformAutomerge": false,
14 | "baseBranches": ["dev"]
15 | }
16 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/yaml_configuration/sample_config.py:
--------------------------------------------------------------------------------
1 | """Sample YAML configuration content with placeholder default values."""
2 |
3 | DEFAULT_YAML_CONFIG_VALUE = """
4 | auth:
5 | # Insert your own cookie and auth header values here
6 | cookie: ''
7 | auth_header: ''
8 | downloading_settings:
9 | target_directory: ./boosty-downloads
10 | """
11 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/models/post/post_data_types/post_data_header.py:
--------------------------------------------------------------------------------
1 | """Header of the posts"""
2 |
3 | from typing import Literal
4 |
5 | from pydantic import BaseModel
6 |
7 |
class BoostyPostDataHeaderDTO(BaseModel):
    """
    Header chunk of a post as returned by the Boosty API.

    Validation only accepts payloads whose ``type`` is exactly ``'header'``.
    """

    # Discriminator value identifying this chunk kind.
    type: Literal['header']
    # Raw header payload (presumably JSON-encoded text - TODO confirm).
    content: str
    # Style modifier supplied by the API; its value set is not visible here.
    modificator: str
14 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/models/post/post_data_types/post_data_video.py:
--------------------------------------------------------------------------------
1 | """Usual video links (on youtube and other services)"""
2 |
3 | from typing import Literal
4 |
5 | from pydantic import BaseModel
6 |
7 |
class BoostyPostDataExternalVideoDTO(BaseModel):
    """
    External video chunk of a post (a link to a video hosted elsewhere).

    Validation only accepts payloads whose ``type`` is exactly ``'video'``.
    """

    # Discriminator value identifying this chunk kind.
    type: Literal['video']
    # Address of the video.
    url: str
13 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/utils/filter_none_params.py:
--------------------------------------------------------------------------------
1 | """Just a little helper to make requests"""
2 |
3 | from __future__ import annotations
4 |
5 | from typing import Any
6 |
7 |
def filter_none_params(kwargs: dict[str, Any | None]) -> dict[str, Any]:
    """
    Return a copy of ``kwargs`` without the entries whose value is ``None``.

    Only ``None`` is dropped: other falsy values (``0``, ``''``, ``False``)
    are kept. The input mapping is not modified and key order is preserved.
    """
    present: dict[str, Any] = {}
    for name, value in kwargs.items():
        if value is None:
            continue
        present[name] = value
    return present
11 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/models/post/post_data_types/post_data_file.py:
--------------------------------------------------------------------------------
1 | """The module with file representation of posts data"""
2 |
3 | from typing import Literal
4 |
5 | from pydantic import BaseModel
6 |
7 |
class BoostyPostDataFileDTO(BaseModel):
    """
    File attachment chunk of a post as returned by the Boosty API.

    Validation only accepts payloads whose ``type`` is exactly ``'file'``.
    """

    # Discriminator value identifying this chunk kind.
    type: Literal['file']
    # Address of the attached file.
    url: str
    # Title of the attachment as given by the API.
    title: str
14 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/models/post/post_data_types/post_data_link.py:
--------------------------------------------------------------------------------
1 | """Module with link representation of posts data"""
2 |
3 | from typing import Literal
4 |
5 | from pydantic import BaseModel
6 |
7 |
class BoostyPostDataLinkDTO(BaseModel):
    """
    Link chunk of a post as returned by the Boosty API.

    Validation only accepts payloads whose ``type`` is exactly ``'link'``.
    """

    # Discriminator value identifying this chunk kind.
    type: Literal['link']
    # Target address of the link.
    url: str
    # Raw link text payload (presumably JSON-encoded - TODO confirm).
    content: str
    # Flag delivered by the API; its exact meaning is not visible here.
    explicit: bool
15 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/models/post/post_data_types/post_data_text.py:
--------------------------------------------------------------------------------
1 | """The module with textual representation of posts data"""
2 |
3 | from typing import Literal
4 |
5 | from pydantic import BaseModel
6 |
7 |
class BoostyPostDataTextDTO(BaseModel):
    """
    Text chunk of a post as returned by the Boosty API.

    Validation only accepts payloads whose ``type`` is exactly ``'text'``.
    """

    # Discriminator value identifying this chunk kind.
    type: Literal['text']

    # Raw text payload (presumably JSON-encoded - TODO confirm).
    content: str
    # Style modifier supplied by the API; its value set is not visible here.
    modificator: str
15 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/path_sanitizer.py:
--------------------------------------------------------------------------------
1 | """This module helps with path sanitization so that paths work on different platforms"""
2 |
3 | import re
4 |
5 |
def sanitize_string(string: str) -> str:
    """
    Strip characters that are unsafe in file names from ``string``.

    The characters ``< > : " / \\ | ? *`` are deleted (not replaced); every
    other character is kept in its place. The argument is passed through
    ``str()`` before processing.
    """
    # Third argument of maketrans lists the characters to delete.
    removal_table = str.maketrans('', '', '<>:"/\\|?*')
    return str(string).translate(removal_table)
11 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/models/post/post_data_types/post_data_image.py:
--------------------------------------------------------------------------------
1 | """The module with image representation of posts data"""
2 |
3 | from typing import Literal
4 |
5 | from pydantic import BaseModel
6 |
7 |
class BoostyPostDataImageDTO(BaseModel):
    """
    Image chunk of a post as returned by the Boosty API.

    Validation only accepts payloads whose ``type`` is exactly ``'image'``.
    """

    # Discriminator value identifying this chunk kind.
    type: Literal['image']
    # Address of the image.
    url: str
    # Dimensions are optional and default to None when the payload omits them.
    width: int | None = None
    height: int | None = None
15 |
--------------------------------------------------------------------------------
/pyrightconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "typeCheckingMode": "strict",
3 | "include": [
4 | "boosty_downloader"
5 | ],
6 | "exclude": [
7 | "**/node_modules",
8 | "**/__pycache__",
9 | "**/dist",
10 | "**/build"
11 | ],
12 | "defineConstant": {
13 | "DEBUG": true
14 | },
15 | "reportMissingImports": "error",
16 | "reportMissingTypeStubs": false,
17 | "pythonVersion": "3.10",
18 | "pythonPlatform": "Windows",
19 | }
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/models/post/posts_request.py:
--------------------------------------------------------------------------------
1 | """Models for responses to posts requests sent to boosty.to"""
2 |
3 | from pydantic import BaseModel
4 |
5 | from boosty_downloader.src.infrastructure.boosty_api.models.post.extra import Extra
6 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post import PostDTO
7 |
8 |
class PostsResponse(BaseModel):
    """Payload of a posts request: the posts themselves plus meta info."""

    # Posts contained in this response.
    posts: list[PostDTO]
    # Meta info about the response (see the Extra model).
    extra: Extra
14 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/models/post/extra.py:
--------------------------------------------------------------------------------
1 | """Models for meta info about posts or requests to boosty.to"""
2 |
3 | from pydantic import BaseModel, ConfigDict
4 | from pydantic.alias_generators import to_camel
5 |
6 |
class Extra(BaseModel):
    """
    Meta info attached to a posts response, mainly useful for pagination.

    Fields are aliased to camelCase (``is_last`` <-> ``isLast``), and may
    also be populated by their Python names.
    """

    model_config = ConfigDict(
        alias_generator=to_camel,
        populate_by_name=True,
        from_attributes=True,
    )

    # Whether this is the last portion of posts.
    is_last: bool
    # Offset token (presumably passed to the next request - TODO confirm).
    offset: str
18 |
--------------------------------------------------------------------------------
/boosty_downloader/src/application/mappers/image.py:
--------------------------------------------------------------------------------
1 | """Image content mapper module to transform Boosty API DTO to domain model."""
2 |
3 | from boosty_downloader.src.domain.post import PostDataChunkImage
4 | from boosty_downloader.src.infrastructure.boosty_api.models.post.base_post_data import (
5 | BoostyPostDataImageDTO,
6 | )
7 |
8 |
def to_domain_image_chunk(api_image: BoostyPostDataImageDTO) -> PostDataChunkImage:
    """
    Build a domain image chunk from a Boosty API image DTO.

    Only the image URL is carried over; the DTO's width and height are not
    part of the resulting object.
    """
    image_url = api_image.url
    return PostDataChunkImage(url=image_url)
14 |
--------------------------------------------------------------------------------
/boosty_downloader/src/application/mappers/file.py:
--------------------------------------------------------------------------------
1 | """Mapping functions for converting API PostDataFile objects to domain PostDataChunkFile objects."""
2 |
3 | from boosty_downloader.src.domain.post import PostDataChunkFile
4 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post_data_types import (
5 | BoostyPostDataFileDTO,
6 | )
7 |
8 |
def to_domain_file_chunk(
    api_file: BoostyPostDataFileDTO, signed_query: str
) -> PostDataChunkFile:
    """
    Build a domain file chunk from a Boosty API file DTO.

    The resulting URL is the DTO's ``url`` with ``signed_query`` appended
    verbatim (no separator is inserted); the DTO's ``title`` becomes the
    filename.
    """
    signed_url = api_file.url + signed_query
    return PostDataChunkFile(url=signed_url, filename=api_file.title)
17 |
--------------------------------------------------------------------------------
/test/integration/.env.example:
--------------------------------------------------------------------------------
1 |
2 | # Valid Boosty authentication token
3 | BOOSTY_TOKEN=your_boosty_token_here
4 |
5 | # Cookies for Boosty authentication (if required)
6 | BOOSTY_COOKIES=your_boosty_cookies_here
7 |
8 | # URL or ID of a post that is publicly accessible
9 | BOOSTY_AVAILABLE_POST=https://boosty.to/author/posts/12345
10 |
11 | # URL or ID of a post that exists but is behind a paywall or private
12 | BOOSTY_UNAVAILABLE_POST=https://boosty.to/author/posts/67890
13 |
14 | # Username of an author that doesn't exist
15 | BOOSTY_NONEXISTENT_AUTHOR=nonexistent_author_username
16 |
17 | # Username of an existing author with public content
18 | BOOSTY_EXISTING_AUTHOR=existing_author_username
19 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/utils/textual_post_extractor.py:
--------------------------------------------------------------------------------
1 | """Module to extract textual content from a post by its chunks"""
2 |
3 | from __future__ import annotations
4 |
5 | import json
6 | from io import StringIO
7 |
8 |
def extract_textual_content(
    content: str,
) -> str:
    """
    Extract the textual part of a Link/Text post chunk.

    ``content`` is expected to be a JSON-encoded array whose first element
    carries the text. That first element is returned, converted with
    ``str()``.

    An empty string is returned when ``content`` is not valid JSON, when it
    decodes to something other than a list, or when the list is empty.
    """
    try:
        fragments = json.loads(content)
    except json.JSONDecodeError:
        return ''

    # json.loads may yield any JSON value (number, string, object, null).
    # Indexing those would either raise or return a meaningless piece
    # (e.g. the first character of a string), so only lists are accepted.
    if not isinstance(fragments, list) or not fragments:
        return ''

    return str(fragments[0])
29 |
--------------------------------------------------------------------------------
/boosty_downloader/src/application/mappers/external_video.py:
--------------------------------------------------------------------------------
1 | """Mapping functions for converting external video API DTOs to domain objects."""
2 |
3 | from boosty_downloader.src.domain.post import PostDataChunkExternalVideo
4 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post_data_types import (
5 | BoostyPostDataExternalVideoDTO,
6 | )
7 |
8 |
def to_external_video_content(
    api_video_dto: BoostyPostDataExternalVideoDTO,
) -> PostDataChunkExternalVideo:
    """
    Build a domain external-video chunk from a Boosty API video DTO.

    Only the video URL is taken from the DTO.
    """
    video_url = api_video_dto.url
    return PostDataChunkExternalVideo(url=video_url)
21 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/models/post/post_data_types/__init__.py:
--------------------------------------------------------------------------------
1 | from .post_data_file import BoostyPostDataFileDTO
2 | from .post_data_header import BoostyPostDataHeaderDTO
3 | from .post_data_image import BoostyPostDataImageDTO
4 | from .post_data_link import BoostyPostDataLinkDTO
5 | from .post_data_list import BoostyPostDataListDTO
6 | from .post_data_ok_video import BoostyPostDataOkVideoDTO
7 | from .post_data_text import BoostyPostDataTextDTO
8 | from .post_data_video import BoostyPostDataExternalVideoDTO
9 |
10 | __all__ = [
11 | 'BoostyPostDataExternalVideoDTO',
12 | 'BoostyPostDataFileDTO',
13 | 'BoostyPostDataHeaderDTO',
14 | 'BoostyPostDataImageDTO',
15 | 'BoostyPostDataLinkDTO',
16 | 'BoostyPostDataListDTO',
17 | 'BoostyPostDataOkVideoDTO',
18 | 'BoostyPostDataTextDTO',
19 | ]
20 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/html_generator/__init__.py:
--------------------------------------------------------------------------------
1 | """HTML generator module for independent HTML generation."""
2 |
3 | from .models import (
4 | HtmlGenChunk,
5 | HtmlGenFile,
6 | HtmlGenImage,
7 | HtmlGenList,
8 | HtmlGenText,
9 | HtmlGenVideo,
10 | HtmlListItem,
11 | HtmlListStyle,
12 | HtmlTextFragment,
13 | HtmlTextStyle,
14 | )
15 | from .renderer import (
16 | render_html,
17 | render_html_chunk,
18 | render_html_to_file,
19 | )
20 |
21 | __all__ = [
22 | 'HtmlGenChunk',
23 | 'HtmlGenFile',
24 | 'HtmlGenImage',
25 | 'HtmlGenList',
26 | 'HtmlGenText',
27 | 'HtmlGenVideo',
28 | 'HtmlListItem',
29 | 'HtmlListStyle',
30 | 'HtmlTextFragment',
31 | 'HtmlTextStyle',
32 | 'render_html',
33 | 'render_html_chunk',
34 | 'render_html_to_file',
35 | ]
36 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/human_readable_filesize.py:
--------------------------------------------------------------------------------
1 | """Module with functions for human-readable file size representation"""
2 |
3 | from __future__ import annotations
4 |
5 |
def human_readable_size(size: float | None, decimal_places: int = 2) -> str:
    """
    Format a byte count as a short human-readable string.

    The value is divided by 1024 until it drops below 1024 or the unit
    reaches PB, then rendered with ``decimal_places`` digits after the point.
    ``None`` is rendered as ``'N/A'``.

    Usage example:
        path = Path("example.txt")

        file_size = path.stat().st_size  # Get file size in bytes
        print(human_readable_size(file_size))
    """
    if size is None:
        return 'N/A'

    step = 1024
    units = ('B', 'KB', 'MB', 'GB', 'TB')

    value = size
    position = 0
    # `not value < step` (rather than `value >= step`) keeps the comparison
    # semantics of the straightforward "return if smaller" formulation.
    while position < len(units) and not value < step:
        value /= step
        position += 1

    # Anything that outgrew TB is reported in PB, however large it is.
    label = units[position] if position < len(units) else 'PB'
    return f'{value:.{decimal_places}f} {label}'
26 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/utils/auth_parsers.py:
--------------------------------------------------------------------------------
1 | """Cookie and authorization parser module for raw-browser-data parsing"""
2 |
3 | from http.cookies import SimpleCookie
4 |
5 | import aiohttp
6 |
7 |
def parse_session_cookie(cookie_string: str) -> aiohttp.CookieJar:
    """
    Parse a raw cookie string into an ``aiohttp.CookieJar``.

    A leading ``'cookie: '`` header name (matched case-insensitively) is
    removed before parsing. Each parsed cookie is stored in the jar by its
    name and value only.
    """
    header_prefix = 'cookie: '
    if cookie_string.lower().startswith(header_prefix):
        cookie_string = cookie_string[len(header_prefix) :].strip()

    parsed = SimpleCookie()
    parsed.load(cookie_string)

    jar = aiohttp.CookieJar()
    for name in parsed:
        jar.update_cookies({name: parsed[name].value})

    return jar
21 |
22 |
def parse_auth_header(header: str) -> dict[str, str]:
    """
    Wrap an authorization value into a request-headers dictionary.

    The value is used verbatim as the ``Authorization`` header; no
    validation or trimming is performed.
    """
    headers: dict[str, str] = {}
    headers['Authorization'] = header
    return headers
26 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/html_generator/templates/list.html:
--------------------------------------------------------------------------------
1 | {% macro render_item(item) -%}
2 |
3 | {% for txt in item.data %}
4 | {{ render_chunk(txt) | safe }}
5 | {% endfor %}
6 | {% if item.nested_items %}
7 | {% if lst.style.value == 'ordered' %}
8 |
9 | {% else %}
10 |
11 | {% endif %}
12 | {% for nested in item.nested_items %}
13 | {{ render_item(nested) }}
14 | {% endfor %}
15 | {% if lst.style.value == 'ordered' %}
16 |
17 | {% else %}
18 |
19 | {% endif %}
20 | {% endif %}
21 |
22 | {%- endmacro %}
23 |
24 | {% if lst.style.value == 'ordered' %}
25 |
26 | {% else %}
27 |
28 | {% endif %}
29 | {% for item in lst.items %}
30 | {{ render_item(item) }}
31 | {% endfor %}
32 | {% if lst.style.value == 'ordered' %}
33 |
34 | {% else %}
35 |
36 | {% endif %}
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/models/post/post_data_types/post_data_list.py:
--------------------------------------------------------------------------------
1 | """The module with list representation of posts data"""
2 |
3 | from typing import Literal
4 |
5 | from pydantic import BaseModel
6 |
7 |
class BoostyPostDataListDataItemDTO(BaseModel):
    """A single data piece held by an item of a list chunk."""

    # Kind of the piece; kept as a free-form string (not a Literal).
    type: str
    # Style modifier; may be None and defaults to an empty string.
    modificator: str | None = ''
    # Raw payload of the piece (presumably JSON-encoded text - TODO confirm).
    content: str
14 |
15 |
class BoostyPostDataListItemDTO(BaseModel):
    """
    A single item of a list chunk.

    Items are recursive: each one holds its own data pieces and may contain
    nested items of the same type. Both collections default to empty.
    """

    # Nested items; the string annotation is a forward reference to this class.
    items: list['BoostyPostDataListItemDTO'] = []
    # Data pieces that make up this item.
    data: list[BoostyPostDataListDataItemDTO] = []


# Rebuild the model now that the class name used in the forward reference exists.
BoostyPostDataListItemDTO.model_rebuild()
24 |
25 |
class BoostyPostDataListDTO(BaseModel):
    """
    List chunk of a post as returned by the Boosty API.

    Validation only accepts payloads whose ``type`` is exactly ``'list'``.
    """

    # Discriminator value identifying this chunk kind.
    type: Literal['list']
    # Top-level items of the list.
    items: list[BoostyPostDataListItemDTO]
    # Either 'ordered' or 'unordered'; None when the payload omits it.
    style: Literal['ordered', 'unordered'] | None = None
32 |
--------------------------------------------------------------------------------
/test/integration/configuration.py:
--------------------------------------------------------------------------------
1 | from pydantic import Field
2 | from pydantic_settings import BaseSettings, SettingsConfigDict
3 |
4 |
class IntegrationTestConfig(BaseSettings):
    """
    Integration test settings loaded and validated by pydantic-settings.

    Values are read from the environment and from a ``.env`` file. Every
    field is required and is looked up by its upper-case alias; unknown
    extra entries are ignored.
    """

    model_config = SettingsConfigDict(env_file='.env', extra='ignore')

    # Credentials.
    boosty_auth_token: str = Field(..., alias='BOOSTY_TOKEN')
    boosty_cookies: str = Field(..., alias='BOOSTY_COOKIES')

    # Posts and authors the tests run against.
    boosty_available_post_url: str = Field(..., alias='BOOSTY_AVAILABLE_POST')
    boosty_unavailable_post_url: str = Field(..., alias='BOOSTY_UNAVAILABLE_POST')
    boosty_nonexistent_author: str = Field(..., alias='BOOSTY_NONEXISTENT_AUTHOR')
    boosty_existing_author: str = Field(..., alias='BOOSTY_EXISTING_AUTHOR')

    def summary(self) -> str:
        """
        Return all loaded config fields as a string, for debug purposes.

        The result includes the token and cookie values as well.
        """
        dumped = self.model_dump()
        return str(dumped)
25 |
--------------------------------------------------------------------------------
/boosty_downloader/src/application/mappers/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | These modules contain mappers for converting Boosty API -> domain objects.
3 |
4 | This is the main entry point for data transformation.
5 | """
6 |
7 | from boosty_downloader.src.application.ok_video_ranking import (
8 | get_best_video,
9 | get_quality_ranking,
10 | )
11 |
12 | from .external_video import to_external_video_content
13 | from .file import to_domain_file_chunk
14 | from .image import to_domain_image_chunk
15 | from .link_header_text import to_domain_text_chunk
16 | from .list import to_domain_list_chunk
17 | from .ok_boosty_video import to_ok_boosty_video_content
18 | from .post_mapper import map_post_dto_to_domain
19 |
20 | __all__ = [
21 | 'get_best_video',
22 | 'get_quality_ranking',
23 | 'map_post_dto_to_domain',
24 | 'to_domain_file_chunk',
25 | 'to_domain_image_chunk',
26 | 'to_domain_list_chunk',
27 | 'to_domain_text_chunk',
28 | 'to_external_video_content',
29 | 'to_ok_boosty_video_content',
30 | ]
31 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/models/post/post.py:
--------------------------------------------------------------------------------
1 | """The module describes the form of a post of a user on boosty.to"""
2 |
3 | from __future__ import annotations
4 |
5 | from datetime import datetime # noqa: TC003 Pydantic should know this type fully
6 |
7 | from pydantic import ConfigDict
8 | from pydantic.alias_generators import to_camel
9 | from pydantic.main import BaseModel
10 |
11 | from boosty_downloader.src.infrastructure.boosty_api.models.post.base_post_data import (
12 | BasePostData, # noqa: TC001 Pydantic should know this type fully
13 | )
14 |
15 |
class PostDTO(BaseModel):
    """
    Post on boosty.to together with its data pieces.

    Fields are declared in snake_case; `alias_generator=to_camel` makes the
    model accept camelCase keys (e.g. `createdAt`), and `populate_by_name=True`
    keeps the snake_case names usable as well.
    """

    id: str
    title: str
    created_at: datetime
    updated_at: datetime
    has_access: bool

    signed_query: str

    # Content chunks of the post; each item is resolved to a concrete DTO
    # through the `BasePostData` union.
    data: list[BasePostData]

    model_config = ConfigDict(
        alias_generator=to_camel,
        populate_by_name=True,
        from_attributes=True,
    )
34 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/models/post/base_post_data.py:
--------------------------------------------------------------------------------
1 | """
2 | The module contains a model for boosty 'post' data.
3 |
4 | Only essential fields are defined, for parsing purposes.
5 | """
6 |
7 | from __future__ import annotations
8 |
9 | from typing import Annotated
10 |
11 | from pydantic import Field
12 |
13 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post_data_types import (
14 | BoostyPostDataExternalVideoDTO,
15 | BoostyPostDataFileDTO,
16 | BoostyPostDataHeaderDTO,
17 | BoostyPostDataImageDTO,
18 | BoostyPostDataLinkDTO,
19 | BoostyPostDataListDTO,
20 | BoostyPostDataOkVideoDTO,
21 | BoostyPostDataTextDTO,
22 | )
23 |
# Union of all post data chunk DTOs known to the parser.
# `discriminator='type'` tells pydantic to pick the concrete model by the value
# of the item's `type` field.
BasePostData = Annotated[
    BoostyPostDataTextDTO
    | BoostyPostDataImageDTO
    | BoostyPostDataLinkDTO
    | BoostyPostDataFileDTO
    | BoostyPostDataExternalVideoDTO
    | BoostyPostDataOkVideoDTO
    | BoostyPostDataHeaderDTO
    | BoostyPostDataListDTO,
    Field(
        discriminator='type',
    ),
]
37 |
--------------------------------------------------------------------------------
/ruff.toml:
--------------------------------------------------------------------------------
1 | # match black
2 |
3 | line-length = 88
4 | lint.select = [
5 | "ALL", # include all the rules, including new ones
6 | ]
7 | lint.ignore = [
8 | "E501", # line too long
9 | "D102", # missing docstring in public method
10 | "D212", # multiline docstring should start at the first line (personal preference)
11 | "D107", # missing docstring in __init__ (lol why)
12 | "D400", # first line should end with a period (sometimes mess with markdown or code blocks)
13 | "D415", # first line should end with a period (same as above but trickier)
14 | "RUF001", # ambiguous unicode character in string
15 | "G004", # don't log f-strings (personal preference)
16 | "D203", # one blank line required before class docstring (incompatible with D211)
17 | "COM812", # missing trailing comma (formatter conflicts with this)
18 | ]
19 |
20 | [lint.per-file-ignores]
21 | "test/*" = ["D", "ANN201", "S101", "PLR2004", "INP001"]
22 | "__init__.py" = ["D104"]
23 |
24 | [format]
25 | quote-style = "single"
26 |
27 | [lint.flake8-quotes]
28 | inline-quotes = "single"
29 |
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | # 📌 Task
2 |
3 | ## 📝 Description
4 |
5 | Describe in detail what has changed, why it is needed, and what problems this PR solves.
6 |
7 | ## 🔄 Changelog
8 |
9 |
10 |
11 | - **✨ Added:** …
12 | - **🛠 Fixed:** …
13 | - **🔄 Changed:** …
14 | - **🗑 Removed:** …
15 |
16 | ## 🎯 Related Issue
17 |
18 |
19 | ## 📷 Screenshots (if applicable)
20 |
25 |
26 | ## ✅ Checklist
27 |
28 | - [ ] Locally tested (`make test` and your own judgment)
29 | - [ ] Documentation updated (if necessary)
30 | - [ ] Code follows the project's style guidelines (`make lint && make format`)
31 |
32 | ## ⚠ Notes
33 |
38 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025 Roman Berezkin
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/boosty_downloader/src/domain/post.py:
--------------------------------------------------------------------------------
1 | """Module defines the Post domain model for further downloading."""
2 |
3 | from dataclasses import dataclass
4 | from datetime import datetime
5 |
6 | from boosty_downloader.src.domain.post_data_chunks import (
7 | PostDataChunkBoostyVideo,
8 | PostDataChunkExternalVideo,
9 | PostDataChunkFile,
10 | PostDataChunkImage,
11 | PostDataChunkText,
12 | PostDataChunkTextualList,
13 | )
14 |
# Any chunk kind a post can contain.
PostDataAllChunks = (
    PostDataChunkImage
    | PostDataChunkText
    | PostDataChunkBoostyVideo
    | PostDataChunkExternalVideo
    | PostDataChunkFile
    | PostDataChunkTextualList
)

# Ordered collection of chunks of any kind.
PostDataAllChunksList = list[PostDataAllChunks]

# Narrower collection limited to text, image and textual-list chunks
# (no video or file chunks).
PostDataPostOnlyChunksList = list[
    PostDataChunkText | PostDataChunkImage | PostDataChunkTextualList
]
29 |
30 |
@dataclass
class Post:
    """
    Post on boosty.to which has different kinds of content (images, text, videos, etc.)

    Plain data holder; the content itself lives in `post_data_chunks`.
    """

    uuid: str
    title: str
    created_at: datetime
    updated_at: datetime
    has_access: bool

    signed_query: str

    # Content of the post as a list of domain chunks (see PostDataAllChunks).
    post_data_chunks: PostDataAllChunksList
44 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/html_generator/templates/text.html:
--------------------------------------------------------------------------------
1 | {% for frag in text.text_fragments %}
2 | {% set lvl = frag.header_level|default(0)|int %}
3 | {% if lvl > 0 %}
4 | {% if lvl > 6 %}{% set lvl = 6 %}{% endif %}
5 | {{ frag.text }}
6 | {% else %}
7 | {% if frag.text in ['\n', '\r\n'] %}
8 |
9 | {% else %}
10 | {% if frag.link_url %}
11 |
12 | {% if frag.style.bold %}{% endif %}
13 | {% if frag.style.italic %}{% endif %}
14 | {% if frag.style.underline %}{% endif %}
15 | {{ frag.text }}
16 | {% if frag.style.underline %}{% endif %}
17 | {% if frag.style.italic %}{% endif %}
18 | {% if frag.style.bold %}{% endif %}
19 |
20 | {% else %}
21 | {% if frag.style.bold %}{% endif %}
22 | {% if frag.style.italic %}{% endif %}
23 | {% if frag.style.underline %}{% endif %}
24 | {{ frag.text }}
25 | {% if frag.style.underline %}{% endif %}
26 | {% if frag.style.italic %}{% endif %}
27 | {% if frag.style.bold %}{% endif %}
28 | {% endif %}
29 | {% endif %}
30 | {% endif %}
31 | {% endfor %}
--------------------------------------------------------------------------------
/boosty_downloader/src/application/di/download_context.py:
--------------------------------------------------------------------------------
1 | """Define the DownloadContext dataclass and its dependencies for the download workflow."""
2 |
3 | from dataclasses import dataclass
4 |
5 | from aiohttp_retry import RetryClient
6 |
7 | from boosty_downloader.src.application.filtering import (
8 | BoostyOkVideoType,
9 | DownloadContentTypeFilter,
10 | )
11 | from boosty_downloader.src.infrastructure.external_videos_downloader.external_videos_downloader import (
12 | ExternalVideosDownloader,
13 | )
14 | from boosty_downloader.src.infrastructure.loggers.failed_downloads_logger import (
15 | FailedDownloadsLogger,
16 | )
17 | from boosty_downloader.src.infrastructure.post_caching.post_cache import SQLitePostCache
18 | from boosty_downloader.src.interfaces.console_progress_reporter import ProgressReporter
19 |
20 |
@dataclass
class DownloadContext:
    """
    Aggregates dependencies and configuration for the download workflow.

    A plain container: it only stores the objects listed below.
    """

    # Presumably the Boosty author whose posts are processed; confirm with callers.
    author_name: str
    # aiohttp_retry client.
    downloader_session: RetryClient
    external_videos_downloader: ExternalVideosDownloader
    post_cache: SQLitePostCache
    # Content type filters (see DownloadContentTypeFilter).
    filters: list[DownloadContentTypeFilter]
    # Preferred quality for Boosty (OK) videos.
    preferred_video_quality: BoostyOkVideoType
    progress_reporter: ProgressReporter
    failed_logger: FailedDownloadsLogger
33 |
--------------------------------------------------------------------------------
/test/ABOUT_TESTING.md:
--------------------------------------------------------------------------------
1 | # Structure
2 |
3 | Tests structure doesn't mirror the application structure, but rather groups tests by their functionality or "domain":
4 |
5 | ```
6 | test/
7 | ├── analysis - Tests whose ONLY purpose is to analyze responses from known endpoints
8 | │ └── ...
9 | │
10 | ├── unit - Unit tests for the application, grouped by "domains"
11 | │ └── ...
12 | │
13 | └── integration - Integration tests for the application, grouped by "domains"
14 | ```
15 |
16 | # Add a new test
17 |
18 | **If you want to add a new test:**
19 | 1. *Decide whether it is a unit test or an integration test.*
20 | - **Integration** tests depend on external services (Boosty) or the network and are configurable.
21 | - **Unit** tests are isolated and can be run any time without configuration or setup.
22 | 2. *Decide which "domain" it belongs to*
23 | - For example ok_video_ranking is the boosty_downloader's domain.
24 | 3. *Create a test file, following the naming convention `<name>_test.py`.*
25 | 4. Test some functionality in a function named `test_<name>`.
26 | - Use `assert` statements to check expected outcomes.
27 | 5. *Run the test using `make test` for unit tests or `make test-integration` for integration tests.*
28 | 6. *Make a pull request with your changes.* (see [CONTRIBUTING.md](../CONTRIBUTING.md) for more details)
29 |
--------------------------------------------------------------------------------
/boosty_downloader/src/application/mappers/ok_boosty_video.py:
--------------------------------------------------------------------------------
1 | """Mapper for converting Boosty API video DTOs to domain video content objects."""
2 |
3 | from boosty_downloader.src.application.ok_video_ranking import (
4 | get_best_video,
5 | )
6 | from boosty_downloader.src.domain.post import PostDataChunkBoostyVideo
7 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post_data_types import (
8 | BoostyPostDataOkVideoDTO,
9 | )
10 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post_data_types.post_data_ok_video import (
11 | BoostyOkVideoType,
12 | )
13 |
14 |
def to_ok_boosty_video_content(
    api_video_dto: BoostyPostDataOkVideoDTO, preferred_quality: BoostyOkVideoType
) -> PostDataChunkBoostyVideo | None:
    """
    Build a domain Boosty video chunk from an OK video DTO.

    The player URL is selected by `get_best_video` from the DTO's `player_urls`
    according to `preferred_quality`. When `get_best_video` yields nothing,
    None is returned instead of a chunk.
    """
    selection = get_best_video(
        preferred_quality=preferred_quality,
        video_urls=api_video_dto.player_urls,
    )
    if selection is None:
        return None

    # The selection is a pair: the chosen URL entry and the quality it was picked for.
    selected_entry, selected_quality = selection
    return PostDataChunkBoostyVideo(
        url=selected_entry.url,
        title=api_video_dto.title,
        quality=selected_quality.name,
    )
39 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "boosty-downloader"
3 | version = "2.0.1"
4 | description = ""
5 | authors = [
6 | { name = "Roman Berezkin", email = "Glitchy-Sheep@users.noreply.github.com" },
7 | ]
8 | readme = "README.md"
9 | requires-python = ">=3.10,<4"
10 | dependencies = [
11 | "asyncio (>=3.4.3,<4.0.0)",
12 | "aiofiles (>=24.1.0,<25.0.0)",
13 | "aiohttp (>=3.11.12,<4.0.0)",
14 | "pydantic (>=2.10.6,<3.0.0)",
15 | "rich (>=14.0.0,<14.1.0)",
16 | "pydantic-settings[yaml] (>=2.7.1,<3.0.0)",
17 | "typer (>=0.16.0,<0.17.0)",
18 | "yt-dlp (>=2025.1.26,<2026.0.0)",
19 | "jinja2 (>=3.1.5,<4.0.0)",
20 | "aiohttp-retry (>=2.9.1,<3.0.0)",
21 | "yarl (>=1.18.3,<2.0.0)",
22 | "sqlalchemy (>=2.0.42,<3.0.0)",
23 | "aiolimiter (>=1.2.1,<2.0.0)",
24 | "packaging (>=25.0,<26.0)",
25 | ]
26 |
27 | [project.scripts]
28 | boosty-downloader = "boosty_downloader.main:entry_point"
29 |
30 | [build-system]
31 | requires = ["poetry-core>=2.0.0,<3.0.0"]
32 | build-backend = "poetry.core.masonry.api"
33 |
34 | [tool.poetry.group.dev.dependencies]
35 | ruff = ">=0.9.6,<0.13.0"
36 | pyright = "^1.1.394"
37 | pytest = "^8.3.4"
38 | pytest-asyncio = "^1.1.0"
39 |
40 |
41 | [tool.poetry]
42 | name = "boosty-downloader"
43 | version = "2.0.1"
44 | description = "Download any type of content from boosty.to"
45 | authors = ["Roman Berezkin"]
46 | readme = "README.md"
47 |
48 | packages = [{ include = "boosty_downloader" }]
49 |
--------------------------------------------------------------------------------
/.github/workflows/any-pr-validation.yaml:
--------------------------------------------------------------------------------
1 | # This workflow triggers on pull requests targeting main or dev, and on pushes to dev, feature/** and hotfix/** branches
2 | name: 🔍 PR Code Health Checks (linters / type checks / tests)
3 |
4 | on:
5 | pull_request:
6 | branches:
7 | - main
8 | - dev
9 | push:
10 | branches:
11 | - dev
12 | - 'feature/**'
13 | - 'hotfix/**'
14 |
15 | env:
16 | PACKAGE_NAME: "boosty-downloader"
17 |
18 | jobs:
19 | lint-test-build:
20 | name: 🧪 Code Quality & Build
21 | runs-on: ubuntu-latest
22 | steps:
23 | - uses: actions/checkout@v4
24 |
25 | - name: 🐍 Set up Python
26 | uses: actions/setup-python@v5
27 | with:
28 | python-version: "3.12"
29 |
30 | - name: 📦 Install Poetry if missing
31 | uses: snok/install-poetry@v1
32 | with:
33 | version: 'latest'
34 |
35 | - name: 📥 Install dependencies
36 | run: poetry sync
37 |
38 | - name: 🔍 Run ruff linting
39 | run: make lint-check
40 |
41 | - name: 🎨 Run ruff formatting check
42 | run: make format-check
43 |
44 | - name: 🔎 Run type checking
45 | run: make types
46 |
47 | - name: 🧪 Run tests
48 | run: |
49 | make test-verbose
50 | make test-api-verbose
51 | timeout-minutes: 5
52 |
53 | - name: 🏗️ Build package
54 | run: make build
55 |
56 | - name: ✅ Verify build artifacts
57 | run: |
58 | ls -la dist/
59 | if [ ! -f dist/*.whl ] || [ ! -f dist/*.tar.gz ]; then
60 | echo "❌ Build artifacts missing"
61 | exit 1
62 | fi
63 | echo "✅ Build artifacts created successfully"
64 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/models/post/post_data_types/post_data_ok_video.py:
--------------------------------------------------------------------------------
1 | """Module with ok video representation of posts data"""
2 |
3 | from __future__ import annotations
4 |
5 | from datetime import timedelta # noqa: TC003 Pydantic should know this type fully
6 | from enum import Enum
7 | from typing import Literal
8 |
9 | from pydantic import BaseModel, ConfigDict
10 | from pydantic.alias_generators import to_camel
11 |
12 |
class BoostyOkVideoType(Enum):
    """
    All the types which boosty provides for ok video

    Each member's value equals its name; it is the string parsed into
    `BoostyOkVideoUrl.type`.
    """

    # NOTE(review): the grouping below follows the blank lines of the original
    # layout; the meaning of each group is inferred from the names only.

    # Live playback / on-demand stream variants.
    live_playback_dash = 'live_playback_dash'
    live_playback_hls = 'live_playback_hls'
    live_ondemand_hls = 'live_ondemand_hls'

    # Streaming formats.
    live_dash = 'live_dash'
    live_hls = 'live_hls'
    hls = 'hls'
    dash = 'dash'
    dash_uni = 'dash_uni'
    live_cmaf = 'live_cmaf'

    # Named quality levels (presumably listed from highest to lowest; confirm
    # against the ranking logic).
    ultra_hd = 'ultra_hd'
    quad_hd = 'quad_hd'
    full_hd = 'full_hd'
    high = 'high'
    medium = 'medium'
    low = 'low'
    tiny = 'tiny'
    lowest = 'lowest'
35 |
36 |
class BoostyOkVideoUrl(BaseModel):
    """Link to video with specific format (link can be empty for some formats)"""

    # Address of the video in this format; may be an empty string.
    url: str
    # Format / quality this link belongs to.
    type: BoostyOkVideoType
42 |
43 |
class BoostyPostDataOkVideoDTO(BaseModel):
    """
    Ok video content piece in posts

    Fields are declared in snake_case; `alias_generator=to_camel` makes the
    model accept camelCase keys (e.g. `playerUrls`), and `populate_by_name=True`
    keeps the snake_case names usable as well.
    """

    # Tag value that identifies this chunk kind.
    type: Literal['ok_video']

    title: str
    failover_host: str
    duration: timedelta

    upload_status: str
    complete: bool
    # One entry per available format; an entry's url may be empty
    # (see BoostyOkVideoUrl).
    player_urls: list[BoostyOkVideoUrl]

    model_config = ConfigDict(
        alias_generator=to_camel,
        populate_by_name=True,
        from_attributes=True,
    )
62 |
--------------------------------------------------------------------------------
/boosty_downloader/src/application/exceptions/application_errors.py:
--------------------------------------------------------------------------------
1 | """
2 | Custom exceptions for application-level download errors.
3 |
4 | These classes standardize handling of any download failures from posts or resources,
5 | wrapping lower-level errors into a unified application-level form.
6 | """
7 |
8 |
class ApplicationBaseDownloadError(Exception):
    """
    Base class for all application-level download errors.

    Each error instance is bound to a specific post that triggered it.
    The post UUID is forwarded to `Exception`, so it is present in `args` and
    `str()`, and the instance can be rebuilt by `copy` / `pickle`.

    Attributes
    ----------
    post_uuid : str
        Unique identifier of the post related to the error.

    """

    def __init__(self, post_uuid: str) -> None:
        # Calling super().__init__() with no arguments resets `args` to (),
        # which leaves an empty message and makes copy/pickle fail because the
        # constructor cannot be re-invoked without `post_uuid`.
        super().__init__(post_uuid)
        self.post_uuid = post_uuid


class ApplicationFailedDownloadError(ApplicationBaseDownloadError):
    """
    Raised when downloading a specific resource from a post fails.

    Causes may include network errors, invalid URLs, or resource unavailability
    (e.g., a YouTube video becoming private).

    Attributes
    ----------
    resource : str
        Identifier or description of the resource that failed to download.
    message : str
        Human-readable details about the failure.

    """

    def __init__(self, post_uuid: str, resource: str, message: str) -> None:
        super().__init__(post_uuid)
        # Keep `args` in sync with this constructor's signature so that
        # copy/pickle can rebuild the instance with all three values.
        self.args = (post_uuid, resource, message)
        self.resource = resource
        self.message = message

    def __str__(self) -> str:
        """Return a one-line description with the post, the resource and the failure details."""
        return f'[{self.post_uuid}] {self.resource}: {self.message}'


class ApplicationCancelledError(ApplicationBaseDownloadError):
    """
    Raised when a download for a specific post is cancelled by the user.

    Typically stops the entire download process.
    """
55 |
--------------------------------------------------------------------------------
/boosty_downloader/src/application/filtering.py:
--------------------------------------------------------------------------------
1 | """Content type filters for the download manager."""
2 |
3 | from enum import Enum
4 |
5 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post_data_types.post_data_ok_video import (
6 | BoostyOkVideoType,
7 | )
8 |
9 |
class DownloadContentTypeFilter(Enum):
    """
    Content type filters for the download manager.

    They can be used to download only specific parts of content.
    Each member's value equals its name.
    """

    # -------------------------------------------------------------------
    # --------------------------- WARNING !!! ---------------------------
    # -------------------------------------------------------------------
    #
    # If you add any new content type filter here, please make sure that:
    # 1. The cache logic is updated accordingly
    # 2. All the use cases that use this filter are updated
    # 3. Every other place where these filters are used has been checked

    boosty_videos = 'boosty_videos'
    external_videos = 'external_videos'
    post_content = 'post_content'
    files = 'files'
30 |
31 |
32 | class VideoQualityOption(str, Enum):
33 | """Preferred video quality option for cli"""
34 |
35 | smallest_size = 'smallest_size'
36 | low = 'low'
37 | medium = 'medium'
38 | high = 'high'
39 | highest = 'highest'
40 |
41 | def to_ok_video_type(self) -> BoostyOkVideoType:
42 | mapping = {
43 | VideoQualityOption.smallest_size: BoostyOkVideoType.lowest,
44 | VideoQualityOption.low: BoostyOkVideoType.low,
45 | VideoQualityOption.medium: BoostyOkVideoType.medium,
46 | VideoQualityOption.high: BoostyOkVideoType.high,
47 | VideoQualityOption.highest: BoostyOkVideoType.ultra_hd,
48 | }
49 | return mapping[self]
50 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/update_checker/pypi_checker.py:
--------------------------------------------------------------------------------
1 | """
2 | PyPI update checker
3 |
4 | Provides functions and data structures to check for updates of any package on PyPI.
5 | """
6 |
7 | import json
8 | from dataclasses import dataclass
9 | from enum import Enum, auto
10 | from urllib.request import urlopen
11 |
12 | from packaging import version
13 |
14 |
class UpdateCheckStatus(Enum):
    """
    Represents the status of an update check.

    Values are assigned by `auto()`.
    NOTE(review): the functions in this module report results through the
    `UpdateResult` dataclasses and do not reference this enum.
    """

    NO_UPDATE = auto()
    UPDATE_AVAILABLE = auto()
    CHECK_FAILED = auto()
21 |
22 |
@dataclass
class UpdateAvailable:
    """Update is available: the latest version on PyPI is newer than the current one."""

    # Both are stored as strings; `check_for_updates` fills them with the
    # string form of the parsed versions.
    current_version: str
    latest_version: str
29 |
30 |
@dataclass
class NoUpdate:
    """No update available: the latest version on PyPI is not newer than the current one."""
34 |
35 |
@dataclass
class CheckFailed:
    """Update check failed: the latest version could not be fetched or a version could not be parsed."""
39 |
40 |
# Possible outcomes of `check_for_updates`.
UpdateResult = UpdateAvailable | NoUpdate | CheckFailed
42 |
43 |
44 | def get_pypi_latest_version(package_name: str) -> str | None:
45 | """Fetch the latest version string of a package from PyPI."""
46 | try:
47 | with urlopen(f'https://pypi.org/pypi/{package_name}/json') as resp:
48 | data = json.load(resp)
49 | return data['info']['version']
50 | except Exception: # noqa: BLE001 It doesn't matter what exception is raised, we just need to 100% catch it
51 | return None
52 |
53 |
def check_for_updates(current_version: str, package_name: str) -> UpdateResult:
    """
    Compare `current_version` with the newest release of `package_name` on PyPI.

    Returns `CheckFailed` when the latest version cannot be fetched or either
    version string cannot be parsed, `UpdateAvailable` when PyPI has a newer
    version, and `NoUpdate` otherwise.
    """
    published_raw = get_pypi_latest_version(package_name)
    if published_raw is None:
        return CheckFailed()

    try:
        installed = version.parse(current_version)
        published = version.parse(published_raw)
    except version.InvalidVersion:
        return CheckFailed()

    # Nothing to report unless the published version is strictly newer.
    if published <= installed:
        return NoUpdate()

    return UpdateAvailable(
        current_version=str(installed),
        latest_version=str(published),
    )
73 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/loggers/failed_downloads_logger.py:
--------------------------------------------------------------------------------
1 | """
2 | Deduplicating file logger for failed downloads.
3 |
4 | Format: "[<error_id>]: <message>"; duplicates are suppressed by <error_id>.
5 | The log file and its parent directory are created on demand; writes append.
6 | """
7 |
8 | import re
9 | from pathlib import Path
10 |
11 | import aiofiles
12 |
13 |
class FailedDownloadsLogger:
    """
    Append-only deduplicating logger keyed by error id.

    Will write to a log file created on demand.
    Each error id is unique and will be written only once.
    """

    # Matches the "[<error_id>]:" prefix of a log line and captures the id in
    # the named group `id`. Compiled once for the class.
    _ID_PATTERN = re.compile(r'^\[(?P<id>[^\]]+)\]:')

    def __init__(self, log_file_path: Path) -> None:
        self.file_path = log_file_path
        # Only the parent directory is created here; the file itself appears on
        # the first write.
        self.file_path.parent.mkdir(parents=True, exist_ok=True)
        self._seen_ids: set[str] = set()
        self._loaded = False

    async def _ensure_loaded(self) -> None:
        """Load already logged error ids from the file; does the work only once."""
        if self._loaded:
            return
        if not self.file_path.exists():
            self._loaded = True
            return

        async with aiofiles.open(self.file_path, encoding='utf-8') as f:
            async for line in f:
                m = self._ID_PATTERN.match(line.strip())
                if m:
                    self._seen_ids.add(m.group('id'))
        self._loaded = True

    async def _write_line(self, line: str) -> None:
        """Append a single line to the log file, terminated by exactly one newline."""
        async with aiofiles.open(self.file_path, 'a', encoding='utf-8') as f:
            await f.write(line.rstrip() + '\n')

    async def add_error(self, error_id: str, message: str) -> None:
        """
        Add a failed download error to the log.

        If the error ID is already logged, the message will be suppressed.
        Both `error_id` and `message` are stripped of surrounding whitespace
        before being compared and written.
        """
        error_id = error_id.strip()
        message = message.strip()

        await self._ensure_loaded()
        if error_id in self._seen_ids:
            return

        await self._write_line(f'[{error_id}]: {message}')
        self._seen_ids.add(error_id)
62 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/html_generator/models.py:
--------------------------------------------------------------------------------
1 | """HTML generator models that are independent from domain types."""
2 |
3 | from __future__ import annotations
4 |
5 | from dataclasses import dataclass, field
6 | from enum import Enum
7 |
8 |
@dataclass
class HtmlTextStyle:
    """Text styling options for HTML generation; every flag is off by default."""

    bold: bool = False
    italic: bool = False
    underline: bool = False
16 |
17 |
@dataclass
class HtmlTextFragment:
    """A text fragment with optional styling and links."""

    text: str
    # Link target for the fragment; None means the fragment has no link.
    link_url: str | None = None
    header_level: int = 0  # 0 means no header, 1-6 for h1-h6
    # A fresh HtmlTextStyle (all flags off) is created for each fragment.
    style: HtmlTextStyle = field(default_factory=HtmlTextStyle)
26 |
27 |
@dataclass
class HtmlGenText:
    """Text content for HTML generation, stored as a list of fragments."""

    text_fragments: list[HtmlTextFragment]
33 |
34 |
@dataclass
class HtmlGenImage:
    """Image content for HTML generation."""

    url: str
    # Alternative text; defaults to the generic 'Image'.
    alt: str = 'Image'
    # Optional dimensions; None when not specified.
    width: int | None = None
    height: int | None = None
43 |
44 |
@dataclass
class HtmlGenVideo:
    """Video content for HTML generation."""

    url: str
    # Both are optional and default to None.
    title: str | None = None
    poster: str | None = None
52 |
53 |
class HtmlListStyle(Enum):
    """List style for HTML generation: ordered or unordered."""

    ORDERED = 'ordered'
    UNORDERED = 'unordered'
59 |
60 |
@dataclass
class HtmlListItem:
    """A single item in an HTML list; items can contain further items."""

    data: list[HtmlGenText]
    # Calling the parameterized alias `list['HtmlListItem']` returns a plain
    # empty list; the subscript only carries the element type.
    nested_items: list[HtmlListItem] = field(default_factory=list['HtmlListItem'])
67 |
68 |
@dataclass
class HtmlGenList:
    """List content for HTML generation."""

    items: list[HtmlListItem]
    # Unordered unless stated otherwise.
    style: HtmlListStyle = HtmlListStyle.UNORDERED
75 |
76 |
@dataclass
class HtmlGenFile:
    """File content for HTML generation."""

    url: str
    filename: str
    # Optional title; None when not provided.
    title: str | None = None
84 |
85 |
# Union type for all HTML chunk types.
# A new chunk class has to be added here to be accepted wherever
# `HtmlGenChunk` is used as the parameter type.
HtmlGenChunk = HtmlGenText | HtmlGenImage | HtmlGenVideo | HtmlGenList | HtmlGenFile
88 |
--------------------------------------------------------------------------------
/test/integration/analysis/get_author_posts_test.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 | import pytest
4 | import rich
5 | from aiohttp_retry import RetryClient
6 |
7 | from boosty_downloader.src.infrastructure.boosty_api.core.endpoints import (
8 | BOOSTY_DEFAULT_BASE_URL,
9 | )
10 | from boosty_downloader.src.infrastructure.boosty_api.utils.filter_none_params import (
11 | filter_none_params,
12 | )
13 | from integration.configuration import IntegrationTestConfig
14 |
15 | pytest_plugins = [
16 | 'integration.fixtures',
17 | ]
18 |
19 |
@pytest.mark.asyncio
async def test_get_author_posts(
    authorized_retry_client: RetryClient, integration_config: IntegrationTestConfig
) -> None:
    """Request up to 10 posts of the configured existing author and print the JSON payload."""
    url = f'{BOOSTY_DEFAULT_BASE_URL}blog/{integration_config.boosty_existing_author}/post/'
    query = filter_none_params({'limit': 10})

    response = await authorized_retry_client.get(url, params=query)
    payload = await response.json()

    assert payload is not None

    # Printed for manual inspection of the response shape.
    rich.print_json(data=payload)
40 |
41 |
@pytest.mark.asyncio
async def test_all_data_chunk_types(
    authorized_retry_client: RetryClient,
    integration_config: IntegrationTestConfig,
) -> None:
    """Request up to 25 posts and print one sample chunk for every chunk type encountered."""
    url = f'{BOOSTY_DEFAULT_BASE_URL}blog/{integration_config.boosty_existing_author}/post/'
    query = filter_none_params({'limit': 25})

    response = await authorized_retry_client.get(url, params=query)
    payload = await response.json()

    assert payload is not None

    # Maps a chunk type to the first chunk seen with that type.
    first_chunk_by_type: Any = {}

    for post in payload['data']:
        rich.print(post)
        for chunk in post['data']:
            first_chunk_by_type.setdefault(chunk['type'], chunk)

    rich.print_json(data=first_chunk_by_type)
71 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/html_generator/renderer.py:
--------------------------------------------------------------------------------
1 | """
2 | Module provides functions to render HTML content from structured data.
3 |
4 | You can also dump the rendered HTML to a file.
5 |
6 | Current implementation uses Jinja2 templates to render HTML with a little styling.
7 | """
8 |
9 | from pathlib import Path
10 |
11 | from jinja2 import Environment, PackageLoader, select_autoescape
12 |
13 | from boosty_downloader.src.infrastructure.html_generator.models import (
14 | HtmlGenChunk,
15 | HtmlGenFile,
16 | HtmlGenImage,
17 | HtmlGenList,
18 | HtmlGenText,
19 | HtmlGenVideo,
20 | )
21 |
# Templates are loaded as package files from the `templates` directory of the
# `html_generator` package, so if ANY part of that path changes it must be
# reflected here.
# There is also a test to check that the templates are available and render correctly.
# Autoescaping is enabled for templates with the `html` extension.
env = Environment(
    loader=PackageLoader(
        'boosty_downloader.src.infrastructure.html_generator', 'templates'
    ),
    autoescape=select_autoescape(['html']),
)
31 |
32 |
def render_html_chunk(chunk: HtmlGenChunk) -> str:
    """
    Render a single HtmlGenChunk to its HTML representation.

    Dispatches on the chunk's class: text, image, video and list chunks are
    rendered through their Jinja2 templates, file chunks are formatted inline.
    """
    match chunk:
        case HtmlGenText():
            return env.get_template('text.html').render(text=chunk)
        case HtmlGenImage():
            return env.get_template('image.html').render(image=chunk)
        case HtmlGenVideo():
            # Replace backslashes with forward slashes in the URL.
            # NOTE: this overwrites `url` on the chunk object passed in by the caller.
            chunk.url = str(chunk.url).replace('\\', '/')
            return env.get_template('video.html').render(video=chunk)
        case HtmlGenList():
            # This function itself is handed to the template as `render_chunk`.
            return env.get_template('list.html').render(
                lst=chunk, render_chunk=render_html_chunk
            )
        case HtmlGenFile():
            # NOTE(review): only the bare filename is returned here, with no
            # reference to `chunk.url` and no HTML escaping. Confirm this is intended.
            return f'{chunk.filename}'
49 |
50 |
def render_html(chunks: list[HtmlGenChunk]) -> str:
    """Render every chunk, join the results with newlines and embed them in the base page."""
    pieces = list(map(render_html_chunk, chunks))
    page_template = env.get_template('base.html')
    return page_template.render(content='\n'.join(pieces))
55 |
56 |
def render_html_to_file(chunks: list[HtmlGenChunk], out_path: Path) -> None:
    """Render the chunks to a full HTML page and save it at out_path as UTF-8."""
    document = render_html(chunks)

    # Create the destination directory (and any missing parents) first.
    target_dir = out_path.parent
    target_dir.mkdir(parents=True, exist_ok=True)

    out_path.write_text(document, encoding='utf-8')
62 |
--------------------------------------------------------------------------------
/boosty_downloader/src/application/use_cases/check_total_posts.py:
--------------------------------------------------------------------------------
1 | """Use case for reporting the total number of posts and their accessibility for a given Boosty author."""
2 |
3 | from boosty_downloader.src.infrastructure.boosty_api.core.client import (
4 | BoostyAPIClient,
5 | )
6 | from boosty_downloader.src.infrastructure.loggers.logger_instances import RichLogger
7 |
8 |
class ReportTotalPostsCountUseCase:
    """
    Count an author's posts and report how many of them are accessible.

    The use case walks through every page of the author's posts, keeps a
    running total, and finally logs the number of accessible and
    inaccessible posts together with the titles of the inaccessible ones.
    """

    def __init__(
        self,
        author_name: str,
        logger: RichLogger,
        boosty_api: BoostyAPIClient,
    ) -> None:
        self.author_name = author_name
        self.logger = logger
        self.boosty_api = boosty_api

    async def execute(self) -> None:
        """Iterate over all pages (100 posts per page) and log the summary."""
        pages_seen = 0
        posts_seen = 0
        accessible = 0
        # One pre-formatted " - <title>\n" line per inaccessible post.
        locked_title_lines: list[str] = []

        pages = self.boosty_api.iterate_over_posts(
            self.author_name, posts_per_page=100
        )
        async for page in pages:
            pages_seen += 1
            posts_seen += len(page.posts)

            self.logger.info(
                f'Processing page [bold]{pages_seen}[/bold]'
                ' | '
                f'Total posts so far: [bold]{posts_seen}[/bold]'
            )

            for post in page.posts:
                if not post.has_access:
                    locked_title_lines.append(' - ' + post.title + '\n')
                    continue
                accessible += 1

        inaccessible = len(locked_title_lines)
        locked_titles = ''.join(locked_title_lines)

        self.logger.success(
            f'Total posts: [bold]{posts_seen}[/bold]\n'
            f'Accessible posts: [bold]{accessible}[/bold]\n'
            f'Inaccessible posts: [bold]{inaccessible}[/bold] (need higher tier subscription) see their titles:\n'
            '\n'
            f'[bold]{locked_titles}[/bold]'
        )
63 |
--------------------------------------------------------------------------------
/boosty_downloader/src/application/mappers/list.py:
--------------------------------------------------------------------------------
1 | """
2 | Contains the mapper function for converting Boosty API post data lists.
3 |
4 | This module is responsible for transforming the Boosty API's list representation
5 | to the domain's PostDataChunkTextualList object:
6 |
7 | - unordered list example
8 | - one
9 | - two
10 | - ...
11 |
12 | 1. ordered list example
13 | 1. one
14 | 2. two
15 | 2. ...
16 | """
17 |
18 | from boosty_downloader.src.application.mappers.link_header_text import (
19 | to_domain_text_chunk,
20 | )
21 | from boosty_downloader.src.domain.post_data_chunks import (
22 | PostDataChunkText,
23 | PostDataChunkTextualList,
24 | )
25 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post_data_types.post_data_list import (
26 | BoostyPostDataListDTO,
27 | BoostyPostDataListItemDTO,
28 | )
29 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post_data_types.post_data_text import (
30 | BoostyPostDataTextDTO,
31 | )
32 |
33 |
def to_domain_list_chunk(post_list: BoostyPostDataListDTO) -> PostDataChunkTextualList:
    """Map an API list DTO (including nested items) to a domain PostDataChunkTextualList."""

    def as_text_chunk(entry) -> PostDataChunkText:
        """Wrap one API text entry into a domain text chunk via the text mapper."""
        # The text mapper expects a proper text DTO; a missing modificator
        # is replaced with an empty string.
        dto = BoostyPostDataTextDTO(
            type='text',
            content=entry.content,
            modificator=entry.modificator or '',
        )
        return PostDataChunkText(text_fragments=to_domain_text_chunk(dto))

    def convert_list_item(
        api_item: BoostyPostDataListItemDTO,
    ) -> PostDataChunkTextualList.ListItem:
        """Convert one API list item and, recursively, all of its nested items."""
        # Only entries of type 'text' are carried over to the domain model.
        texts: list[PostDataChunkText] = [
            as_text_chunk(entry) for entry in api_item.data if entry.type == 'text'
        ]
        children = [convert_list_item(child) for child in api_item.items]
        return PostDataChunkTextualList.ListItem(data=texts, nested_items=children)

    top_level_items = list(map(convert_list_item, post_list.items))
    return PostDataChunkTextualList(items=top_level_items)
70 |
--------------------------------------------------------------------------------
/boosty_downloader/src/application/mappers/html_converter.py:
--------------------------------------------------------------------------------
1 | """Converters from domain models to HTML generator models."""
2 |
3 | from boosty_downloader.src.domain.post import (
4 | PostDataChunkImage,
5 | PostDataChunkText,
6 | PostDataChunkTextualList,
7 | )
8 | from boosty_downloader.src.domain.post_data_chunks import (
9 | PostDataChunkFile,
10 | )
11 | from boosty_downloader.src.infrastructure.html_generator.models import (
12 | HtmlGenFile,
13 | HtmlGenImage,
14 | HtmlGenList,
15 | HtmlGenText,
16 | HtmlGenVideo,
17 | HtmlListItem,
18 | HtmlListStyle,
19 | HtmlTextFragment,
20 | HtmlTextStyle,
21 | )
22 |
23 |
def convert_text_to_html(chunk: PostDataChunkText) -> HtmlGenText:
    """Translate every fragment of a domain text chunk into its HTML-model counterpart."""
    html_fragments: list[HtmlTextFragment] = [
        HtmlTextFragment(
            text=fragment.text,
            link_url=fragment.link_url,
            header_level=fragment.header_level,
            style=HtmlTextStyle(
                bold=fragment.style.bold,
                italic=fragment.style.italic,
                underline=fragment.style.underline,
            ),
        )
        for fragment in chunk.text_fragments
    ]
    return HtmlGenText(text_fragments=html_fragments)
42 |
43 |
def convert_image_to_html(chunk: PostDataChunkImage) -> HtmlGenImage:
    """Build the HTML image model that carries the domain image's URL."""
    image_url = chunk.url
    return HtmlGenImage(url=image_url)
47 |
48 |
def convert_video_to_html(src: str, title: str) -> HtmlGenVideo:
    """Build the HTML video model from a video source and its title."""
    video_model = HtmlGenVideo(url=src, title=title)
    return video_model
52 |
53 |
def convert_file_to_html(chunk: PostDataChunkFile) -> HtmlGenFile:
    """Build the HTML file model from the domain file's URL and filename."""
    file_model = HtmlGenFile(url=chunk.url, filename=chunk.filename)
    return file_model
57 |
58 |
def convert_list_to_html(chunk: PostDataChunkTextualList) -> HtmlGenList:
    """Translate a domain list chunk, with all nested items, into the HTML list model."""

    def to_html_item(item: PostDataChunkTextualList.ListItem) -> HtmlListItem:
        """Convert one list item together with its nested items."""
        texts = list(map(convert_text_to_html, item.data))
        children = list(map(to_html_item, item.nested_items))
        return HtmlListItem(data=texts, nested_items=children)

    html_items = list(map(to_html_item, chunk.items))

    # The domain model carries no list style, so an unordered list is always produced.
    return HtmlGenList(items=html_items, style=HtmlListStyle.UNORDERED)
72 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # 💖 Contributing to Boosty Downloader
2 |
3 | Hello, I'm glad you find this project useful and I appreciate your willingness to contribute.
4 |
5 | I created this note to help you understand the way you can help improve the project.
6 |
7 |
8 | ## 👩💻 Development Process
9 |
10 |
11 |

12 |
13 |
14 | ### 🔧 Quick Start
15 |
16 | 1. Fork and clone the repository
17 | 2. Install dependencies: `poetry install`
18 | 3. Create a feature branch and make your changes
19 | 4. Run tests: `poetry run pytest`
20 | 5. Don't forget to bump the version with `poetry version patch` (or minor/major) and update `CHANGELOG.md`
21 | 6. Open a pull request and describe changes and why they are needed
22 |
23 | **Most of the needed/handy commands are available via `make`.**
24 | To see available commands, run:
25 | ```bash
26 | make help
27 | ```
28 |
29 | ### 🩺 Code Quality
30 |
31 | We use:
32 | - **Ruff** for linting and formatting
33 | - **Pyright** for type checking
34 | - **pytest** for testing
35 |
36 | *Please ensure your IDE is configured to use these tools for a smooth development experience.*
37 |
38 |
39 | ### 📝 Writing Good Commit Messages
40 |
41 | **We use**:
42 | - [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) for commit messages.
43 | - [GitMoji](https://gitmoji.dev/) for visual representation of commit types. (**OPTIONAL**)
44 | - Describe not only the change but also **why** it was made.
45 |
46 |
47 | So a generic commit message would look like this:
48 | ```
49 | feat: ✨ Add hyperspace drive support
50 | The hyperspace drive allows faster travel between galaxies.
51 |
52 | fix: 🐛 Fix formatting.
53 | ```
54 |
55 | **To make it even easier for you, use VS Code extension:**
56 | - [VSCode Conventional Commits](https://marketplace.visualstudio.com/items?itemName=vivaxy.vscode-conventional-commits) - it speeds up writing commit messages in our format.
57 |
58 |
59 | ### ✅ Pull Requests CI Checks
60 |
61 | **The project now uses GitHub Actions to:**
62 | - Check PRs for code quality (linting, type checking, tests)
63 | - Check `dev -> main` PRs for version bump
64 | - Automatically create releases on `main` merge (PyPI and GitHub Releases)
65 |
66 |
67 | ### 🔨 Other HOW TOs:
68 |
69 |
70 | 🏁 Making a Release
71 |
72 | 1. **Prepare in `dev` branch:**
73 | ```bash
74 | poetry version patch # or minor/major
75 | # Update CHANGELOG.md
76 | git commit -am "chore: bump version to X.Y.Z"
77 | git push origin dev
78 | ```
79 |
80 | 2. **Create PR:** `dev` → `main`
81 |
82 | 3. **Merge PR** → Automatic release! 🎉
83 |
84 |
85 |
86 | 🐛 Hotfix
87 |
88 | 1. **From main:**
89 | ```bash
90 | git checkout -b hotfix/fix-name
91 | poetry version patch
92 | # Fix bug, update changelog
93 | ```
94 |
95 | 2. **PR:** `hotfix/*` → `main`
96 |
97 |
--------------------------------------------------------------------------------
/boosty_downloader/src/application/mappers/post_mapper.py:
--------------------------------------------------------------------------------
1 | """Mapping logic for converting Boosty API post DTOs to domain Post objects."""
2 |
3 | from boosty_downloader.src.application import mappers
4 | from boosty_downloader.src.domain.post import Post
5 | from boosty_downloader.src.domain.post_data_chunks import PostDataChunkText
6 | from boosty_downloader.src.infrastructure.boosty_api.models.post.base_post_data import (
7 | BoostyPostDataExternalVideoDTO,
8 | )
9 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post import PostDTO
10 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post_data_types import (
11 | BoostyPostDataFileDTO,
12 | BoostyPostDataHeaderDTO,
13 | BoostyPostDataImageDTO,
14 | BoostyPostDataLinkDTO,
15 | BoostyPostDataListDTO,
16 | BoostyPostDataOkVideoDTO,
17 | BoostyPostDataTextDTO,
18 | )
19 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post_data_types.post_data_ok_video import (
20 | BoostyOkVideoType,
21 | )
22 |
23 |
def map_post_dto_to_domain(
    post_dto: PostDTO, preferred_video_quality: BoostyOkVideoType
) -> Post:
    """
    Build a domain Post from a Boosty API PostDTO.

    The post metadata is copied over and every data chunk of the DTO is
    converted by the mapper that matches its type. Chunks of a type not
    listed below are ignored, as are OK videos for which the video mapper
    returns None.
    """
    post = Post(
        uuid=post_dto.id,
        title=post_dto.title,
        created_at=post_dto.created_at,
        updated_at=post_dto.updated_at,
        has_access=post_dto.has_access,
        signed_query=post_dto.signed_query,
        post_data_chunks=[],
    )
    text_like_types = (
        BoostyPostDataHeaderDTO,
        BoostyPostDataLinkDTO,
        BoostyPostDataTextDTO,
    )

    for raw_chunk in post_dto.data:
        if isinstance(raw_chunk, BoostyPostDataImageDTO):
            post.post_data_chunks.append(mappers.to_domain_image_chunk(raw_chunk))
        elif isinstance(raw_chunk, text_like_types):
            # Headers, links and plain text all become a domain text chunk.
            fragments = mappers.to_domain_text_chunk(raw_chunk)
            post.post_data_chunks.append(PostDataChunkText(text_fragments=fragments))
        elif isinstance(raw_chunk, BoostyPostDataListDTO):
            post.post_data_chunks.append(mappers.to_domain_list_chunk(raw_chunk))
        elif isinstance(raw_chunk, BoostyPostDataFileDTO):
            # The file mapper also receives the post's signed query.
            file_chunk = mappers.to_domain_file_chunk(raw_chunk, post.signed_query)
            post.post_data_chunks.append(file_chunk)
        elif isinstance(raw_chunk, BoostyPostDataOkVideoDTO):
            ok_video = mappers.to_ok_boosty_video_content(
                raw_chunk, preferred_quality=preferred_video_quality
            )
            if ok_video is not None:
                post.post_data_chunks.append(ok_video)
        elif isinstance(raw_chunk, BoostyPostDataExternalVideoDTO):
            post.post_data_chunks.append(mappers.to_external_video_content(raw_chunk))

    return post
68 |
--------------------------------------------------------------------------------
/boosty_downloader/src/interfaces/cli_options.py:
--------------------------------------------------------------------------------
1 | """CLI option definitions for Boosty Downloader."""
2 |
3 | from pathlib import Path
4 | from typing import Annotated
5 |
6 | import typer
7 |
8 | from boosty_downloader.src.application.filtering import (
9 | DownloadContentTypeFilter,
10 | VideoQualityOption,
11 | )
12 | from boosty_downloader.src.interfaces.help_panels import HelpPanels
13 |
# --username / -u: the author whose posts are processed.
UsernameOption = Annotated[
    str,
    typer.Option(
        '--username',
        '-u',
        help='Username to download posts from.',
    ),
]

# --request-delay-seconds / -d: pause between API requests; values below 1 are rejected.
RequestDelaySecondsOption = Annotated[
    float,
    typer.Option(
        '--request-delay-seconds',
        '-d',
        help='Delay between requests to the API, in seconds',
        min=1,
        rich_help_panel=HelpPanels.network,
    ),
]


# --content-type-filter / -f: optional list of content types to download.
# The metavar is used to list the accepted values in the help output.
ContentTypeFilterOption = Annotated[
    list[DownloadContentTypeFilter] | None,
    typer.Option(
        '--content-type-filter',
        '-f',
        help='Choose what content you want to download\n\n(default: ALL SET)',
        metavar='Available options:\n- files\n- post_content\n- boosty_videos\n- external_videos\n',
        show_default=False,
        rich_help_panel=HelpPanels.filtering,
    ),
]


# --preferred-video-quality / -q: desired video quality.
PreferredVideoQualityOption = Annotated[
    VideoQualityOption,
    typer.Option(
        '--preferred-video-quality',
        '-q',
        help='Preferred video quality. If not available, the best quality will be used.',
        metavar='Available options:\n- smallest_size\n- low\n- medium\n- high\n- highest',
        rich_help_panel=HelpPanels.filtering,
    ),
]

# --post-url / -p: optional URL of a single post to download.
PostUrlOption = Annotated[
    str | None,
    typer.Option(
        '--post-url',
        '-p',
        help='Download only the specified post if possible',
        metavar='URL',
        show_default=False,
        rich_help_panel=HelpPanels.actions,
    ),
]

# --only-check-total / -t: flag that reports post counts instead of downloading.
CheckTotalCountOption = Annotated[
    bool,
    typer.Option(
        '--only-check-total',
        '-t',
        help='Check total count of accessible/inaccessible(+names) posts and exit, no download',
        rich_help_panel=HelpPanels.actions,
    ),
]

# --clean-cache / -c: flag that removes the posts cache of the selected username.
CleanCacheOption = Annotated[
    bool,
    typer.Option(
        '--clean-cache',
        '-c',
        help='Remove posts cache for selected username [italic]completely[/italic], use with caution',
        rich_help_panel=HelpPanels.actions,
    ),
]

# --destination-directory / -o: optional output directory.
# Only directories are accepted (file_okay=False) and the path is resolved.
DestinationDirectoryOption = Annotated[
    Path | None,
    typer.Option(
        '--destination-directory',
        '-o',
        help='Directory to save downloaded posts',
        dir_okay=True,
        file_okay=False,
        resolve_path=True,
        rich_help_panel=HelpPanels.actions,
        show_default=False,
    ),
]
104 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
# None of the targets below produce a file named after themselves, so all of
# them are declared phony; otherwise an existing file or directory with the
# same name (e.g. `test`, `types`, `help`) would prevent the target from running.
.PHONY: help deps build dev-fix ci-check lint-check lint-fix format-check format-fix types test test-verbose test-api test-api-verbose posts-example
2 |
3 | # Ensure that all the pipe-like commands work correctly.
4 | export PYTHONIOENCODING = utf-8
5 |
6 | help:
7 | @echo ------------------------- To run locally: ----------------------------
8 | @echo Run make deps to install dependencies
9 | @echo And to run current project locally without installation:
10 | @echo poetry run python -m boosty_downloader.main
11 | @echo . .
12 | @echo ------------------------- Available commands: ------------------------
13 | @echo Building:
14 | @echo deps - Install project dependencies using poetry
15 | @echo build - Build the project whl file
16 | @echo ----------------------------------------------------------------------
17 | @echo Code Health:
18 | @echo dev-fix - Try to fix code issues, show problems if any
19 | @echo ci-check - Run CI checks (linter/formatter/type checks)
20 | @echo types - Code type checks using pyright
21 | @echo format-check - Code format check using ruff
22 | @echo format-fix - Code format using ruff
23 | @echo lint-check - Code linting (only check)
24 | @echo lint-fix - Code linting (try to fix)
25 | @echo ----------------------------------------------------------------------
26 | @echo Testing:
27 | @echo test - Run the project unit tests
28 | @echo test-verbose - Run the project unit tests with verbose output
29 | @echo test-api - Run the project API integration tests
30 | @echo test-api-verbose - Run the project API integration tests with verbose output
31 | @echo ----------------------------------------------------------------------
32 | @echo Endpoints Analysis (Only work if integration tests config available):
33 | @echo posts-example - Show posts json for defined author
34 |
35 |
36 |
37 | # ------------------------------------------------------------------------------
38 | # 📦 Distribution
39 |
40 | deps:
41 | poetry sync --no-interaction
42 |
43 | build:
44 | poetry build --no-cache
45 | @echo Build complete at /dist/
46 |
47 | # ------------------------------------------------------------------------------
48 | # 🩺 Code Health Checks
49 |
50 | dev-fix: lint-fix format-fix types
51 | ci-check: lint-check types format-check
52 |
53 | lint-check:
54 | poetry run ruff check .
55 |
56 | lint-fix:
57 | poetry run ruff check --fix .
58 |
59 | format-check:
60 | poetry run ruff format --check .
61 |
62 | format-fix:
63 | poetry run ruff format .
64 |
65 | types:
66 | poetry run pyright
67 |
68 |
69 | # ------------------------------------------------------------------------------
70 | # 🧪 Testing
71 |
72 | test:
73 | poetry run pytest test/unit/
74 |
75 | test-verbose:
76 | poetry run pytest -v test/unit/
77 |
78 | test-api:
79 | poetry run pytest test/integration/
80 |
81 | test-api-verbose:
82 | poetry run pytest -v test/integration/
83 |
84 | # ------------------------------------------------------------------------------
85 | # 🔍 Endpoints analysis
86 |
87 | posts-example:
88 | poetry run pytest ./test/integration/analysis/get_author_posts_test.py::test_get_author_posts -s -q
89 |
90 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/yaml_configuration/config.py:
--------------------------------------------------------------------------------
1 | """Configuration for the whole application"""
2 |
3 | from __future__ import annotations
4 |
5 | import sys
6 | from pathlib import Path
7 |
8 | from pydantic import BaseModel, Field, ValidationError
9 | from pydantic_settings import (
10 | BaseSettings,
11 | PydanticBaseSettingsSource,
12 | SettingsConfigDict,
13 | YamlConfigSettingsSource,
14 | )
15 |
16 | from boosty_downloader.src.infrastructure.loggers import logger_instances
17 | from boosty_downloader.src.infrastructure.yaml_configuration.sample_config import (
18 | DEFAULT_YAML_CONFIG_VALUE,
19 | )
20 |
21 |
class DownloadSettings(BaseModel):
    """Settings for the script downloading process."""

    # Directory for downloaded content; defaults to `./boosty-downloads`,
    # a path relative to the current working directory.
    target_directory: Path = Path('./boosty-downloads')
26 |
27 |
class AuthSettings(BaseModel):
    """Configuration for authentication (cookies and authorization headers)."""

    # Both fields require at least one character (min_length=1) yet default to
    # an empty string.
    # NOTE(review): presumably this makes an explicitly empty value in the
    # config fail validation - confirm how pydantic treats the default itself.
    cookie: str = Field(default='', min_length=1)
    auth_header: str = Field(default='', min_length=1)
33 |
34 |
# Location of the YAML configuration file (a relative path, so it is resolved
# against the current working directory).
CONFIG_LOCATION: Path = Path('config.yaml')
36 |
37 |
class Config(BaseSettings):
    """
    General script configuration with subsections.

    The YAML file at CONFIG_LOCATION (decoded as UTF-8) is registered as a
    settings source in addition to the default pydantic-settings sources.
    """

    model_config = SettingsConfigDict(
        yaml_file=CONFIG_LOCATION,
        yaml_file_encoding='utf-8',
    )

    # Authentication section; defaults to an AuthSettings built from its own defaults.
    auth: AuthSettings = AuthSettings()
    # Download section; defaults to a DownloadSettings built from its own defaults.
    downloading_settings: DownloadSettings = DownloadSettings()

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ) -> tuple[PydanticBaseSettingsSource, ...]:
        """
        Return the settings sources with the YAML file source placed first.

        The remaining sources follow in the order: init arguments, environment
        variables, dotenv file, file secrets.

        NOTE(review): pydantic-settings gives earlier sources higher priority,
        so YAML values would win over constructor arguments - confirm this is
        the intended precedence.
        """
        return (
            YamlConfigSettingsSource(settings_cls),
            init_settings,
            env_settings,
            dotenv_settings,
            file_secret_settings,
        )
65 |
66 |
def create_sample_config_file() -> None:
    """
    Write the sample configuration to CONFIG_LOCATION.

    Any existing file at that location is overwritten. The file is written as
    UTF-8 so that it matches the encoding used when the YAML config is read
    back (`yaml_file_encoding='utf-8'`), regardless of the platform's default
    locale encoding.
    """
    CONFIG_LOCATION.write_text(DEFAULT_YAML_CONFIG_VALUE, encoding='utf-8')
71 |
72 |
def init_config() -> Config:
    """
    Load the application config, creating a sample file when that is not possible.

    If the config file is missing, or exists but fails validation, a sample
    config is written to CONFIG_LOCATION, the problem is logged and the
    process exits with status 1. Otherwise the parsed Config is returned.
    """
    # Guard clause: no config file at all.
    if not CONFIG_LOCATION.exists():
        create_sample_config_file()
        logger_instances.downloader_logger.error("Config doesn't exist")
        logger_instances.downloader_logger.success(
            f'Created a sample config file at {CONFIG_LOCATION.absolute()}, please fill `auth_header` and `cookie` with yours before running the app',
        )
        sys.exit(1)

    try:
        config = Config()
    except ValidationError:
        # The file exists but could not be parsed: replace it with the sample.
        create_sample_config_file()
        logger_instances.downloader_logger.error(
            'Config is invalid (could not be parsed)'
        )
        logger_instances.downloader_logger.error(
            '[bold yellow]Make sure you fill `auth_header` and `cookie` with yours, they are required[/bold yellow]',
        )
        logger_instances.downloader_logger.success(
            f'Recreated it at [green bold]{CONFIG_LOCATION.absolute()}[/green bold]',
        )
        sys.exit(1)
    else:
        return config
97 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | ## 2.0.1
2 |
3 | - 🐛 Fixed image data so posts download even when width/height is missing
4 | - 🐛 Fixed download process to stop automatically after the chosen post
5 |
6 | ## 2.0.0
7 |
8 | ### ⛔ BREAKING CHANGES ⛔
9 |
10 | - Because of the new caching system, the cache database changed.
11 | If you have an existing cache, you may need to clean it first to avoid issues.
12 |
13 | The utility will automatically detect cache inconsistencies and prompt you to clean it though.
14 |
15 | I tried to figure out some sort of DB migration, but it is too complex for the current state of the project, so I decided to just make it a breaking change for now.
16 |
17 | If you know how I can keep migrating the cache given the fact that dbs are
18 | scattered across multiple author directories, and even possibly have different versions
19 | please let me know with an issue!
20 |
21 | - Some options were renamed but their functionality remains the same
22 |
23 | ### 🔔 New Features
24 |
25 | - 🔔 **Automatic Update Checker**
26 | You'll now be notified when a new version is available on PyPI.
27 |
28 | - 📦 **Improved Caching Layer**
29 | - Only the requested parts are cached to avoid unnecessary re-downloads/skips (before this change the post was cached entirely not just the requested parts), so now partial updates are possible.
30 | - Cache is properly **invalidated** if a post is updated by its author (will be re-downloaded).
31 | - More **robust and accurate** caching system: better handling of missing post parts.
32 |
33 | - **HTML Generation Enhancements**
34 | - New **HTML generator engine** with support for **Dark/Light modes**. 🦉
35 | - Added support for **headings and lists** in HTML output.
36 | - Added better support for styling (italic/bold/etc)
37 | - `post_content` now includes both **images AND videos** (offline only).
38 |
39 | - **Improved CLI UX**
40 | - New destination option that allows overriding config values.
41 | - Better help descriptions with logical **option grouping**.
42 | - More informative **post counter**: displays both accessible and inaccessible posts, with names listed for all inaccessible posts.
43 | - Enhanced **logging and error handling** for a more readable and helpful output.
44 |
45 | - **Retry Logic**
46 | - If post download fails, it will be retried up to 5 times with exponential backoff.
47 | - After 5 failed attempts, the post will be skipped and not cached.
48 |
49 | ### 🐛 Fixes
50 |
51 | - Fixed duplication problem [#12](https://github.com/Glitchy-Sheep/boosty-downloader/issues/12) (now posts are cached by UUID and have it as part of the filename, so duplication is no longer an issue)
52 | - Fixed external video downloading for unsupported formats (now format >=720p is preferred, less otherwise).
53 | - Fixed HTML generation for posts with **no content**, now it won't be created.
54 | - Resolved issues with **newline handling** in some HTML outputs.
55 | - Fixed **Ctrl+C interruption** handling with proper cleanup and messaging.
56 | - Prevented creation of **empty directories** for posts with no downloadable content.
57 | Now the utility creates a directory only when there is something to download.
58 |
59 | ### 🧹 Miscellaneous
60 |
61 | - Internal **project structure refactored** for better maintainability and scalability.
62 |
63 | ## 1.0.1
64 | - Fix: 🐛 Support new boosty API response schema (as a placeholder)
65 |
66 | ## 1.0.0
67 |
68 | - First stable release
69 | - Main downloader functions such as video/post/external_video/files
70 | - Added CLI interface with typer (with customizable options)
71 |
--------------------------------------------------------------------------------
/test/unit/download_manager/ok_video_ranking_test.py:
--------------------------------------------------------------------------------
1 | from boosty_downloader.src.application.mappers import (
2 | get_best_video,
3 | get_quality_ranking,
4 | )
5 | from boosty_downloader.src.application.ok_video_ranking import (
6 | BoostyOkVideoType,
7 | BoostyOkVideoUrl,
8 | RankingDict,
9 | )
10 |
11 |
def test_ranking_dict_basic_operations():
    """Stored values are readable by key and pop_max drains entries from highest to lowest."""
    initial_scores = {'a': 10, 'b': 20, 'c': 15}

    scores = RankingDict[str]()
    for key, value in initial_scores.items():
        scores[key] = value

    for key, value in initial_scores.items():
        assert scores[key] == value

    for expected_entry in (('b', 20), ('c', 15), ('a', 10)):
        assert scores.pop_max() == expected_entry
    assert scores.pop_max() is None
26 |
27 |
def test_ranking_dict_delete():
    """A deleted key disappears from the data and is never returned by pop_max."""
    scores = RankingDict[str]()
    scores['x'] = 5
    scores['y'] = 10

    del scores['x']

    assert 'x' not in scores.data
    remaining = scores.pop_max()
    assert remaining == ('y', 10)
    assert scores.pop_max() is None
37 |
38 |
def test_get_quality_ranking():
    """The quality ranking scores ultra_hd highest and pops qualities in descending order."""
    quality_scores = get_quality_ranking()

    assert quality_scores[BoostyOkVideoType.ultra_hd] == 17
    assert quality_scores[BoostyOkVideoType.lowest] == 10

    top_three = (
        (BoostyOkVideoType.ultra_hd, 17),
        (BoostyOkVideoType.quad_hd, 16),
        (BoostyOkVideoType.full_hd, 15),
    )
    for expected_entry in top_three:
        assert quality_scores.pop_max() == expected_entry
46 |
47 |
def test_get_best_video():
    """Without an explicit preference, the medium-quality video is selected."""
    candidates = [
        BoostyOkVideoUrl(type=BoostyOkVideoType.low, url='low.mp4'),
        BoostyOkVideoUrl(type=BoostyOkVideoType.medium, url='medium.mp4'),
        BoostyOkVideoUrl(type=BoostyOkVideoType.full_hd, url='full_hd.mp4'),
    ]

    selection = get_best_video(candidates)
    chosen = selection[0] if selection else None

    assert chosen is not None
    assert chosen.type == BoostyOkVideoType.medium  # Default preference
    assert chosen.url == 'medium.mp4'
60 |
61 |
def test_get_best_video_with_preference():
    """An available preferred quality is the one that gets selected."""
    candidates = [
        BoostyOkVideoUrl(type=BoostyOkVideoType.low, url='low.mp4'),
        BoostyOkVideoUrl(type=BoostyOkVideoType.full_hd, url='full_hd.mp4'),
    ]

    selection = get_best_video(
        candidates, preferred_quality=BoostyOkVideoType.full_hd
    )
    chosen = selection[0] if selection else None

    assert chosen is not None
    assert chosen.type == BoostyOkVideoType.full_hd
    assert chosen.url == 'full_hd.mp4'
77 |
78 |
def test_get_best_video_no_available():
    """Entries with an empty URL are not usable, so nothing is selected."""
    candidates = [
        BoostyOkVideoUrl(type=BoostyOkVideoType.low, url=''),  # No valid URL
        BoostyOkVideoUrl(type=BoostyOkVideoType.medium, url=''),
    ]

    selection = get_best_video(candidates)

    assert selection is None
87 |
88 |
def test_get_best_video_empty_list():
    """An empty candidate list yields no selection."""
    selection = get_best_video([])

    assert selection is None
92 |
93 |
def test_ranking_dict_with_duplicate_entries():
    """Re-assigning a key replaces its score and leaves no stale entry behind."""
    scores = RankingDict[str]()
    scores['a'] = 10
    scores['b'] = 20
    scores['a'] = 30  # Overwriting "a" with a higher value

    for expected_entry in (('a', 30), ('b', 20)):
        assert scores.pop_max() == expected_entry
    assert scores.pop_max() is None
103 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/loggers/base.py:
--------------------------------------------------------------------------------
1 | """Logger for the application."""
2 |
3 | import io
4 | import logging
5 | import sys
6 |
7 | from rich.logging import RichHandler
8 |
# Detect if running in a terminal
is_terminal = sys.stdout.isatty()

# Ensure proper UTF-8 handling in non-interactive environments:
# when stdout is not a TTY, its binary buffer is re-wrapped in a line-buffered
# UTF-8 text stream. The re-wrapping is skipped when pytest has been imported.
if not is_terminal and 'pytest' not in sys.modules:
    sys.stdout = io.TextIOWrapper(
        sys.stdout.buffer,
        encoding='utf-8',
        line_buffering=True,
    )
19 |
20 |
class RichLogger:
    """
    Enhanced logger with Rich for colorful output while keeping severity levels.

    Every message is prefixed with the logger's name and a colored severity
    label written in Rich markup.
    """

    def __init__(self, prefix: str) -> None:
        """
        Create the logger, or attach to an already configured one.

        Args:
            prefix: Name of the underlying `logging` logger; it is also shown
                at the start of every message.

        """
        self.prefix = prefix

        self._log = logging.getLogger(prefix)
        self._log.setLevel(logging.DEBUG)

        # Avoid adding duplicate handlers: logging.getLogger() returns the same
        # logger object for the same name, so a second RichLogger with an
        # identical prefix must reuse the handler that is already attached.
        # Adding another one would print every message twice.
        handler = next(
            (h for h in self._log.handlers if isinstance(h, RichHandler)),
            None,
        )
        if handler is None:
            handler = RichHandler(
                log_time_format='[%H:%M:%S]',
                markup=True,
                show_time=True,
                rich_tracebacks=True,
                show_path=False,
                show_level=False,
            )
            self._log.addHandler(handler)

        self._handler = handler
        self.console = self._handler.console
        self.logging_logger_obj = self._log

    def _log_message(
        self,
        level: int,
        msg: str,
        *,
        highlight: bool = True,
        tab_level: int = 0,
    ) -> None:
        """
        Emit a message indented by `tab_level` tab characters.

        With `highlight=True` the message goes through the `logging` logger at
        the given level. With `highlight=False` it is written directly to the
        handler's console with highlighting disabled; `level` is not used on
        that path.
        """
        if highlight:
            self._log.log(level, '\t' * tab_level + msg)
        else:
            self._handler.console.log('\t' * tab_level + msg, highlight=False)

    def _log_labeled(
        self,
        level: int,
        label: str,
        msg: str,
        *,
        highlight: bool,
        tab_level: int,
    ) -> None:
        """Prepend the colored logger name and severity label, then emit the message."""
        prefix = f'[cyan]{self.prefix}[/cyan]{label}:'
        self._log_message(
            level,
            f'{prefix} {msg}',
            highlight=highlight,
            tab_level=tab_level,
        )

    def info(self, msg: str, *, highlight: bool = True, tab_level: int = 0) -> None:
        """Log an informational message at INFO level."""
        self._log_labeled(
            logging.INFO,
            '[blue].INFO 🔹[/blue]',
            msg,
            highlight=highlight,
            tab_level=tab_level,
        )

    def success(self, msg: str, *, highlight: bool = True, tab_level: int = 0) -> None:
        """Log a success message at INFO level."""
        self._log_labeled(
            logging.INFO,
            '[green].SUCCESS ✔[/green]',
            msg,
            highlight=highlight,
            tab_level=tab_level,
        )

    def error(self, msg: str, *, highlight: bool = True, tab_level: int = 0) -> None:
        """Log an error message at ERROR level."""
        self._log_labeled(
            logging.ERROR,
            '[bold red].ERROR ❌[/bold red]',
            msg,
            highlight=highlight,
            tab_level=tab_level,
        )

    def wait(self, msg: str, *, highlight: bool = True, tab_level: int = 0) -> None:
        """Log a "waiting" message at INFO level."""
        self._log_labeled(
            logging.INFO,
            '[yellow].WAIT ⏳[/yellow]',
            msg,
            highlight=highlight,
            tab_level=tab_level,
        )

    def warning(self, msg: str, *, highlight: bool = True, tab_level: int = 0) -> None:
        """Log a warning message at WARNING level."""
        self._log_labeled(
            logging.WARNING,
            '[bold yellow].WARNING ⚠ [/bold yellow]',
            msg,
            highlight=highlight,
            tab_level=tab_level,
        )
101 |
--------------------------------------------------------------------------------
/boosty_downloader/src/application/ok_video_ranking.py:
--------------------------------------------------------------------------------
1 | """The module provides tools to work with ok video links (selecting them) by quality."""
2 |
3 | from __future__ import annotations
4 |
5 | import heapq
6 | from typing import Generic, TypeVar
7 |
8 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post_data_types.post_data_ok_video import (
9 | BoostyOkVideoType,
10 | BoostyOkVideoUrl,
11 | )
12 |
KT = TypeVar('KT')


class RankingDict(Generic[KT]):
    """
    A dict which also keeps track of the max value, it's not thread-safe.

    Values live in ``data``; a heap of negated values provides ``pop_max``.
    Overwritten or deleted keys leave stale entries in the heap which are
    skipped lazily when popping.
    """

    def __init__(self) -> None:
        self.data: dict[KT, float] = {}
        # Heap entries are (-value, insertion number, key). The insertion
        # number breaks ties between equal values so heapq never has to
        # compare the keys themselves, which may not be orderable.
        self.max_heap: list[tuple[float, int, KT]] = []
        # The single live heap entry of every key currently stored.
        self.entries: dict[KT, tuple[float, int, KT]] = {}
        self._insertions = 0

    def __getitem__(self, key: KT) -> float:
        """Get the value associated with the key (raises KeyError if absent)."""
        return self.data[key]

    def __setitem__(self, key: KT, value: float) -> None:
        """Set the value associated with the key, superseding any older one."""
        self.data[key] = value
        self._insertions += 1
        entry = (-value, self._insertions, key)
        self.entries[key] = entry
        heapq.heappush(self.max_heap, entry)

    def __delitem__(self, key: KT) -> None:
        """Remove the key and its value; a missing key is silently ignored."""
        self.data.pop(key, None)
        # Dropping the live entry is enough: the heap entry left behind no
        # longer matches anything in `entries` and is skipped by pop_max.
        self.entries.pop(key, None)

    def pop_max(self) -> tuple[KT, float] | None:
        """
        Remove and return ``(key, value)`` for the largest value.

        Keys with equal values are returned in the order they were set.
        Returns None when nothing is left.
        """
        while self.max_heap:
            entry = heapq.heappop(self.max_heap)
            negated_value, _, key = entry
            # Only the most recent entry of a still-present key is valid.
            if self.entries.get(key) is entry:
                del self.data[key]
                del self.entries[key]
                return key, -negated_value  # Convert back to positive
        return None
51 |
52 |
def get_quality_ranking() -> RankingDict[BoostyOkVideoType]:
    """
    Build the default ranking of video types.

    The types are listed from the highest score (17) down to the lowest (1)
    and inserted into the ranking in that order.
    """
    best_to_worst = (
        BoostyOkVideoType.ultra_hd,
        BoostyOkVideoType.quad_hd,
        BoostyOkVideoType.full_hd,
        BoostyOkVideoType.high,
        BoostyOkVideoType.medium,
        BoostyOkVideoType.low,
        BoostyOkVideoType.tiny,
        BoostyOkVideoType.lowest,
        BoostyOkVideoType.live_playback_dash,
        BoostyOkVideoType.live_playback_hls,
        BoostyOkVideoType.live_ondemand_hls,
        BoostyOkVideoType.live_dash,
        BoostyOkVideoType.live_hls,
        BoostyOkVideoType.hls,
        BoostyOkVideoType.dash,
        BoostyOkVideoType.dash_uni,
        BoostyOkVideoType.live_cmaf,
    )

    ranking = RankingDict[BoostyOkVideoType]()
    top_score = len(best_to_worst)
    for position, video_type in enumerate(best_to_worst):
        # First element gets the top score, every following one a point less.
        ranking[video_type] = top_score - position

    return ranking
75 |
76 |
def get_best_video(
    video_urls: list[BoostyOkVideoUrl],
    preferred_quality: BoostyOkVideoType = BoostyOkVideoType.medium,
) -> tuple[BoostyOkVideoUrl, BoostyOkVideoType] | None:
    """
    Pick the video to download, trying the preferred quality first.

    Video types are tried from the highest rank down; the first one that is
    present in *video_urls* with a non-empty URL wins. Returns the chosen
    entry together with its type, or None when no entry has a usable URL.
    """
    ranking = get_quality_ranking()
    # Infinity outranks every predefined score, so the preference is tried first.
    ranking[preferred_quality] = float('inf')

    candidates_by_type = {candidate.type: candidate for candidate in video_urls}

    while (top := ranking.pop_max()) is not None:
        video_type, _score = top
        candidate = candidates_by_type.get(video_type)
        if not (candidate and candidate.url):
            continue
        return candidate, video_type

    return None
95 |
--------------------------------------------------------------------------------
/test/integration/boosty_api/boosty_api_test.py:
--------------------------------------------------------------------------------
1 | """Integration tests for Boosty API client.
2 |
3 | These tests make real requests to the Boosty API and require proper configuration.
4 |
5 | Please see test/ABOUT_TESTING.md for more details.
6 | """
7 |
8 | import pytest
9 |
10 | from boosty_downloader.src.infrastructure.boosty_api import (
11 | BoostyAPIClient,
12 | )
13 | from boosty_downloader.src.infrastructure.boosty_api.core.client import (
14 | BoostyAPINoUsernameError,
15 | BoostyAPIUnauthorizedError,
16 | )
17 | from integration.configuration import IntegrationTestConfig
18 |
19 | # For automatic fixture discovery
20 | pytest_plugins = [
21 | 'integration.fixtures',
22 | ]
23 |
24 |
@pytest.mark.asyncio
async def test_get_posts_existing_author_success(
    authorized_boosty_client: BoostyAPIClient, integration_config: IntegrationTestConfig
) -> None:
    """Fetching posts of an existing author returns a populated response object."""
    author = integration_config.boosty_existing_author

    page = await authorized_boosty_client.get_author_posts(
        author_name=author, limit=5
    )

    assert page.posts is not None
    assert page.extra is not None
    assert len(page.posts) >= 0
37 |
38 |
@pytest.mark.asyncio
async def test_get_posts_nonexistent_author_raises_error(
    authorized_boosty_client: BoostyAPIClient, integration_config: IntegrationTestConfig
) -> None:
    """Requesting posts of an unknown author must raise BoostyAPINoUsernameError."""
    unknown_author = integration_config.boosty_nonexistent_author

    with pytest.raises(BoostyAPINoUsernameError):
        await authorized_boosty_client.get_author_posts(
            author_name=unknown_author, limit=5
        )
48 |
49 |
@pytest.mark.asyncio
async def test_get_posts_with_pagination(
    authorized_boosty_client: BoostyAPIClient, integration_config: IntegrationTestConfig
) -> None:
    """
    Two consecutive pages must not share any post.

    The comparison only runs when the first response reports a following page;
    otherwise the test finishes without asserting anything.
    """
    page_one = await authorized_boosty_client.get_author_posts(
        author_name=integration_config.boosty_existing_author, limit=2
    )

    has_next_page = not page_one.extra.is_last and page_one.extra.offset
    if has_next_page:
        page_two = await authorized_boosty_client.get_author_posts(
            author_name=integration_config.boosty_existing_author,
            limit=2,
            offset=page_one.extra.offset,
        )

        # Posts should be different between pages (assuming author has more than 2 posts)
        ids_on_page_one = {post.id for post in page_one.posts}
        ids_on_page_two = {post.id for post in page_two.posts}
        assert ids_on_page_one.isdisjoint(ids_on_page_two), (
            'Pages should contain different posts'
        )
72 |
73 |
@pytest.mark.asyncio
async def test_iterate_over_posts(
    authorized_boosty_client: BoostyAPIClient, integration_config: IntegrationTestConfig
) -> None:
    """The async post iterator yields at least one page of posts."""
    max_pages = 3  # Limit iteration to avoid running too long in tests
    posts_per_seen_page: list[int] = []

    async for response in authorized_boosty_client.iterate_over_posts(
        author_name=integration_config.boosty_existing_author,
        posts_per_page=2,
    ):
        posts_per_seen_page.append(len(response.posts))
        if len(posts_per_seen_page) >= max_pages:
            break

    assert len(posts_per_seen_page) > 0, 'Should retrieve at least one page'
    assert sum(posts_per_seen_page) >= 0, 'Should count posts correctly'
95 |
96 |
@pytest.mark.asyncio
async def test_unathoirized_raises_error(
    invalid_auth_boosty_client: BoostyAPIClient,
    integration_config: IntegrationTestConfig,
) -> None:
    """A client with invalid credentials must get BoostyAPIUnauthorizedError."""
    author = integration_config.boosty_existing_author

    with pytest.raises(BoostyAPIUnauthorizedError):
        await invalid_auth_boosty_client.get_author_posts(author_name=author, limit=5)
107 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | # 🖥️ About
7 |
8 | Welcome to the **Boosty Downloader** project!
9 |
10 | This CLI tool allows you to download most of the content from Boosty.to in bulk.
11 | The post content itself is saved in html with a little bit of styling.
12 |
13 | **You can download:**
14 | - Boosty Videos
15 | - External Videos (YouTube, Vimeo)
16 | - Files
17 | - Full Post content (including photos and links)
18 |
19 | ## 📑 Table of Contents
20 | - [🖥️ About](#️-about)
21 | - [📑 Table of Contents](#-table-of-contents)
22 | - [✨ Features](#-features)
23 | - [📸 Screenshots \& Usage](#-screenshots--usage)
24 | - [🛠️ Installation](#️-installation)
25 | - [🚀 Configuration for Usage](#-configuration-for-usage)
26 | - [Step 1: Get the auth cookie and auth header](#step-1-get-the-auth-cookie-and-auth-header)
27 | - [Step 2: Paste the cookie and auth header into the config file](#step-2-paste-the-cookie-and-auth-header-into-the-config-file)
28 | - [Step 3: Run the utility](#step-3-run-the-utility)
29 | - [💖 Contributing](#-contributing)
30 | - [📜 License](#-license)
31 |
32 |
33 |
34 | ## ✨ Features
35 |
36 | - 📦 **Bulk download**: Download all available content from your favorite creator.
37 | - 🔎 **Total checker**: See how many posts are available to you, and which are not.
38 | - 📂 **Content type filters**: Download only the content you need (videos, images, etc), choose what you really want with flags (see below).
39 | - 📄 **Download specific posts**: Download post by url and username.
40 | - 🔃 **Sync content seamlessly**: The utility keeps cache of already downloaded posts, so you can resume your download at any time or get new content after a while.
41 | - 📼 **Choose your video quality**: You can choose preferred video quality to download (for boosty videos)
42 | - 🎨 **Beautiful post previews**: Post content is rendered into offline HTML files with a switchable dark/light theme.
43 | - 📊 **Order matters**: Posts have dates in names, so you can just sort it by name in your file explorer and see them in the correct chronological order.
44 | - 🆙 **App update checker**: If new updates are available, you'll be notified when you use the application next time.
45 |
46 |
47 | ## 📸 Screenshots & Usage
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 | ## 🛠️ Installation
57 |
58 | 1. **Install python**:
59 | - Windows:
60 | ```bash
61 | winget install Python.Python.3.13
62 | ```
63 | - Linux:
64 | ```bash
65 | sudo apt-get install python3
66 | ```
67 | - macOS:
68 | ```bash
69 | brew install python
70 | ```
71 |
72 | 2. **Install the boosty-downloader package:**
73 | ```bash
74 | pip install boosty-downloader
75 | ```
76 |
77 | 3. **Run the application:**
78 | ```bash
79 | boosty-downloader --help
80 | ```
81 |
82 | ## 🚀 Configuration for Usage
83 |
84 | ### Step 1: Get the auth cookie and auth header
85 |
86 | 1. Open the [Boosty](https://boosty.to) website.
87 | 2. Click the "Sign in" button and fill in your credentials.
88 | 3. Navigate to any author you have access to and scroll through their posts a little.
89 | 4. Copy the auth token and cookie from the browser's network tab.
90 |
91 |
92 |
93 | ### Step 2: Paste the cookie and auth header into the config file
94 |
95 | This config will be created during first run of the app in the current working directory.
96 |
97 |
98 |
99 | ### Step 3: Run the utility
100 |
101 | Now you can just download your content with the following command:
102 |
103 | ```bash
104 | boosty-downloader --username YOUR_CREATOR_NAME
105 | ```
106 |
107 | ## 💖 Contributing
108 |
109 | If you want to contribute to this project, please see the [CONTRIBUTING.md](CONTRIBUTING.md).
110 |
111 | ## 📜 License
112 |
113 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
114 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by https://www.toptal.com/developers/gitignore/api/python
2 | # Edit at https://www.toptal.com/developers/gitignore?templates=python
3 |
4 | test/data
5 |
6 | # ------------- USER DEFINED --------------- #
7 | lab/
8 |
9 | # For local downloading tests
10 | boosty-downloads/
11 |
12 | # Credentials
13 | config.yaml
14 |
15 |
16 | ### Python ###
17 | # Byte-compiled / optimized / DLL files
18 | __pycache__/
19 | *.py[cod]
20 | *$py.class
21 |
22 | # C extensions
23 | *.so
24 |
25 | # Distribution / packaging
26 | .Python
27 | build/
28 | develop-eggs/
29 | dist/
30 | downloads/
31 | eggs/
32 | .eggs/
33 | lib/
34 | lib64/
35 | parts/
36 | sdist/
37 | var/
38 | wheels/
39 | share/python-wheels/
40 | *.egg-info/
41 | .installed.cfg
42 | *.egg
43 | MANIFEST
44 |
45 | # PyInstaller
46 | # Usually these files are written by a python script from a template
47 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
48 | *.manifest
49 | *.spec
50 |
51 | # Installer logs
52 | pip-log.txt
53 | pip-delete-this-directory.txt
54 |
55 | # Unit test / coverage reports
56 | htmlcov/
57 | .tox/
58 | .nox/
59 | .coverage
60 | .coverage.*
61 | .cache
62 | nosetests.xml
63 | coverage.xml
64 | *.cover
65 | *.py,cover
66 | .hypothesis/
67 | .pytest_cache/
68 | cover/
69 |
70 | # Translations
71 | *.mo
72 | *.pot
73 |
74 | # Django stuff:
75 | *.log
76 | local_settings.py
77 | db.sqlite3
78 | db.sqlite3-journal
79 |
80 | # Flask stuff:
81 | instance/
82 | .webassets-cache
83 |
84 | # Scrapy stuff:
85 | .scrapy
86 |
87 | # Sphinx documentation
88 | docs/_build/
89 |
90 | # PyBuilder
91 | .pybuilder/
92 | target/
93 |
94 | # Jupyter Notebook
95 | .ipynb_checkpoints
96 |
97 | # IPython
98 | profile_default/
99 | ipython_config.py
100 |
101 | # pyenv
102 | # For a library or package, you might want to ignore these files since the code is
103 | # intended to run in multiple environments; otherwise, check them in:
104 | # .python-version
105 |
106 | # pipenv
107 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
108 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
109 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
110 | # install all needed dependencies.
111 | #Pipfile.lock
112 |
113 | # poetry
114 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
115 | # This is especially recommended for binary packages to ensure reproducibility, and is more
116 | # commonly ignored for libraries.
117 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
118 | #poetry.lock
119 |
120 | # pdm
121 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
122 | #pdm.lock
123 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
124 | # in version control.
125 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
126 | .pdm.toml
127 | .pdm-python
128 | .pdm-build/
129 |
130 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
131 | __pypackages__/
132 |
133 | # Celery stuff
134 | celerybeat-schedule
135 | celerybeat.pid
136 |
137 | # SageMath parsed files
138 | *.sage.py
139 |
140 | # Environments
141 | .env
142 | .venv
143 | env/
144 | venv/
145 | ENV/
146 | env.bak/
147 | venv.bak/
148 |
149 | # Spyder project settings
150 | .spyderproject
151 | .spyproject
152 |
153 | # Rope project settings
154 | .ropeproject
155 |
156 | # mkdocs documentation
157 | /site
158 |
159 | # mypy
160 | .mypy_cache/
161 | .dmypy.json
162 | dmypy.json
163 |
164 | # Pyre type checker
165 | .pyre/
166 |
167 | # pytype static type analyzer
168 | .pytype/
169 |
170 | # Cython debug symbols
171 | cython_debug/
172 |
173 | # PyCharm
174 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
175 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
176 | # and can be added to the global gitignore or merged into this file. For a more nuclear
177 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
178 | #.idea/
179 |
180 | ### Python Patch ###
181 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
182 | poetry.toml
183 |
184 | # ruff
185 | .ruff_cache/
186 |
187 | # LSP config files
188 | # pyrightconfig.json Make those rules crucial to the project's quality
189 |
190 | # End of https://www.toptal.com/developers/gitignore/api/python
191 |
192 |
--------------------------------------------------------------------------------
/boosty_downloader/src/application/di/app_environment.py:
--------------------------------------------------------------------------------
1 | """Defines the application environment and dependency injection context for resource management."""
2 |
3 | from contextlib import AsyncExitStack
4 | from dataclasses import dataclass
5 | from pathlib import Path
6 | from types import TracebackType
7 |
8 | import aiohttp
9 | from aiohttp.typedefs import LooseHeaders
10 | from aiohttp_retry import RetryClient, RetryOptionsBase
11 |
12 | from boosty_downloader.src.infrastructure.boosty_api.core.client import BoostyAPIClient
13 | from boosty_downloader.src.infrastructure.loggers.logger_instances import RichLogger
14 | from boosty_downloader.src.infrastructure.post_caching.post_cache import SQLitePostCache
15 | from boosty_downloader.src.interfaces.console_progress_reporter import (
16 | ProgressReporter,
17 | use_reporter,
18 | )
19 |
20 |
class AppEnvironment:
    """
    Manages the application's resource initialization and cleanup, providing an async context for dependency injection.

    Entering the context yields an ``Environment`` with the initialized
    resources; leaving it releases everything registered on the exit stack.
    """

    @dataclass
    class Environment:
        """Holds initialized application resources for use within the app context."""

        boosty_api_client: BoostyAPIClient
        downloading_retry_client: RetryClient
        progress_reporter: ProgressReporter
        destination_directory: Path
        post_cache: SQLitePostCache

    @dataclass
    class AppConfig:
        """Configuration for the application environment."""

        author_name: str
        target_directory: Path
        boosty_headers: LooseHeaders
        boosty_cookies_jar: aiohttp.CookieJar
        retry_options: RetryOptionsBase
        request_delay_seconds: float
        logger: RichLogger

    def __init__(
        self,
        config: AppConfig,
    ) -> None:
        """Copy the settings from *config*; no resource is created until the context is entered."""
        self.author_name = config.author_name
        self.target_directory = config.target_directory
        self.boosty_headers = config.boosty_headers
        self.boosty_cookies_jar = config.boosty_cookies_jar
        self.logger = config.logger
        self.retry_options = config.retry_options
        self._request_delay_seconds = config.request_delay_seconds

    async def __aenter__(self) -> 'Environment':
        """
        Enter the async context and initialize resources.

        If any resource fails to initialize, the ones already set up are
        released before the exception propagates: ``__aexit__`` is not invoked
        for a context whose ``__aenter__`` raised.
        """
        self._exit_stack = AsyncExitStack()
        await self._exit_stack.__aenter__()

        try:
            return await self._build_environment()
        except BaseException:
            await self._exit_stack.aclose()
            raise

    async def _build_environment(self) -> 'Environment':
        """Create every resource, registering its cleanup on the exit stack."""
        authorized_boosty_session = await self._exit_stack.enter_async_context(
            # Don't: set BASE_URL here, the BoostyAPIClient will handle it internally.
            # Why: this session will be used for both downloading and API requests with different bases.
            aiohttp.ClientSession(
                headers=self.boosty_headers,
                cookie_jar=self.boosty_cookies_jar,
                timeout=aiohttp.ClientTimeout(total=None),
                trust_env=True,
            )
        )

        progress_reporter = await self._exit_stack.enter_async_context(
            use_reporter(
                reporter=ProgressReporter(
                    logger=self.logger.logging_logger_obj,
                    console=self.logger.console,
                )
            )
        )

        # The retry client wraps the session above and is shared by the API
        # client and by the downloads.
        authorized_retry_client = RetryClient(
            authorized_boosty_session, retry_options=self.retry_options
        )

        boosty_api_client = BoostyAPIClient(
            authorized_retry_client,
            request_delay_seconds=self._request_delay_seconds,
        )

        # The post cache is created for the same per-author directory the
        # environment exposes as destination_directory.
        destination_directory = self.target_directory / self.author_name

        post_cache = SQLitePostCache(
            destination=destination_directory,
            logger=self.logger,
        )
        post_cache.__enter__()  # sync context manager
        self._exit_stack.callback(post_cache.__exit__, None, None, None)

        return self.Environment(
            boosty_api_client=boosty_api_client,
            downloading_retry_client=authorized_retry_client,
            progress_reporter=progress_reporter,
            destination_directory=destination_directory,
            post_cache=post_cache,
        )

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        """Exit the async context and clean up resources"""
        await self._exit_stack.__aexit__(exc_type, exc_val, exc_tb)
115 |
--------------------------------------------------------------------------------
/boosty_downloader/src/domain/post_data_chunks.py:
--------------------------------------------------------------------------------
1 | """
2 | Module contains domain models for post data chunks.
3 |
4 | These are used to represent different parts of a post, such as text, images, etc.
5 | """
6 |
7 | from dataclasses import dataclass, field
8 | from enum import Enum
9 |
10 |
@dataclass
class PostDataChunkImage:
    """Image chunk of a post; carries only the image URL."""

    url: str
16 |
17 |
18 | @dataclass
19 | class PostDataChunkText:
20 | """
21 | Represent a textual data chunk within a post.
22 |
23 | It can contain multiple text fragments, each with optional styling and links.
24 |
25 | For example:
26 | - PostDataChunkText(
27 | text_fragments=[
28 | PostDataChunkText.TextFragment(text="Hello, world!", bold=True),
29 | PostDataChunkText.TextFragment(text="Visit Boosty", link_data="https://boosty.com", header_level=1),
30 | PostDataChunkText.TextFragment(text="This is a normal text."),
31 | PostDataChunkText.TextFragment(text=""),
32 | ]
33 | """
34 |
35 | @dataclass
36 | class TextFragment:
37 | """
38 | Represent a text fragment within a post with possibly additional styling.
39 |
40 | It also can contain a link to external resources (if link_data == None - it's just a text).
41 | """
42 |
43 | @dataclass
44 | class TextStyle:
45 | """Represent text styling options."""
46 |
47 | bold: bool = False
48 | italic: bool = False
49 | underline: bool = False
50 |
51 | text: str
52 | link_url: str | None = None
53 | header_level: int = 0 # Header level (0-6), 0 means no header
54 | style: TextStyle = field(default_factory=TextStyle)
55 |
56 | text_fragments: list[TextFragment]
57 |
58 |
@dataclass
class PostDataChunkBoostyVideo:
    """Boosty-hosted video chunk of a post: its title, URL and quality label."""

    title: str
    url: str
    quality: str
66 |
67 |
@dataclass
class PostDataChunkExternalVideo:
    """
    Video chunk of a post that is hosted outside of Boosty.

    Examples of such sources: YouTube, Vimeo, etc.
    """

    url: str
77 |
78 |
@dataclass
class PostDataChunkFile:
    """File chunk of a post: the file URL and the file name."""

    url: str
    filename: str
85 |
86 |
@dataclass
class PostDataChunkTextualList:
    """
    Represent a list of text items within a post.

    Each item can be a simple text or a more complex structure with optional styling.

    Lists can be nested: every item carries its own ``nested_items`` list,
    and each level of nesting means a new list of items.

    ----------------------------------------------------------------------------
    # For example this:
    ----------------------------------------------------------------------------

    Text = PostDataChunkText
    Item = PostDataChunkTextualList.ListItem

    PostDataChunkTextualList(
        items=[
            Item(
                data=[Text(text_fragments=[Text.TextFragment(text='Item 1')])],
                nested_items=[],
            ),
            Item(
                data=[Text(text_fragments=[Text.TextFragment(text='Nested list:')])],
                nested_items=[
                    Item(
                        data=[Text(text_fragments=[Text.TextFragment(text='Item 2')])],
                        nested_items=[],
                    ),
                    Item(
                        data=[Text(text_fragments=[Text.TextFragment(text='Item 3')])],
                        nested_items=[],
                    ),
                ],
            ),
        ]
    )

    ----------------------------------------------------------------------------
    # Becomes this:
    ----------------------------------------------------------------------------

    - Item 1
    - Nested list:
        - Item 2
        - Item 3
    """

    @dataclass
    class ListItem:
        """Represent a single item in a textual list."""

        data: list['PostDataChunkText']
        nested_items: list['PostDataChunkTextualList.ListItem']

    class ListStyle(Enum):
        """Style of the list, can be ordered or unordered."""

        ordered = 'ordered'
        unordered = 'unordered'

    items: list[ListItem]
150 |
--------------------------------------------------------------------------------
/boosty_downloader/src/application/use_cases/download_specific_post.py:
--------------------------------------------------------------------------------
1 | """Use case for downloading a specific Boosty post by URL."""
2 |
3 | from pathlib import Path
4 |
5 | from boosty_downloader.src.application.di.download_context import DownloadContext
6 | from boosty_downloader.src.application.exceptions.application_errors import (
7 | ApplicationCancelledError,
8 | )
9 | from boosty_downloader.src.application.use_cases.check_total_posts import (
10 | BoostyAPIClient,
11 | )
12 | from boosty_downloader.src.application.use_cases.download_single_post import (
13 | ApplicationFailedDownloadError,
14 | DownloadSinglePostUseCase,
15 | )
16 | from boosty_downloader.src.infrastructure.file_downloader import sanitize_string
17 |
18 |
class DownloadPostByUrlUseCase:
    """
    Handles downloading a specific Boosty post given its URL.

    Right now it just iterates over the author's posts and downloads the one whose UUID matches.
    Because I can't find a way to get post by URL directly at this moment.

    If you know how to do it, please open an issue on GitHub or PR with this functionality.
    """

    def __init__(
        self,
        post_url: str,
        boosty_api: BoostyAPIClient,
        destination: Path,
        download_context: DownloadContext,
    ) -> None:
        """Store the post URL, the API client, the destination directory and the download context."""
        self.post_url = post_url
        self.boosty_api = boosty_api
        self.destination = destination
        self.context = download_context

    def extract_author_and_uuid_from_url(self) -> tuple[str | None, str | None]:
        """
        Parse the Boosty post URL and return (author_name, post_uuid) if possible.

        Expects URLs like: https://boosty.to/author_name/posts/post_uuid
        The scheme may be omitted; query string, fragment and a trailing slash
        are ignored.

        Returns (None, None) and reports an error if parsing fails or the URL
        is not a Boosty one.
        """
        # Local import keeps this class self-contained: the module's import
        # block does not provide urllib.
        from urllib.parse import urlsplit

        url = self.post_url.strip()
        try:
            # urlsplit only recognises the host when a scheme is present.
            parsed = urlsplit(url if '://' in url else f'https://{url}')
            host = parsed.hostname or ''
        except ValueError:
            # Raised for a malformed network location (e.g. unbalanced brackets).
            self.context.progress_reporter.error(
                'Failed to parse author or post UUID from the provided URL. '
            )
            return None, None

        if host != 'boosty.to' and not host.endswith('.boosty.to'):
            self.context.progress_reporter.error(
                "Provided URL doesn't match Boosty format (https://boosty.to/...)"
            )
            return None, None

        # Path layout: /<author>/posts/<uuid>; empty segments come from
        # leading, trailing or doubled slashes.
        segments = [segment for segment in parsed.path.split('/') if segment]
        if len(segments) < 3:  # noqa: PLR2004
            self.context.progress_reporter.error(
                'Failed to parse author or post UUID from the provided URL. '
            )
            return None, None

        return segments[0], segments[2]

    async def execute(self) -> None:
        """
        Find the post referenced by the URL and download it.

        Pages of the author's posts are scanned until the UUID matches. Once
        the post has been found the search stops, whether the download
        succeeded, failed or was cancelled. An error is reported when the URL
        cannot be parsed or no post matches.
        """
        author_name, post_uuid = self.extract_author_and_uuid_from_url()
        if not author_name or not post_uuid:
            self.context.progress_reporter.error(
                'Failed to extract author and UUID from the provided URL, aborting...'
            )
            return

        current_page = 0

        async for page in self.boosty_api.iterate_over_posts(
            author_name=author_name, posts_per_page=100
        ):
            current_page += 1
            self.context.progress_reporter.info(
                f'[Page({current_page})] Searching for the post with UUID: {post_uuid}... '
            )
            for post in page.posts:
                if post.id != post_uuid:
                    continue

                self.context.progress_reporter.success(
                    f'Found post with UUID: {post_uuid}, starting download...'
                )

                post_name = f'{post.created_at.date()} - {post.title}'
                post_name = sanitize_string(post_name).replace('.', '').strip()

                try:
                    await DownloadSinglePostUseCase(
                        post_dto=post,
                        destination=self.destination / post_name,
                        download_context=self.context,
                    ).execute()
                except ApplicationCancelledError:
                    self.context.progress_reporter.warn(
                        'Download cancelled by user. Bye!'
                    )
                except ApplicationFailedDownloadError as e:
                    self.context.progress_reporter.error(
                        f'Failed to download post: {e.message}, RESOURCE: ({e.resource})'
                    )

                # The post was located, so there is nothing left to search
                # for: stop here instead of requesting further pages and
                # reporting a misleading "not found" error afterwards.
                return

        self.context.progress_reporter.error(
            'Failed to find and download the specified post.'
        )
112 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/html_generator/templates/base.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | HTML Report
7 |
8 |
14 |
15 |
133 |
134 |
135 |
136 |
137 |
138 |
139 | {{ content | safe }}
140 |
141 |
142 |
158 |
159 |
160 |
--------------------------------------------------------------------------------
/boosty_downloader/src/application/use_cases/download_all_posts.py:
--------------------------------------------------------------------------------
1 | """Implements the use case for downloading all posts from a Boosty author, applying filters and caching as needed."""
2 |
3 | import asyncio
4 | from pathlib import Path
5 |
6 | from boosty_downloader.src.application.di.download_context import DownloadContext
7 | from boosty_downloader.src.application.exceptions.application_errors import (
8 | ApplicationCancelledError,
9 | ApplicationFailedDownloadError,
10 | )
11 | from boosty_downloader.src.application.use_cases.download_single_post import (
12 | DownloadSinglePostUseCase,
13 | )
14 | from boosty_downloader.src.infrastructure.boosty_api.core.client import BoostyAPIClient
15 | from boosty_downloader.src.infrastructure.path_sanitizer import (
16 | sanitize_string,
17 | )
18 |
19 |
class DownloadAllPostUseCase:
    """
    Download every accessible post of a single Boosty author.

    Posts are fetched page by page from the API client and each one is handed
    to `DownloadSinglePostUseCase`, which stores it in its own folder below
    the destination path given at construction time.
    """

    def __init__(
        self,
        author_name: str,
        boosty_api: BoostyAPIClient,
        destination: Path,
        download_context: DownloadContext,
    ) -> None:
        self.author_name = author_name

        self.boosty_api = boosty_api
        self.destination = destination
        self.context = download_context

    def _prepare_post_title(self, post_dto) -> str:
        """
        Normalise the post title in place and return the post folder name.

        The folder name is `date - TITLE (UUID_PART)`; the id part keeps
        different posts with the same title in separate folders.
        """
        # Posts without a title fall back to the first 8 chars of their id.
        if len(post_dto.title) == 0:
            post_dto.title = f'Not title (id_{post_dto.id[:8]})'

        cleaned_title = sanitize_string(post_dto.title)
        post_dto.title = cleaned_title.replace('.', '').strip()

        return f'{post_dto.created_at.date()} - {post_dto.title} ({post_dto.id[:8]})'

    async def _download_with_retries(
        self, single_post_use_case: DownloadSinglePostUseCase, full_post_title: str
    ) -> None:
        """
        Run the single-post download, retrying failed attempts with a growing delay.

        Cancellation is propagated to the caller; a post that still fails on
        the last attempt is reported and skipped.
        """
        max_attempts = 5
        delay = 1.0
        attempt = 0

        while attempt < max_attempts:
            attempt += 1
            try:
                await single_post_use_case.execute()
            except ApplicationCancelledError:
                raise
            except ApplicationFailedDownloadError as e:
                if attempt == max_attempts:
                    self.context.progress_reporter.error(
                        f'Skip post after {attempt} failed attempts: {full_post_title} ({e.message})'
                    )
                    return

                self.context.progress_reporter.warn(
                    f'Attempt {attempt} failed for post: {full_post_title} ({e.message}), RESOURCE: ({e.resource})'
                )
                self.context.progress_reporter.warn(
                    f'Retrying in {delay:.1f}s... ({e.message})'
                )
                await asyncio.sleep(delay)
                # Grow the pause by 1.5x per failure, capped at 10 seconds.
                delay = min(delay * 1.5, 10.0)
            else:
                return

    async def execute(self) -> None:
        """Iterate over all pages of the author's posts and download each accessible post."""
        pages = self.boosty_api.iterate_over_posts(author_name=self.author_name)

        page_number = 0

        async for page in pages:
            posts_on_page = len(page.posts)
            page_number += 1

            page_task_id = self.context.progress_reporter.create_task(
                f'Got new posts: [{posts_on_page}]',
                total=posts_on_page,
                indent_level=0,  # Each page prints without indentation
            )

            for post_dto in page.posts:
                if not post_dto.has_access:
                    self.context.progress_reporter.warn(
                        f'Skip post ([red]no access to content[/red]): {post_dto.title}'
                    )
                    continue

                full_post_title = self._prepare_post_title(post_dto)

                single_post_use_case = DownloadSinglePostUseCase(
                    destination=self.destination / full_post_title,
                    post_dto=post_dto,
                    download_context=self.context,
                )

                self.context.progress_reporter.update_task(
                    page_task_id,
                    advance=1,
                    description=f'Processing page [bold]{page_number}[/bold]',
                )

                await self._download_with_retries(single_post_use_case, full_post_title)

            self.context.progress_reporter.complete_task(page_task_id)
            self.context.progress_reporter.success(
                f'--- Finished page {page_number} ---'
            )
117 |
--------------------------------------------------------------------------------
/.github/workflows/release-pr-validation.yaml:
--------------------------------------------------------------------------------
1 | # This workflow runs only for dev -> main PRs to ensure that:
2 | # - CHANGELOG updated
3 | # - pyproject.toml version updated
4 | # - Version is higher than the one on PyPI
5 | name: 🔍 Release PR Validation (version checks)
6 |
7 | on:
8 | pull_request:
9 | branches:
10 | - main
11 |
12 | env:
13 | PACKAGE_NAME: "boosty-downloader"
14 |
15 | jobs:
16 | # About Inter-step Communication:
17 | # Steps share data (versions) using GitHub Actions outputs mechanism:
18 | #
19 | # Creating output: echo "key=value" >> "$GITHUB_OUTPUT"
20 | # Using output: ${{ steps.STEP_ID.outputs.key }}
21 | #
22 | version-validation:
23 | name: 📋 Version Validation (Main Branch PRs)
24 | runs-on: ubuntu-latest
25 | # if: github.event_name == 'pull_request' && github.base_ref == 'main'
26 | steps:
27 | - uses: actions/checkout@v4
28 | with:
29 | fetch-depth: 0
30 | # ref: ${{ github.event.pull_request.head.sha }}
31 |
32 | - name: 🐍 Set up Python
33 | uses: actions/setup-python@v5
34 | with:
35 | python-version: "3.12"
36 |
37 | - name: 📦 Install Poetry if missing
38 | uses: snok/install-poetry@v1
39 | with:
40 | version: 'latest'
41 |
42 | - name: Get project versions (base and head)
43 | id: get_poetry_versions
44 | run: |
45 | HEAD_VERSION=$(poetry version --short)
46 | echo "head_version=$HEAD_VERSION" >> "$GITHUB_OUTPUT"
47 | echo "Current version: $HEAD_VERSION at $(git rev-parse --short HEAD)"
48 |
49 | git switch main
50 | BASE_VERSION=$(poetry version --short)
51 | echo "base_version=$BASE_VERSION" >> "$GITHUB_OUTPUT"
52 | echo "Base version: $BASE_VERSION at $(git rev-parse --short HEAD)"
53 |
54 | git switch - -d
55 |
56 |
57 | - name: ✅ Validate version bump in pyproject.toml
58 | run: |
59 | CURRENT_VERSION="${{ steps.get_poetry_versions.outputs.head_version }}"
60 | BASE_VERSION="${{ steps.get_poetry_versions.outputs.base_version }}"
61 |
62 | if [ "$CURRENT_VERSION" == "$BASE_VERSION" ]; then
63 | echo "❌ Version not updated! Please update version in pyproject.toml"
64 | echo "Current: $CURRENT_VERSION"
65 | echo "Base: $BASE_VERSION"
66 | exit 1
67 | fi
68 |
69 | if [ "$(printf '%s\n' "$BASE_VERSION" "$CURRENT_VERSION" | sort -rV | head -n 1)" != "$CURRENT_VERSION" ]; then
70 | echo "❌ Version should be higher than base version!"
71 | echo "Current: $CURRENT_VERSION"
72 | echo "Base: $BASE_VERSION"
73 | exit 1
74 | fi
75 |
76 | echo "✅ Version correctly updated: $BASE_VERSION → $CURRENT_VERSION"
77 |
78 | - name: 📝 Check for version in CHANGELOG.md
79 | run: |
80 | if [ ! -f CHANGELOG.md ]; then
81 | echo "❌ CHANGELOG.md not found! Please create it."
82 | exit 1
83 | fi
84 | VERSION="${{ steps.get_poetry_versions.outputs.head_version }}"
85 | if ! grep -q "$VERSION" CHANGELOG.md; then
86 | echo "at $(git rev-parse --short HEAD)"
87 | echo "❌ Version $VERSION not found in CHANGELOG.md"
88 | echo "Please add changelog entry for version $VERSION"
89 | exit 1
90 | fi
91 | echo "✅ Version $VERSION found in CHANGELOG.md"
92 |
93 | - name: 🩺 Check PyPi release version compatibility
94 | run: |
95 | echo "Checking package: $PACKAGE_NAME"
96 | echo "Current version: $CURRENT_VERSION"
97 |
98 | PACKAGE_NAME="${{ env.PACKAGE_NAME }}"
99 | CURRENT_VERSION="${{ steps.get_poetry_versions.outputs.head_version }}"
100 |
101 | response=$(curl -s "https://pypi.org/pypi/$PACKAGE_NAME/json" || echo "{}")
102 |
103 | pypi_version=$(echo "$response" | jq --raw-output "select(.releases != null) | .releases | keys_unsorted | last // empty")
104 |
105 | if [ -z "$pypi_version" ] || [ "$pypi_version" = "null" ]; then
106 | echo "Package not found on PyPI or no releases available."
107 | pypi_version="0.0.0"
108 | fi
109 |
110 | echo "Latest version on PyPI: $pypi_version"
111 | echo "pypi_version=$pypi_version" >> "$GITHUB_OUTPUT"
112 |
113 | # Compare versions using sort -rV
114 | if [ "$CURRENT_VERSION" = "$pypi_version" ]; then
115 | echo "❌ Current version equals PyPI version ($CURRENT_VERSION)"
116 | echo "is_newer=false" >> "$GITHUB_OUTPUT"
117 | exit 1
118 | elif [ "$(printf '%s\n' "$pypi_version" "$CURRENT_VERSION" | sort -rV | head -n 1)" = "$CURRENT_VERSION" ]; then
119 | echo "✅ Current version ($CURRENT_VERSION) is newer than PyPI version ($pypi_version)"
120 | echo "is_newer=true" >> "$GITHUB_OUTPUT"
121 | else
122 | echo "❌ Current version ($CURRENT_VERSION) is older than PyPI version ($pypi_version)"
123 | echo "is_newer=false" >> "$GITHUB_OUTPUT"
124 | exit 1
125 | fi
126 |
--------------------------------------------------------------------------------
/boosty_downloader/src/application/mappers/link_header_text.py:
--------------------------------------------------------------------------------
1 | """
2 | Mapper for converting textual Boosty API post data chunks to domain text object.
3 |
4 | If the API responses change, this mapper may need to be updated accordingly.
5 | """
6 |
7 | import json
8 |
9 | from boosty_downloader.src.domain.post_data_chunks import PostDataChunkText
10 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post_data_types import (
11 | BoostyPostDataHeaderDTO,
12 | BoostyPostDataLinkDTO,
13 | BoostyPostDataTextDTO,
14 | )
15 |
16 |
17 | def _parse_header(style_definition: str) -> int:
18 | r"""
19 | Parse header level (h1/h2/h3...) from the style definition.
20 |
21 | Style definition usually comes as a 2nd field in the "content" field of PostDataText.
22 |
23 | ```
24 | "content": "[\"Hello, world!\", \"unstyled\", <---- [[0, 0, 13]]"
25 | ```
26 | """
27 | # These values were reverse engineered from Boosty API responses.
28 | header_possible_values = {
29 | 'unstyled': 0,
30 | 'header-one': 1,
31 | 'header-two': 2,
32 | 'header-three': 3,
33 | 'header-four': 4,
34 | 'header-five': 5,
35 | 'header-six': 6,
36 | }
37 |
38 | # by default (and in other cases) have no header
39 | return header_possible_values.get(style_definition, 0)
40 |
41 |
42 | def _create_style_bitmap(
43 | text_length: int, style_array: list[list[int]]
44 | ) -> list[set[int]]:
45 | """Create bitmap of styles for each character position."""
46 | bitmap: list[set[int]] = [set() for _ in range(text_length)]
47 |
48 | for style_desc in style_array:
49 | style_id, start_idx, end_idx = style_desc
50 | for i in range(start_idx, min(end_idx, text_length)):
51 | bitmap[i].add(style_id)
52 |
53 | return bitmap
54 |
55 |
def _create_text_fragments(
    text: str, style_bitmap: list[set[int]], header_level: int
) -> list[PostDataChunkText.TextFragment]:
    """
    Split text into fragments so that every fragment has one uniform style set.

    Consecutive characters sharing the same style set form one fragment.
    Positions that have no entry in `style_bitmap` are treated as unstyled and
    are grouped together like any other run (previously every such character
    became its own one-character fragment).

    Every fragment receives the given `header_level`. Empty text yields an
    empty list.
    """
    from itertools import groupby  # local import keeps this block self-contained

    if not text:
        return []

    unstyled: set[int] = set()

    def styles_at(position: int) -> set[int]:
        """Return the style set at a position, or the empty set past the bitmap."""
        if position < len(style_bitmap):
            return style_bitmap[position]
        return unstyled

    fragments: list[PostDataChunkText.TextFragment] = []

    # groupby yields one group per run of equal consecutive style sets.
    for styles, run in groupby(range(len(text)), key=styles_at):
        positions = list(run)
        fragment = PostDataChunkText.TextFragment(
            text[positions[0] : positions[-1] + 1]
        )
        fragment.header_level = header_level
        fragment.style = _convert_style_set_to_text_style(styles)
        fragments.append(fragment)

    return fragments
86 |
87 |
def _convert_style_set_to_text_style(
    style_set: set[int],
) -> PostDataChunkText.TextFragment.TextStyle:
    """Build a TextStyle whose flags reflect which style ids are present in the set."""
    # Style id used for each TextStyle flag.
    flag_ids = (
        ('bold', 0),
        ('italic', 2),
        ('underline', 4),
    )

    text_style = PostDataChunkText.TextFragment.TextStyle()
    for flag_name, style_id in flag_ids:
        setattr(text_style, flag_name, style_id in style_set)

    return text_style
102 |
103 |
def _parse_content_field(
    content: str, modificator: str = ''
) -> list[PostDataChunkText.TextFragment]:
    r"""
    Parse a raw "content" string into styled text fragments.

    `content` is expected to be a JSON-encoded `[text, style_name, style_array]`
    triple. Content that is not JSON, or is JSON of another shape, is used
    verbatim as unstyled text instead of raising.

    When `modificator` is 'BLOCK_END' a newline is appended to the text.
    """

    def _extract_content_field(content: str) -> tuple[str, str, list[list[int]]]:
        r"""
        Extract text, style name, and style array from the content field.

        Boosty API returns "content" as a JSON-encoded string like this:
        "[\"Hello, world!\", \"unstyled\", [[0, 0, 13]]]"

        The first part is the text itself, the other two carry style information:
        - the style name is interpreted by _parse_header,
        - the style array is interpreted by _create_style_bitmap.
        """
        fallback: tuple[str, str, list[list[int]]] = (content, '', [])

        try:
            parsed = json.loads(content)
        except json.JSONDecodeError:
            return fallback

        # Valid JSON is not necessarily the expected triple (e.g. a bare number
        # or a short list); indexing it blindly would raise IndexError/TypeError.
        if not isinstance(parsed, list) or len(parsed) < 3:
            return fallback

        text, style_info, style_array = parsed[0], parsed[1], parsed[2]
        if not isinstance(text, str):
            return fallback

        # Unknown style names already map to "no header"; do the same for non-strings.
        if not isinstance(style_info, str):
            style_info = ''
        # A missing (null) or malformed style array means "no inline styles".
        if not isinstance(style_array, list):
            style_array = []

        return text, style_info, style_array

    text, style_info, styles_array = _extract_content_field(content)

    if modificator == 'BLOCK_END':
        text += '\n'

    header_level = _parse_header(style_info)
    style_bitmap = _create_style_bitmap(len(text), styles_array)
    return _create_text_fragments(text, style_bitmap, header_level)
135 |
136 |
def to_domain_text_chunk(
    api_textual_dto: BoostyPostDataTextDTO
    | BoostyPostDataHeaderDTO
    | BoostyPostDataLinkDTO,
) -> list[PostDataChunkText.TextFragment]:
    """
    Map a textual API chunk (text, header, or link DTO) to domain text fragments.

    The DTO's `content` is parsed into fragments; for link DTOs every fragment
    additionally gets the DTO's URL attached.
    """
    # Not every DTO type carries a modificator, hence the getattr default.
    modificator = getattr(api_textual_dto, 'modificator', '')
    fragments = _parse_content_field(api_textual_dto.content, modificator)

    if not isinstance(api_textual_dto, BoostyPostDataLinkDTO):
        return fragments

    # Link chunks: every produced fragment points at the same URL.
    for fragment in fragments:
        fragment.link_url = api_textual_dto.url
    return fragments
157 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/html_reporter/html_reporter.py:
--------------------------------------------------------------------------------
1 | """HTML Reporter for generating HTML documents"""
2 |
3 | from __future__ import annotations
4 |
5 | from dataclasses import dataclass
6 | from typing import TYPE_CHECKING, TypedDict
7 |
8 | from jinja2 import Template
9 |
10 | if TYPE_CHECKING:
11 | from pathlib import Path
12 |
13 |
@dataclass
class NormalText:
    """Plain text payload that can be placed into the HTML document"""

    # The text to show.
    text: str
19 |
20 |
@dataclass
class HyperlinkText:
    """Clickable text payload that can be placed into the HTML document"""

    # Visible link caption.
    text: str
    # Address the link points to.
    url: str
27 |
28 |
class TextElement(TypedDict):
    """Dictionary shape of a text entry stored in the report's element list"""

    # Element discriminator; HTMLReport stores 'text' here.
    type: str
    # The text itself.
    content: str
34 |
35 |
class ImageElement(TypedDict):
    """Dictionary shape of an image entry stored in the report's element list"""

    # Element discriminator; HTMLReport stores 'image' here.
    type: str
    # Path of the image, as passed to HTMLReport.add_image.
    content: str
    # Display width of the image.
    width: int
42 |
43 |
class LinkElement(TypedDict):
    """Dictionary shape of a link entry stored in the report's element list"""

    # Element discriminator; HTMLReport stores 'link' here.
    type: str
    # Visible link caption.
    content: str
    # Address the link points to.
    url: str
50 |
51 |
52 | class HTMLReport:
53 | """
54 | Representation of the document, which can be saved as an HTML file.
55 |
56 | You can add text/links/images to the document, they will be added one after another.
57 | """
58 |
59 | def __init__(self, filename: Path) -> None:
60 | self.filename = filename
61 | self.elements: list[TextElement | ImageElement | LinkElement] = []
62 |
63 | def _render_template(self) -> str:
64 | """Render the HTML document using Jinja2"""
65 | template = """
66 |
67 |
68 | HTML Report
69 |
115 |
116 |
117 |
118 | {% for element in elements %}
119 | {% if element.type == 'text' %}
120 |
{{ element.content }}
121 | {% elif element.type == 'image' %}
122 |
123 |

124 |
125 | {% elif element.type == 'link' %}
126 |
{{ element.content }}
127 | {% endif %}
128 | {% endfor %}
129 |
130 |
131 |
132 | """
133 | jinja_template = Template(template)
134 | return jinja_template.render(elements=self.elements)
135 |
136 | def new_paragraph(self) -> None:
137 | """Add an empty line between elements"""
138 | # Append a new paragraph using a proper TextElement type
139 | self.elements.append(TextElement(type='text', content='
'))
140 |
141 | def add_text(self, text: NormalText) -> None:
142 | """Add a text to the report right after the last added element"""
143 | # Append text content using TextElement
144 | self.elements.append(TextElement(type='text', content=text.text))
145 |
146 | def add_image(self, image_path: str, width: int = 600) -> None:
147 | """
148 | Add an image to the report right after the last added element
149 |
150 | - width 600 is usually enough for most HTML pages
151 | """
152 | # Append image content using ImageElement
153 | self.elements.append(
154 | ImageElement(type='image', content=image_path, width=width),
155 | )
156 |
157 | def add_link(self, text: NormalText, url: str) -> None:
158 | """Add a link to the report right after the last added element"""
159 | # Append link content using LinkElement
160 | self.elements.append(LinkElement(type='link', content=text.text, url=url))
161 |
162 | def save(self) -> None:
163 | """Save the whole document to the file"""
164 | html_content = self._render_template()
165 | with self.filename.open('w', encoding='utf-8') as file:
166 | file.write(html_content)
167 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/file_downloader.py:
--------------------------------------------------------------------------------
1 | """Module to download files with reporting process mechanisms"""
2 |
3 | from __future__ import annotations
4 |
5 | import http
6 | import mimetypes
7 | from asyncio import CancelledError
8 | from dataclasses import dataclass
9 | from typing import TYPE_CHECKING
10 |
11 | import aiofiles
12 | from aiohttp import ClientConnectionError
13 |
14 | from boosty_downloader.src.infrastructure.path_sanitizer import (
15 | sanitize_string,
16 | )
17 |
18 | if TYPE_CHECKING:
19 | from collections.abc import Callable
20 | from pathlib import Path
21 |
22 | from aiohttp_retry import RetryClient
23 |
24 |
@dataclass
class DownloadingStatus:
    """
    Snapshot of a download's progress.

    Instances are handed to status update callbacks while a file is downloading.
    """

    # Name of the file being downloaded.
    name: str
    # Expected total size; None when it is not known.
    total_bytes: int | None
    # Bytes received so far, including the current chunk.
    total_downloaded_bytes: int
    # Size of the chunk that triggered this update.
    downloaded_bytes: int = 0
37 |
38 |
@dataclass
class DownloadFileConfig:
    """Everything `download_file` needs to fetch one file"""

    # HTTP client used to request the URL.
    session: RetryClient
    # Address of the resource to download.
    url: str

    # Target file name (sanitized by download_file before use).
    filename: str
    # Directory the file is written into.
    destination: Path
    # Called once per received chunk; does nothing by default.
    on_status_update: Callable[[DownloadingStatus], None] = lambda _: None

    # When true, the file suffix is replaced with one guessed from the response content type.
    guess_extension: bool = True
    # Size of the chunks read from the response.
    chunk_size_bytes: int = 512 * 1024  # 512 KiB
52 |
53 |
class DownloadError(Exception):
    """
    Exception raised when the download failed for any reason.

    Carries the human-readable message, the local file involved (if any)
    and the URL of the resource that was being downloaded.
    """

    message: str
    file: Path | None
    resource_url: str

    def __init__(self, message: str, file: Path | None, resource_url: str) -> None:
        super().__init__(message)
        # `message` is declared on the class, so it must actually be set:
        # previously reading `error.message` raised AttributeError.
        self.message = message
        self.file = file
        self.resource_url = resource_url
65 |
66 |
class DownloadCancelledError(DownloadError):
    """Raised when the user cancelled the download while it was running"""

    def __init__(self, resource_url: str, file: Path | None = None) -> None:
        reason = 'Download cancelled by user'
        super().__init__(reason, file, resource_url=resource_url)
72 |
73 |
class DownloadTimeoutError(DownloadError):
    """Raised when the download did not finish in time"""

    def __init__(self, resource_url: str, file: Path | None = None) -> None:
        reason = 'Download timed out for the destination server'
        super().__init__(reason, file, resource_url=resource_url)
83 |
84 |
class DownloadConnectionError(DownloadError):
    """Raised when the connection failed while the download was running"""

    def __init__(self, resource_url: str, file: Path | None = None) -> None:
        reason = 'Connection error during the download'
        super().__init__(reason, file, resource_url=resource_url)
92 |
93 |
class DownloadIOFailureError(DownloadError):
    """Raised when an I/O operation failed while the download was running"""

    def __init__(self, resource_url: str, file: Path | None = None) -> None:
        reason = 'Failed during I/O operation'
        super().__init__(reason, file, resource_url=resource_url)
99 |
100 |
class DownloadUnexpectedStatusError(DownloadError):
    """Raised when the server answered with a status code the downloader does not accept"""

    # HTTP status code returned by the server.
    status_code: int
    # Reason text that accompanied the status.
    response_message: str

    def __init__(self, status: int, response_message: str, resource_url: str) -> None:
        reason = f'Unexpected status code: {status}'
        # No local file exists at this point, hence file=None.
        super().__init__(reason, file=None, resource_url=resource_url)
        self.status_code = status
        self.response_message = response_message
113 |
114 |
async def download_file(
    dl_config: DownloadFileConfig,
) -> Path:
    """
    Download one file and report progress through the configured callback.

    The response body is streamed to `destination / filename` in chunks of
    `chunk_size_bytes`; `on_status_update` is invoked once per chunk. When
    `guess_extension` is set and the response has a content type, the file
    suffix is replaced with the extension guessed from that content type.

    Returns the path of the written file.

    Raises:
        DownloadUnexpectedStatusError: the response status is not 200 OK.
        DownloadCancelledError: the transfer was cancelled or interrupted.
        DownloadTimeoutError: the transfer timed out.
        DownloadConnectionError: the connection broke during the transfer.
        DownloadIOFailureError: any other OSError occurred during the transfer.

    """
    import asyncio  # local import: only the timeout exception type is needed here

    async with dl_config.session.get(dl_config.url) as response:
        if response.status != http.HTTPStatus.OK:
            raise DownloadUnexpectedStatusError(
                resource_url=dl_config.url,
                status=response.status,
                response_message=response.reason or 'No reason provided',
            )

        filename = sanitize_string(dl_config.filename)
        file_path = dl_config.destination / filename

        content_type = response.content_type
        if content_type and dl_config.guess_extension:
            ext = mimetypes.guess_extension(content_type)
            if ext is not None:
                file_path = file_path.with_suffix(ext)

        total_downloaded = 0

        async with aiofiles.open(file_path, mode='wb') as file:
            total_size = response.content_length

            try:
                async for chunk in response.content.iter_chunked(
                    dl_config.chunk_size_bytes
                ):
                    total_downloaded += len(chunk)
                    dl_config.on_status_update(
                        DownloadingStatus(
                            name=filename,
                            total_bytes=total_size,
                            total_downloaded_bytes=total_downloaded,
                            downloaded_bytes=len(chunk),
                        ),
                    )
                    await file.write(chunk)
            except (CancelledError, KeyboardInterrupt) as e:
                raise DownloadCancelledError(
                    file=file_path, resource_url=dl_config.url
                ) from e
            # Catch the real timeout exceptions. The previous handler caught
            # DownloadTimeoutError itself, which nothing in this block raises,
            # so timeouts were never translated. This clause must stay above
            # OSError: the builtin TimeoutError is an OSError subclass.
            except (asyncio.TimeoutError, TimeoutError) as e:
                raise DownloadTimeoutError(
                    file=file_path, resource_url=dl_config.url
                ) from e
            except (ConnectionResetError, BrokenPipeError, ClientConnectionError) as e:
                raise DownloadConnectionError(
                    file=file_path, resource_url=dl_config.url
                ) from e
            except OSError as e:
                raise DownloadIOFailureError(
                    file=file_path, resource_url=dl_config.url
                ) from e

        return file_path
173 |
--------------------------------------------------------------------------------
/test/integration/fixtures.py:
--------------------------------------------------------------------------------
1 | """Shared fixtures for Boosty API integration tests."""
2 |
3 | import logging
4 | from collections.abc import AsyncGenerator
5 |
6 | import pytest
7 | import pytest_asyncio
8 | from aiohttp import ClientSession, CookieJar
9 | from aiohttp.typedefs import LooseHeaders
10 | from aiohttp_retry import ExponentialRetry, RetryClient
11 | from pydantic import ValidationError
12 |
13 | from boosty_downloader.src.infrastructure.boosty_api.core.client import BoostyAPIClient
14 | from boosty_downloader.src.infrastructure.boosty_api.utils.auth_parsers import (
15 | parse_session_cookie,
16 | )
17 | from integration.configuration import IntegrationTestConfig
18 |
19 | logger = logging.getLogger(__name__)
20 |
21 | # ------------------------------------------------------------------------------
22 | # Utilities for further fixtures
23 |
24 |
@pytest.fixture(scope='session')
def integration_config() -> IntegrationTestConfig:
    """
    Provides configuration for integration tests.

    The configuration object loads its own values when instantiated.
    If validation fails, every validation error is logged and the tests
    depending on this fixture are skipped.
    """
    try:
        return IntegrationTestConfig()  # pyright: ignore[reportCallIssue] : will be loaded automatically by pydantic_settings

    except ValidationError as e:
        # Log the traceback once; the per-field lines below are plain error
        # records so the same traceback is not repeated for every field.
        logger.exception('❌ Failed to load integration test config:')
        for err in e.errors():
            loc = '.'.join(map(str, err['loc']))
            logger.error('  - %s: %s', loc, err['msg'])
        pytest.skip('Integration tests require valid configuration')
43 |
44 |
@pytest.fixture
def boosty_headers(integration_config: IntegrationTestConfig) -> LooseHeaders:
    """Build the request headers (auth token + JSON content type) for Boosty API calls."""
    headers = {
        'Authorization': integration_config.boosty_auth_token,
        'Content-Type': 'application/json',
    }
    return headers
52 |
53 |
@pytest_asyncio.fixture
async def boosty_cookies_jar_async(
    integration_config: IntegrationTestConfig,
) -> CookieJar:
    """Parse the configured Boosty cookies into a cookie jar."""
    # Building the jar inside an async fixture avoids the 'no running event loop' error.
    cookie_jar = parse_session_cookie(integration_config.boosty_cookies)
    return cookie_jar
60 |
61 |
62 | # ------------------------------------------------------------------------------
63 | # Different session setups
64 |
65 |
@pytest_asyncio.fixture
async def authorized_http_session(
    boosty_headers: LooseHeaders,
    boosty_cookies_jar_async: CookieJar,
) -> AsyncGenerator[ClientSession, None]:
    """Yield an HTTP session carrying the Boosty headers and cookies; close it afterwards."""
    http_session = ClientSession(
        cookie_jar=boosty_cookies_jar_async,
        headers=boosty_headers,
    )
    yield http_session
    await http_session.close()
78 |
79 |
@pytest_asyncio.fixture
async def unauthorized_http_session() -> AsyncGenerator[ClientSession, None]:
    """Yield a bare HTTP session with no authorization headers; close it afterwards."""
    http_session = ClientSession()
    yield http_session
    await http_session.close()
86 |
87 |
@pytest_asyncio.fixture
async def invalid_auth_http_session() -> AsyncGenerator[ClientSession, None]:
    """Yield an HTTP session whose Authorization header holds a made-up token; close it afterwards."""
    # Looks valid (64 hex chars), but not actually valid
    fake_token = 'a' * 64
    http_session = ClientSession(
        headers={'Authorization': f'Bearer {fake_token}'},
    )
    yield http_session
    await http_session.close()
98 |
99 |
100 | # ------------------------------------------------------------------------------
101 | # Retry clients wrapping the HTTP sessions
102 |
103 |
@pytest_asyncio.fixture
async def authorized_retry_client(
    authorized_http_session: ClientSession,
) -> AsyncGenerator[RetryClient, None]:
    """Yield a retrying client on top of the authorized session; close it afterwards."""
    retry_client = RetryClient(
        client_session=authorized_http_session,
        retry_options=ExponentialRetry(attempts=3, start_timeout=1.0),
    )
    yield retry_client
    await retry_client.close()
116 |
117 |
@pytest_asyncio.fixture
async def unauthorized_retry_client(
    unauthorized_http_session: ClientSession,
) -> AsyncGenerator[RetryClient, None]:
    """Yield a retrying client on top of the unauthenticated session; close it afterwards."""
    retry_client = RetryClient(
        client_session=unauthorized_http_session,
        retry_options=ExponentialRetry(attempts=3, start_timeout=1.0),
    )
    yield retry_client
    await retry_client.close()
130 |
131 |
@pytest_asyncio.fixture
async def invalid_auth_retry_client(
    invalid_auth_http_session: ClientSession,
) -> AsyncGenerator[RetryClient, None]:
    """Yield a retrying client on top of the invalid-auth session; close it afterwards."""
    retry_client = RetryClient(
        client_session=invalid_auth_http_session,
        retry_options=ExponentialRetry(attempts=3, start_timeout=1.0),
    )
    yield retry_client
    await retry_client.close()
144 |
145 |
146 | # ------------------------------------------------------------------------------
147 | # Clients for Boosty API
148 |
149 |
@pytest_asyncio.fixture
async def authorized_boosty_client(
    authorized_retry_client: RetryClient,
) -> BoostyAPIClient:
    """Build a Boosty API client that sends requests through the authorized retry client."""
    api_client = BoostyAPIClient(session=authorized_retry_client)
    return api_client
156 |
157 |
@pytest_asyncio.fixture
async def unauthorized_boosty_client(
    unauthorized_retry_client: RetryClient,
) -> BoostyAPIClient:
    """Build a Boosty API client without authentication, for unauthorized scenarios."""
    api_client = BoostyAPIClient(
        session=unauthorized_retry_client,
        request_delay_seconds=1,
    )
    return api_client
164 |
165 |
@pytest_asyncio.fixture
async def invalid_auth_boosty_client(
    invalid_auth_retry_client: RetryClient,
) -> BoostyAPIClient:
    """Build a Boosty API client with invalid authentication, for error-handling scenarios."""
    api_client = BoostyAPIClient(
        session=invalid_auth_retry_client,
        request_delay_seconds=1,
    )
    return api_client
172 |
--------------------------------------------------------------------------------
/test/unit/html_generator/html_templates_test.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | from boosty_downloader.src.infrastructure.html_generator.models import (
4 | HtmlGenChunk,
5 | HtmlGenImage,
6 | HtmlGenList,
7 | HtmlGenText,
8 | HtmlGenVideo,
9 | HtmlListItem,
10 | HtmlTextFragment,
11 | HtmlTextStyle,
12 | )
13 | from boosty_downloader.src.infrastructure.html_generator.renderer import (
14 | render_html,
15 | render_html_to_file,
16 | )
17 |
18 |
def test_html_generator_templates():
    """
    Render a post containing every chunk kind and check the file output.

    The same chunks are rendered to a string and to a file; the file must
    exist, match the string exactly, and the output must not be empty.
    The file lives in a temporary directory so nothing is left behind in the
    working directory, even when an assertion fails.
    """
    import tempfile  # local import: only this test needs it

    def list_item(
        text: str, nested_items: list[HtmlListItem] | None = None
    ) -> HtmlListItem:
        """Build a list item holding a single plain text fragment."""
        return HtmlListItem(
            data=[HtmlGenText(text_fragments=[HtmlTextFragment(text=text)])],
            nested_items=nested_items if nested_items is not None else [],
        )

    chunks: list[HtmlGenChunk] = [
        HtmlGenText(
            text_fragments=[
                HtmlTextFragment(text='Welcome to my Boosty!', header_level=1),
                HtmlTextFragment(
                    text='This post includes various elements: text, media, and lists.',
                ),
                HtmlTextFragment(text=''),
                HtmlTextFragment(
                    text="Let's dive in below:",
                    style=HtmlTextStyle(italic=True),
                ),
            ]
        ),
        HtmlGenText(
            text_fragments=[
                HtmlTextFragment(text='Highlights', header_level=2),
                HtmlTextFragment(
                    text='This paragraph contains a mix of ',
                ),
                HtmlTextFragment(
                    text='bold',
                    style=HtmlTextStyle(bold=True),
                ),
                HtmlTextFragment(text=', '),
                HtmlTextFragment(
                    text='italic',
                    style=HtmlTextStyle(italic=True),
                ),
                HtmlTextFragment(text=', and '),
                HtmlTextFragment(
                    text='underlined',
                    style=HtmlTextStyle(underline=True),
                ),
                HtmlTextFragment(text=' text. You can '),
                HtmlTextFragment(
                    text='click here',
                    link_url='https://boosty.to/example',
                    style=HtmlTextStyle(underline=True),
                ),
                HtmlTextFragment(text=' to support me.'),
            ]
        ),
        # Three levels of nesting to exercise recursive list rendering.
        HtmlGenList(
            items=[
                list_item(
                    "📌 What you'll get inside:",
                    nested_items=[
                        list_item('High-quality images'),
                        list_item('Source files (PSD, RAW)'),
                        list_item(
                            'Bonus video content',
                            nested_items=[
                                list_item('Behind the scenes'),
                                list_item('Unreleased footage'),
                            ],
                        ),
                    ],
                )
            ]
        ),
        HtmlGenImage(url='https://example.com/banner.jpg'),
        HtmlGenVideo(
            title='Exclusive Behind the Scenes',
            url='https://example.com/video.mp4',
        ),
        HtmlGenVideo(url='https://www.youtube.com/watch?v=dQw4w9WgXcQ'),
        HtmlGenText(
            text_fragments=[
                HtmlTextFragment(text=''),
                HtmlTextFragment(text='Thanks for reading!', header_level=2),
                HtmlTextFragment(
                    text='Feel free to leave a comment or suggestion below.',
                ),
            ]
        ),
    ]

    data = render_html(chunks)

    # The directory (and the file in it) is removed on exit, pass or fail.
    with tempfile.TemporaryDirectory() as tmp_dir:
        test_output_file = Path(tmp_dir) / 'test_output.html'

        render_html_to_file(chunks, test_output_file)

        assert test_output_file.exists()
        assert test_output_file.read_text(encoding='utf-8') == data
        assert len(data) > 0
161 |
--------------------------------------------------------------------------------
/.github/workflows/release.yaml:
--------------------------------------------------------------------------------
# Release pipeline: validates the pushed version against PyPI, builds the
# distributions, tags the commit, then publishes to PyPI and GitHub Releases.
name: release

# Runs on every push to main and can also be started manually.
on:
  push:
    branches:
      - main
  workflow_dispatch:

# Workflow-wide environment variables.
env:
  PACKAGE_NAME: "boosty-downloader"
  OWNER: "Glitchy-Sheep"
12 |
13 | jobs:
  # Reads the project version from pyproject.toml as it is on main and exposes
  # it as the `version` job output.
  extract_base_project_version:
    name: "📋 Extract pyproject.toml version from main"
    runs-on: ubuntu-latest
    outputs:
      version: ${{ steps.extract_version.outputs.version }}
    steps:
      # Ensure that we are on the main branch to get the latest stable version
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: main

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install Poetry
        uses: snok/install-poetry@v1

      # Publishes the version through $GITHUB_OUTPUT so the job-level
      # `outputs.version` above can forward it to dependent jobs.
      - name: Extract base version
        id: extract_version
        run: |
          VERSION=$(poetry version --short)
          echo "version=$VERSION" >> "$GITHUB_OUTPUT"
39 |
  # Reads the project version from pyproject.toml of the ref that triggered the
  # workflow and exposes it as the `version` job output.
  extract_pushed_version:
    name: "📦 Extract pushed pyproject.toml version"
    runs-on: ubuntu-latest
    outputs:
      version: ${{ steps.extract_version.outputs.version }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0 # so that poetry can read pyproject.toml in any case

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install Poetry
        uses: snok/install-poetry@v1

      # Publishes the version through $GITHUB_OUTPUT so the job-level
      # `outputs.version` above can forward it to dependent jobs.
      - name: Extract pushed version
        id: extract_version
        run: |
          VERSION=$(poetry version --short)
          echo "version=$VERSION" >> "$GITHUB_OUTPUT"
63 |
64 |
65 |
66 | # Check if new version is greater than the latest version on PyPI
67 | check_pypi:
68 | name: "🔍 Validate version against PyPI"
69 | needs: extract_pushed_version
70 | runs-on: ubuntu-latest
71 | steps:
72 | - uses: actions/checkout@v4
73 | - name: 🩺 Check PyPi release version compatibility
74 | run: |
75 | echo "Checking package: $PACKAGE_NAME"
76 | echo "Current version: $CURRENT_VERSION"
77 |
78 | PACKAGE_NAME="${{ env.PACKAGE_NAME }}"
79 | CURRENT_VERSION="${{ needs.extract_pushed_version.outputs.version }}"
80 |
81 | response=$(curl -s "https://pypi.org/pypi/$PACKAGE_NAME/json" || echo "{}")
82 |
83 | pypi_version=$(echo "$response" | jq --raw-output "select(.releases != null) | .releases | keys_unsorted | last // empty")
84 |
85 | if [ -z "$pypi_version" ] || [ "$pypi_version" = "null" ]; then
86 | echo "Package not found on PyPI or no releases available."
87 | pypi_version="0.0.0"
88 | fi
89 |
90 | echo "Latest version on PyPI: $pypi_version"
91 | echo "pypi_version=$pypi_version" >> "$GITHUB_OUTPUT"
92 |
93 | # Compare versions using sort -rV
94 | if [ "$CURRENT_VERSION" = "$pypi_version" ]; then
95 | echo "❌ Current version equals PyPI version ($CURRENT_VERSION)"
96 | echo "is_newer=false" >> "$GITHUB_OUTPUT"
97 | exit 1
98 | elif [ "$(printf '%s\n' "$pypi_version" "$CURRENT_VERSION" | sort -rV | head -n 1)" = "$CURRENT_VERSION" ]; then
99 | echo "✅ Current version ($CURRENT_VERSION) is newer than PyPI version ($pypi_version)"
100 | echo "is_newer=true" >> "$GITHUB_OUTPUT"
101 | else
102 | echo "❌ Current version ($CURRENT_VERSION) is older than PyPI version ($pypi_version)"
103 | echo "is_newer=false" >> "$GITHUB_OUTPUT"
104 | exit 1
105 | fi
106 |
107 |
  # Install dependencies and build distribution packages.
  # NOTE(review): no test step is executed in this job.
  setup_and_build:
    name: "🔨 Build distribution packages"
    needs: [extract_pushed_version, extract_base_project_version, check_pypi]
    runs-on: ubuntu-latest
    steps:
      # Checkout the ref that triggered the workflow (no explicit `ref` is set,
      # so this is main only when the run was started from main).
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install Poetry
        uses: snok/install-poetry@v1

      - name: Install dependencies
        run: make deps
        timeout-minutes: 10

      - name: Build source and wheel distribution
        run: make build

      # The `dist` artifact is downloaded by the publish and release jobs.
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: dist
          path: dist/
139 |
  # Creates an annotated `v<version>` tag for the pushed version and pushes it.
  create_tag:
    name: "🏷️ Create release tag"
    needs: [extract_pushed_version, setup_and_build]
    runs-on: ubuntu-latest
    # Write access to repository contents is required to push the tag.
    permissions:
      contents: write
    steps:
      - uses: actions/checkout@v4

      - name: Create and push tag
        run: |
          VERSION=${{ needs.extract_pushed_version.outputs.version }}
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git tag -a "v$VERSION" -m "Release v$VERSION"
          git push origin "v$VERSION"
156 |
157 |
  # Publishes the previously built `dist` artifact to PyPI.
  pypi_publish:
    name: "📦 Upload release to PyPI"
    needs: [setup_and_build, extract_pushed_version, create_tag]
    runs-on: ubuntu-latest
    environment:
      name: release
    # NOTE(review): no API token is passed to the publish action, so
    # `id-token: write` is presumably there for PyPI trusted publishing (OIDC) - confirm.
    permissions:
      id-token: write
    steps:
      - name: Download artifacts
        uses: actions/download-artifact@v5
        with:
          name: dist
          path: dist/

      - name: Publish distribution to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
175 |
176 |
  # Creates a GitHub Release for the `v<version>` tag and attaches the built
  # distribution files to it.
  github_release:
    name: "🚀 Create GitHub Release"
    needs: [setup_and_build, extract_pushed_version, create_tag]
    runs-on: ubuntu-latest
    # Write access to repository contents is required to create the release.
    permissions:
      contents: write
    steps:
      - name: Checkout Code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Download artifacts
        uses: actions/download-artifact@v5
        with:
          name: dist
          path: dist/

      # `--generate-notes` lets GitHub generate the release notes.
      - name: Create GitHub Release
        env:
          GH_TOKEN: ${{ github.token }}
        run: |
          VERSION=${{ needs.extract_pushed_version.outputs.version }}
          gh release create "v$VERSION" dist/* --title "v$VERSION" --generate-notes
201 |
202 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/core/client.py:
--------------------------------------------------------------------------------
1 | """Boosty API client for accessing content."""
2 |
3 | from __future__ import annotations
4 |
5 | from http import HTTPStatus
6 | from typing import TYPE_CHECKING
7 |
8 | from aiolimiter import AsyncLimiter
9 | from pydantic import ValidationError
10 | from yarl import URL
11 |
12 | from boosty_downloader.src.infrastructure.boosty_api.core.endpoints import (
13 | BOOSTY_DEFAULT_BASE_URL,
14 | )
15 | from boosty_downloader.src.infrastructure.boosty_api.models.post.extra import Extra
16 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post import PostDTO
17 | from boosty_downloader.src.infrastructure.boosty_api.models.post.posts_request import (
18 | PostsResponse,
19 | )
20 | from boosty_downloader.src.infrastructure.boosty_api.utils.filter_none_params import (
21 | filter_none_params,
22 | )
23 |
24 | if TYPE_CHECKING:
25 | from collections.abc import AsyncGenerator, Mapping
26 |
27 | from aiohttp import ClientResponse
28 | from aiohttp_retry import RetryClient
29 | from pydantic_core import ErrorDetails
30 |
31 |
class BoostyAPIError(Exception):
    """
    Base class for all Boosty API related errors.

    Catch this type to handle any error raised by the Boosty API client.
    """
34 |
35 |
class BoostyAPINoUsernameError(BoostyAPIError):
    """
    Raised when the requested author (username) was not found on Boosty.

    The offending name is kept in `username` for callers that want to report it.
    """

    username: str

    def __init__(self, username: str) -> None:
        message = f'Username not found: {username}'
        super().__init__(message)
        self.username = username
44 |
45 |
class BoostyAPIUnauthorizedError(BoostyAPIError):
    """Raised when an authorization error occurs, e.g. when credentials are invalid or expired."""
48 |
49 |
class BoostyAPIUnknownError(BoostyAPIError):
    """
    Raised when Boosty returns an unexpected error.

    Both the HTTP status code and the textual details are kept on the instance
    so callers can branch on the status without parsing the message.
    """

    status_code: int
    details: str

    def __init__(self, status_code: int, details: str) -> None:
        super().__init__(f'Boosty returned unknown error[{status_code}]: {details}')
        # Previously the status code was only embedded in the message text.
        self.status_code = status_code
        self.details = details
58 |
59 |
class BoostyAPIValidationError(BoostyAPIError):
    """
    Raised when the API response does not match the expected data models.

    This usually means the Boosty API response structure has changed and
    the client models have to be updated to match the new structure.
    """

    errors: list[ErrorDetails]

    def __init__(self, errors: list[ErrorDetails]) -> None:
        # Keep the individual validation errors for callers that want details.
        self.errors = errors
        super().__init__('Boosty API response validation error')
73 |
74 |
75 | def _create_limiter(request_delay_seconds: float) -> AsyncLimiter | None:
76 | # aiolimiter expects max_rate and time_period to be positive.
77 | # For delays <1s, we use a 1-second window and scale the rate to avoid exceptions and ensure correct throttling.
78 | # For delays >=1s, we allow 1 request per delay period, matching the intended throttle.
79 | # Without this logic, certain values (e.g. delay=0.5) would cause aiolimiter to raise or throttle incorrectly.
80 | if request_delay_seconds > 0:
81 | if request_delay_seconds < 1:
82 | max_rate = 1 / request_delay_seconds
83 | time_period = 1
84 | else:
85 | max_rate = 1
86 | time_period = request_delay_seconds
87 | return AsyncLimiter(max_rate=max_rate, time_period=time_period)
88 | return None
89 |
90 |
class BoostyAPIClient:
    """
    Main client class for the Boosty API.

    The session you provide to this class MUST NOT CONTAIN BASE URL.
    It should only contain headers and cookies. Base url is set internally.

    It handles the connection and makes requests to the API.
    To work with private/paid posts you need to provide valid authentication token and cookies in the session.
    """

    def __init__(
        self,
        session: RetryClient,
        request_delay_seconds: float = 0.0,
        base_url: URL | None = None,
    ) -> None:
        """
        Store the session and configure throttling.

        `request_delay_seconds` is turned into a rate limiter by `_create_limiter`;
        `base_url` falls back to BOOSTY_DEFAULT_BASE_URL when not provided.
        """
        self._base_url = base_url or BOOSTY_DEFAULT_BASE_URL
        self.session = session
        self._limiter = _create_limiter(request_delay_seconds)

    async def _throttled_get(
        self,
        endpoint: str,
        params: Mapping[str, str] | None = None,
        headers: Mapping[str, str] | None = None,
    ) -> ClientResponse:
        """Send a GET request to `endpoint` (relative to the base url), honoring the rate limiter if one is set."""
        url = URL(self._base_url) / endpoint.lstrip('/')

        if self._limiter:
            async with self._limiter:
                return await self.session.get(url, params=params, headers=headers)
        return await self.session.get(url, params=params, headers=headers)

    async def get_author_posts(
        self,
        author_name: str,
        limit: int,
        offset: str | None = None,
    ) -> PostsResponse:
        """
        Request to get posts from the specified author.

        The request supports pagination, so the response contains meta info.
        If you want to get all posts, you need to repeat the request with the offset of previous response
        until the 'is_last' field becomes True.

        Raises:
            BoostyAPINoUsernameError: the API answered 404 for this author.
            BoostyAPIUnauthorizedError: the API answered 401 (invalid/expired credentials).
            BoostyAPIUnknownError: any other non-200 status code.
            BoostyAPIValidationError: the payload does not match the expected models.

        """
        endpoint = f'blog/{author_name}/post/'

        response = await self._throttled_get(
            endpoint,
            params=filter_none_params(
                {
                    'offset': offset,
                    'limit': limit,
                },
            ),
        )

        try:
            # Check the status BEFORE parsing the body: an error response is not
            # guaranteed to contain JSON, and a parsing failure would otherwise
            # hide the real reason behind an unrelated exception.
            if response.status == HTTPStatus.NOT_FOUND:
                raise BoostyAPINoUsernameError(author_name)

            # This will be returned if the user has creds but they're invalid/expired
            if response.status == HTTPStatus.UNAUTHORIZED:
                raise BoostyAPIUnauthorizedError

            if response.status != HTTPStatus.OK:
                raise BoostyAPIUnknownError(
                    response.status, f'Unexpected status code: {response.status}'
                )

            posts_data = await response.json()
        finally:
            # Hand the connection back to the pool on the error paths too,
            # where the body is never read.
            response.release()

        try:
            posts: list[PostDTO] = [
                PostDTO.model_validate(post) for post in posts_data['data']
            ]
            extra: Extra = Extra.model_validate(posts_data['extra'])
        except ValidationError as e:
            raise BoostyAPIValidationError(errors=e.errors()) from e

        return PostsResponse(
            posts=posts,
            extra=extra,
        )

    async def iterate_over_posts(
        self,
        author_name: str,
        posts_per_page: int = 5,
    ) -> AsyncGenerator[PostsResponse, None]:
        """
        Iterate over all posts of the specified author page by page.

        Each yielded item is one page (`PostsResponse`). Pagination is handled internally:
        the generator stops after yielding the page whose `extra.is_last` flag is set.
        """
        offset = None
        while True:
            response = await self.get_author_posts(
                author_name,
                offset=offset,
                limit=posts_per_page,
            )
            yield response
            if response.extra.is_last:
                break
            offset = response.extra.offset
197 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/external_videos_downloader/external_videos_downloader.py:
--------------------------------------------------------------------------------
1 | """Manager for downloading external videos (e.g., YouTube, Vimeo) with progress reporting."""
2 | # ruff: noqa: I001
3 |
4 | from __future__ import annotations
5 |
6 | import contextlib
7 | from collections.abc import Callable
8 | from dataclasses import dataclass
9 | from pathlib import Path
10 | from typing import Any, ClassVar, cast
11 |
12 | from yt_dlp.YoutubeDL import YoutubeDL
13 | from yt_dlp.utils import DownloadError
14 |
# Options mapping handed to YoutubeDL.
YtDlOptions = dict[str, object]
# Callback that receives an ExternalVideoDownloadStatus for every progress report.
ExternalVideoDownloadProgressHook = Callable[['ExternalVideoDownloadStatus'], None]
17 |
18 |
class ExtVideoError(Exception):
    """
    Base class for external video download errors.

    Catch this type to handle any failure raised by the external videos downloader.
    """
21 |
22 |
class ExtVideoInfoError(ExtVideoError):
    """
    Raised when video information (e.g., title) cannot be extracted.

    The URL of the affected video is available as `video_url`.
    """

    def __init__(self, url: str) -> None:
        # Initialize the base exception explicitly so that str(error) always
        # carries the URL, regardless of how the argument was passed.
        super().__init__(url)
        self.video_url = url
28 |
29 |
class ExtVideoDownloadError(ExtVideoError):
    """
    Raised when the video download fails.

    The URL of the affected video is available as `video_url`.
    """

    def __init__(self, url: str) -> None:
        # Initialize the base exception explicitly so that str(error) always
        # carries the URL, regardless of how the argument was passed.
        super().__init__(url)
        self.video_url = url
35 |
36 |
class ExtVideoInterruptedByUserError(ExtVideoError):
    """
    Raised when the user interrupts the download (Ctrl+C).

    It replaces the KeyboardInterrupt caught while yt-dlp is downloading.
    """
39 |
40 |
@dataclass(slots=True)
class ExternalVideoDownloadStatus:
    """
    Status payload for reporting external video download progress.

    One instance is built for every yt-dlp progress callback and passed to the user hook.
    """

    # Base name of the file currently reported by yt-dlp.
    name: str
    # Total size in bytes (exact or estimated), None when unknown.
    total_bytes: int | None
    # Bytes downloaded so far, None when not reported.
    downloaded_bytes: int | None
    # Download speed as reported by yt-dlp, None when not reported.
    speed: float | None
    # Completion in percent (0.0 when it cannot be computed).
    percentage: float
    # Change of `downloaded_bytes` since the previous report.
    delta_bytes: int
51 |
52 |
@dataclass(slots=True)
class _HookState:
    """
    Internal state holder for tracking the status of an external video download.

    It is shared between the progress hook and `download_video`.
    """

    # `downloaded_bytes` seen in the previous progress report (used to compute deltas).
    last_downloaded: int = 0
    # Filename reported by yt-dlp on completion, None until reported.
    final_filename: Path | None = None
59 |
60 |
61 | class _SilentLogger:
62 | """
63 | Silly hack for yt-dlp to supress any noisy logging output.
64 |
65 | For logging use ExternalVideoDownloadStatus with progress callback.
66 | And for errors use the downloader exceptions.
67 | """
68 |
69 | def debug(self, msg: str) -> None:
70 | pass
71 |
72 | def info(self, msg: str) -> None:
73 | pass
74 |
75 | def warning(self, msg: str) -> None:
76 | pass
77 |
78 | def error(self, msg: str) -> None:
79 | pass
80 |
81 | def critical(self, msg: str) -> None:
82 | pass
83 |
84 |
class ExternalVideosDownloader:
    """Manager for downloading external videos (YouTube, Vimeo) with a 720p preference."""

    # Prefer 720p when available, otherwise choose the best >720,
    # then any best video+audio pair, and finally the best pre-merged format.
    _default_ydl_options: ClassVar[YtDlOptions] = {
        'format': 'bv*[height=720]+ba/bv*[height>720]+ba/bv*+ba/b',
        'quiet': True,
        'no_warnings': True,
        'no_color': True,
        'noprogress': True,  # Use progress hook instead
        'logger': _SilentLogger(),  # Suppress noisy error logging
    }

    # Last-resort file stem when neither the title nor the video id
    # survives sanitizing.
    _fallback_file_stem: ClassVar[str] = 'video'

    def download_video(
        self,
        url: str,
        destination_directory: Path,
        progress_hook: ExternalVideoDownloadProgressHook | None = None,
    ) -> Path:
        """
        Download video using yt-dlp and repeatedly report progress via progress_hook callback until completion.

        Returns the path of the downloaded file: the filename reported by yt-dlp when
        available, otherwise a path guessed from the sanitized title and probed extension.

        Raises:
            ExtVideoInfoError: metadata could not be extracted or contains no title.
            ExtVideoDownloadError: yt-dlp failed to download the video.
            ExtVideoInterruptedByUserError: the user pressed Ctrl+C during the download.

        """
        info = self._probe_video(url)
        title = info.get('title')
        if not isinstance(title, str) or not title.strip():
            raise ExtVideoInfoError(url)

        clean_title = self._sanitize_title(title)
        if not clean_title.strip():
            # The title consisted only of characters the sanitizer drops
            # (e.g. punctuation). Fall back to the video id so the file does
            # not end up with an empty stem such as ".mp4".
            video_id = info.get('id')
            id_stem = self._sanitize_title(video_id) if isinstance(video_id, str) else ''
            clean_title = id_stem.strip() or self._fallback_file_stem

        destination_directory.mkdir(parents=True, exist_ok=True)

        outtmpl = self._build_outtmpl(destination_directory, clean_title)

        state = _HookState()
        internal_hook = self._make_progress_hook(outtmpl, progress_hook, state)

        options: YtDlOptions = self._default_ydl_options.copy()
        options['outtmpl'] = outtmpl
        options['progress_hooks'] = [internal_hook]

        try:
            with YoutubeDL(params=options) as ydl:
                try:
                    # yt-dlp isn't typed; cast to Any and coerce to int
                    errors: int = int(cast('Any', ydl).download([url]))
                except KeyboardInterrupt as e:
                    raise ExtVideoInterruptedByUserError from e

                if errors != 0:
                    raise ExtVideoDownloadError(url)

        except DownloadError as e:
            # Use the dedicated download error (still an ExtVideoError), so a
            # failed download is reported the same way on both failure paths.
            raise ExtVideoDownloadError(url) from e

        # NOTE(review): when separate video and audio streams are merged, the
        # filename reported on 'finished' may be an intermediate stream file
        # rather than the merged result - confirm against yt-dlp behavior.
        if state.final_filename is not None:
            return state.final_filename

        ext = info.get('ext')
        guessed_ext = ext if isinstance(ext, str) and ext else 'mp4'
        return destination_directory / f'{clean_title}.{guessed_ext}'

    def _probe_video(self, url: str) -> dict[str, Any]:
        """
        Extract metadata without downloading to validate and fetch title/ext.

        Raises ExtVideoInfoError when yt-dlp fails or returns something other than a dict.
        """
        try:
            with YoutubeDL({**self._default_ydl_options, 'skip_download': True}) as ydl:
                raw = cast('Any', ydl).extract_info(url, download=False)
        except DownloadError as e:
            raise ExtVideoInfoError(url) from e

        if not isinstance(raw, dict):
            raise ExtVideoInfoError(url)
        return cast('dict[str, Any]', raw)

    @staticmethod
    def _sanitize_title(text: str) -> str:
        """Keep only alphanumeric characters and spaces (cross-platform safe subset)."""
        return ''.join(ch for ch in text if ch.isalnum() or ch == ' ')

    @staticmethod
    def _build_outtmpl(destination_directory: Path, title: str) -> str:
        """Build the yt-dlp output template; yt-dlp substitutes `%(ext)s` itself."""
        return str(destination_directory / f'{title}.%(ext)s')

    def _make_progress_hook(
        self,
        outtmpl: str,
        user_hook: ExternalVideoDownloadProgressHook | None,
        state: _HookState,
    ) -> Callable[[dict[str, Any]], None]:
        """
        Create the yt-dlp progress hook.

        The hook converts the raw yt-dlp status dict into ExternalVideoDownloadStatus,
        forwards it to `user_hook` (if any) and records the final filename in `state`.
        """

        def _hook(d: dict[str, Any]) -> None:
            filename = d.get('filename') or d.get('tmpfilename') or outtmpl
            name = Path(str(filename)).name

            total = d.get('total_bytes') or d.get('total_bytes_estimate')
            downloaded = d.get('downloaded_bytes')
            speed = d.get('speed')

            total_i = int(total) if isinstance(total, (int, float)) else None
            downloaded_i = (
                int(downloaded) if isinstance(downloaded, (int, float)) else None
            )
            speed_f = float(speed) if isinstance(speed, (int, float)) else None

            if total_i and downloaded_i is not None and total_i > 0:
                percentage = (downloaded_i / total_i) * 100.0
            else:
                percentage = 0.0

            if downloaded_i is None:
                delta = 0
            else:
                if downloaded_i >= state.last_downloaded:
                    delta = downloaded_i - state.last_downloaded
                else:
                    # The counter went backwards: yt-dlp started another file
                    # (e.g. the audio stream after the video stream). Count
                    # from zero instead of reporting a negative delta.
                    delta = downloaded_i
                state.last_downloaded = downloaded_i

            status_payload = ExternalVideoDownloadStatus(
                name=name,
                total_bytes=total_i,
                downloaded_bytes=downloaded_i,
                speed=speed_f,
                percentage=percentage,
                delta_bytes=delta,
            )

            if user_hook is not None:
                # Best effort on purpose: a failing UI callback must not abort the download.
                with contextlib.suppress(Exception):
                    user_hook(status_payload)

            if d.get('status') in {'finished', 'postprocessing'}:
                f = d.get('filename')
                if isinstance(f, str):
                    state.final_filename = Path(f)

        return _hook
214 |
--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/post_caching/post_cache.py:
--------------------------------------------------------------------------------
1 | """Implementation of a post cache using SQLAlchemy + SQLite local database."""
2 |
3 | from datetime import datetime
4 | from pathlib import Path
5 | from types import TracebackType
6 |
7 | from sqlalchemy import String, create_engine, text
8 | from sqlalchemy.exc import DatabaseError, OperationalError
9 | from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column, sessionmaker
10 |
11 | from boosty_downloader.src.application.filtering import (
12 | DownloadContentTypeFilter,
13 | )
14 | from boosty_downloader.src.infrastructure.loggers.base import RichLogger
15 |
16 |
class Base(DeclarativeBase):
    """
    Base class for SQLAlchemy models.

    Its metadata is used to create the cache tables.
    """
19 |
20 |
class _PostCacheEntryModel(Base):
    """Internal SQLite table structure of the caching layer (one row per post)."""

    __tablename__ = 'post_cache'
    # Alias documenting that the timestamp column stores an ISO 8601 string.
    _Iso8601Datetime = str

    # Primary key: UUID of the cached post.
    post_uuid: Mapped[str] = mapped_column(String, primary_key=True)

    # Flags to see which parts of the posts were downloaded and which are not.
    files_downloaded: Mapped[bool] = mapped_column(default=False, nullable=False)
    post_content_downloaded: Mapped[bool] = mapped_column(default=False, nullable=False)
    external_videos_downloaded: Mapped[bool] = mapped_column(
        default=False, nullable=False
    )
    boosty_videos_downloaded: Mapped[bool] = mapped_column(
        default=False, nullable=False
    )

    # Timestamp of the last update of the post.
    # Useful to determine if the post is outdated and needs to be re-downloaded even if some parts were downloaded before.
    #
    # Should be in ISO 8601 format (e.g., "2023-10-01T12:00:00Z"),
    # because SQLite does not have a native tz-aware datetime type.
    last_updated_timestamp: Mapped[_Iso8601Datetime] = mapped_column(
        String, nullable=False
    )
47 |
48 |
class SQLitePostCache:
    """
    Post cache using SQLite with SQLAlchemy.

    Caches posts in a local SQLite database under a given directory.
    Automatically reinitializes the database if it's missing or corrupted.

    Caching mechanism is smart enough to determine which specific parts are up-to-date
    and which are not.
    """

    DEFAULT_CACHE_FILENAME = 'post_cache.db'

    def __enter__(self) -> 'SQLitePostCache':
        """Create a context manager for the SQLitePostCache."""
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_value: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        """Ensure that the database connection is closed when exiting the context."""
        self.close()

    def __init__(self, destination: Path, logger: RichLogger) -> None:
        """
        Make a connection with the SQLite database and create/init it if necessary.

        A database file that cannot be opened as a valid database is replaced by a
        fresh one, as promised by the class description.
        """
        self.logger = logger

        self.destination = destination
        self.db_file: Path = self.destination / self.DEFAULT_CACHE_FILENAME
        self.db_file.parent.mkdir(parents=True, exist_ok=True)
        self._dirty = False

        try:
            self._connect()
        except DatabaseError:
            # Creating the tables already touches the file, so a corrupted
            # database fails here, before any integrity check can run.
            self.logger.error(
                'Post cache database is corrupted or inaccessible. Reinitializing...'
            )
            self._reinitialize_db()

    def _connect(self) -> None:
        """Create the engine, the tables, the session factory and a session for `db_file`."""
        self.engine = create_engine(f'sqlite:///{self.db_file}')
        Base.metadata.create_all(self.engine)

        # The factory is rebuilt together with the engine so that sessions are
        # never bound to an engine that has been disposed.
        self.Session = sessionmaker(bind=self.engine, expire_on_commit=False)
        self.session: Session = self.Session()

    def _check_db_integrity(self) -> bool:
        """Check if post_cache table is available and the db itself is accessible."""
        try:
            # Ping the database to check if it's accessible
            self.session.execute(text('SELECT 1 FROM post_cache LIMIT 1'))
            # Ensure the expected schema (column names) is present; reinit if legacy schema is detected
            self.session.execute(text('SELECT post_uuid FROM post_cache LIMIT 1'))
        except (OperationalError, DatabaseError):
            return False
        else:
            return True

    def _reinitialize_db(self) -> None:
        """Reinitialize the database (recreate it from scratch) and recreate session."""
        # The session does not exist yet when the very first connection failed.
        session = getattr(self, 'session', None)
        if session is not None:
            session.close()
        self.engine.dispose()

        if self.db_file.exists():
            self.db_file.unlink()  # Remove the corrupted file

        # Pending changes were discarded together with the old session.
        self._dirty = False
        self._connect()

    def _ensure_valid(self) -> None:
        """Maintenance method to ensure the database is valid before use."""
        if not self._check_db_integrity():
            self.logger.error(
                'Post cache database is corrupted or inaccessible. Reinitializing...'
            )
            self._reinitialize_db()

    def commit(self) -> None:
        """
        Commit any pending changes to the database if there are modifications.

        This method should be called after making changes to the database (e.g., adding,
        updating, or deleting records) to ensure that the changes are persisted.
        The `_dirty` flag is used to track whether there are uncommitted changes.
        """
        if self._dirty:
            self.session.commit()
            self._dirty = False

    def cache(
        self,
        post_uuid: str,
        updated_at: datetime,
        was_downloaded: list[DownloadContentTypeFilter],
    ) -> None:
        """
        Cache a post by its UUID and updated_at timestamp.

        For a post that is already cached with the same (or a newer) timestamp, the given
        parts are added to the parts recorded earlier. When the cached entry belongs to an
        older revision of the post, the earlier flags are dropped first, because they
        describe content that is outdated now.

        The change is only staged; call `commit()` (or `close()`) to persist it.
        """
        self._ensure_valid()

        entry = self.session.get(_PostCacheEntryModel, post_uuid)

        files_downloaded = DownloadContentTypeFilter.files in was_downloaded
        boosty_videos_downloaded = (
            DownloadContentTypeFilter.boosty_videos in was_downloaded
        )
        post_content_downloaded = (
            DownloadContentTypeFilter.post_content in was_downloaded
        )
        external_videos_downloaded = (
            DownloadContentTypeFilter.external_videos in was_downloaded
        )

        if entry:
            # Flags recorded for an older revision must not survive the
            # timestamp bump, otherwise stale parts would be reported as
            # up-to-date by get_missing_parts().
            keep_previous = not (
                datetime.fromisoformat(entry.last_updated_timestamp) < updated_at
            )

            entry.last_updated_timestamp = updated_at.isoformat()
            entry.files_downloaded = files_downloaded or (
                keep_previous and entry.files_downloaded
            )
            entry.boosty_videos_downloaded = boosty_videos_downloaded or (
                keep_previous and entry.boosty_videos_downloaded
            )
            entry.post_content_downloaded = post_content_downloaded or (
                keep_previous and entry.post_content_downloaded
            )
            entry.external_videos_downloaded = external_videos_downloaded or (
                keep_previous and entry.external_videos_downloaded
            )
        else:
            entry = _PostCacheEntryModel(
                post_uuid=post_uuid,
                last_updated_timestamp=updated_at.isoformat(),
                files_downloaded=files_downloaded,
                boosty_videos_downloaded=boosty_videos_downloaded,
                post_content_downloaded=post_content_downloaded,
                external_videos_downloaded=external_videos_downloaded,
            )
            self.session.add(entry)

        self._dirty = True

    def get_missing_parts(
        self,
        post_uuid: str,
        updated_at: datetime,
        required: list[DownloadContentTypeFilter],
    ) -> list[DownloadContentTypeFilter]:
        """
        Determine which parts of the post still need to be downloaded.

        Returns all required parts if the post is missing or outdated; otherwise, returns only those parts that haven't been
        downloaded yet based on the current cache state.
        """
        self._ensure_valid()
        post = self.session.get(_PostCacheEntryModel, post_uuid)
        if not post:
            return required

        # If cached post is outdated in general, just mark all required parts as missing.
        # NOTE(review): assumes the stored timestamp and `updated_at` are both
        # naive or both tz-aware - confirm against the caller.
        if datetime.fromisoformat(post.last_updated_timestamp) < updated_at:
            return required

        missing: list[DownloadContentTypeFilter] = [
            part
            for part in required
            if (
                (part is DownloadContentTypeFilter.files and not post.files_downloaded)
                or (
                    part is DownloadContentTypeFilter.boosty_videos
                    and not post.boosty_videos_downloaded
                )
                or (
                    part is DownloadContentTypeFilter.external_videos
                    and not post.external_videos_downloaded
                )
                or (
                    part is DownloadContentTypeFilter.post_content
                    and not post.post_content_downloaded
                )
            )
        ]

        return missing

    def remove_cache_completely(self) -> None:
        """Reinitialize the cache completely in case if user wants to start fresh."""
        self._reinitialize_db()

    def close(self) -> None:
        """Save and close the database connection."""
        self.commit()
        self.session.close()
        self.engine.dispose()
234 |
--------------------------------------------------------------------------------
/boosty_downloader/src/interfaces/console_progress_reporter.py:
--------------------------------------------------------------------------------
1 | """
2 | Progress reporting and logging utilities for console-based Boosty downloader interface.
3 |
4 | Includes a ProgressReporter class for rich progress bars and logging, and a FakeDownloader for demonstration/testing.
5 | """
6 |
7 | import asyncio
8 | import logging
9 | import secrets
10 | import uuid
11 | from collections.abc import AsyncGenerator, Sequence
12 | from contextlib import asynccontextmanager
13 |
14 | from rich.console import Console
15 | from rich.logging import RichHandler
16 | from rich.progress import (
17 | BarColumn,
18 | Progress,
19 | SpinnerColumn,
20 | TaskID,
21 | TaskProgressColumn,
22 | TimeElapsedColumn,
23 | )
24 |
25 | from boosty_downloader.src.infrastructure.loggers.base import RichLogger
26 |
27 |
28 | class ProgressReporter:
29 | """
30 | Provides progress bar management and rich logging for console-based interfaces using the Rich library.
31 |
32 | Tasks are identified by UUIDs and can be nested using `level` to visually indent sub-tasks.
33 | """
34 |
35 | def __init__(
36 | self,
37 | console: Console | None = None,
38 | logger: logging.Logger | None = None,
39 | ) -> None:
40 | self.console = console or Console()
41 | self.progress = Progress(
42 | SpinnerColumn(),
43 | '[progress.description]{task.description}',
44 | BarColumn(),
45 | TaskProgressColumn(),
46 | TimeElapsedColumn(),
47 | console=self.console,
48 | refresh_per_second=29,
49 | transient=True,
50 | )
51 | self._logger = logger or self._create_default_logger()
52 | self._uuid_to_task_id: dict[uuid.UUID, TaskID] = {}
53 | self._uuid_to_level: dict[uuid.UUID, int] = {}
54 | self._uuid_to_name: dict[uuid.UUID, str] = {}
55 |
56 | def _create_default_logger(self) -> logging.Logger:
57 | logger = logging.getLogger('ProgressLogger')
58 | logger.setLevel(logging.INFO)
59 | logger.addHandler(
60 | RichHandler(
61 | console=self.console, show_time=True, markup=True, show_path=False
62 | )
63 | )
64 | return logger
65 |
66 | def _format_description(self, name: str, level: int) -> str:
67 | indent = ' ' * level
68 | max_length = 80
69 | available = max_length - len(indent)
70 |
71 | if len(name) > available:
72 | name = name[: available - 1] + '…' # use ellipsis
73 |
74 | return f'{indent}{name}'
75 |
    def start(self) -> None:
        """Start the underlying Rich progress display."""
        self.progress.start()
78 |
    def stop(self) -> None:
        """Stop the underlying Rich progress display."""
        self.progress.stop()
81 |
82 | def create_task(
83 | self, name: str, total: int | None = None, indent_level: int = 0
84 | ) -> uuid.UUID:
85 | task_id = self.progress.add_task(
86 | self._format_description(name, indent_level), total=total
87 | )
88 | task_uuid = uuid.uuid4()
89 | self._uuid_to_task_id[task_uuid] = task_id
90 | self._uuid_to_level[task_uuid] = indent_level
91 | self._uuid_to_name[task_uuid] = name
92 | return task_uuid
93 |
94 | def update_task(
95 | self,
96 | task_uuid: uuid.UUID,
97 | advance: int = 1,
98 | total: int | None = None,
99 | description: str | None = None,
100 | ) -> None:
101 | task_id = self._uuid_to_task_id.get(task_uuid)
102 | if task_id is not None and task_id in self.progress.task_ids:
103 | level = self._uuid_to_level.get(task_uuid, 0)
104 | base_name = description or self._uuid_to_name.get(task_uuid, '')
105 | formatted_description = self._format_description(base_name, level)
106 | self.progress.update(
107 | task_id,
108 | advance=advance,
109 | total=total,
110 | description=formatted_description,
111 | )
112 |
113 | def complete_task(self, task_uuid: uuid.UUID) -> None:
114 | task_id = self._uuid_to_task_id.get(task_uuid)
115 | if task_id is not None and task_id in self.progress.task_ids:
116 | total = self.progress.tasks[task_id].total
117 | self.progress.update(task_id, completed=total, visible=False)
118 | self._uuid_to_task_id.pop(task_uuid, None)
119 | self._uuid_to_level.pop(task_uuid, None)
120 | self._uuid_to_name.pop(task_uuid, None)
121 |
122 | def newline(self, count: int = 1) -> None:
123 | for _ in range(count):
124 | self.console.print()
125 |
126 | def headline_rule(self) -> None:
127 | self.console.rule()
128 |
129 | def info(self, message: str) -> None:
130 | self._logger.info(message)
131 |
132 | def success(self, message: str) -> None:
133 | self._logger.info(f'[bold green]✔ {message}[/bold green]')
134 |
135 | def warn(self, message: str) -> None:
136 | self._logger.warning(f'[bold yellow]⚠ {message}[/bold yellow]')
137 |
138 | def error(self, message: str) -> None:
139 | self._logger.error(f'[bold red]✖ {message}[/bold red]')
140 |
141 | def notice(self, message: str) -> None:
142 | self.console.print(
143 | f'[bold yellow]NOTICE:[/bold yellow] {message}', highlight=False
144 | )
145 |
146 | def log_list(self, title: str, items: Sequence[str]) -> None:
147 | self.console.print(f'[bold cyan]{title}[/bold cyan]:')
148 | for item in items:
149 | self.console.print(f' • {item}')
150 |
151 |
@asynccontextmanager
async def use_reporter(
    reporter: ProgressReporter,
) -> AsyncGenerator[ProgressReporter, None]:
    """
    Run a ProgressReporter for the duration of an `async with` block.

    The reporter is started on entry and yielded to the caller; it is always
    stopped on exit, whether the body finished normally or raised.
    """
    active_reporter = reporter
    try:
        active_reporter.start()
        yield active_reporter
    finally:
        # Runs on normal exit and on errors alike.
        active_reporter.stop()
162 |
163 |
# ------------------------------------------------------------------------------
# Usage example: run this module as a script to see how the reporter works:
#   poetry run python .../console_progress_reporter.py
167 |
if __name__ == '__main__':
    import asyncio

    class FakeDownloader:
        """Demo-only downloader that fakes pages, posts and file transfers to exercise the reporter."""

        def __init__(self, reporter: ProgressReporter) -> None:
            self.reporter = reporter

        async def iterate_pages(
            self, total_pages: int = 3, posts_per_page: int = 5
        ) -> AsyncGenerator[list[str], None]:
            """Yield `total_pages` lists of fake post titles, pausing briefly before each page."""
            for page_num in range(1, total_pages + 1):
                await asyncio.sleep(0.5)
                posts = [
                    f'post_{(page_num - 1) * posts_per_page + i + 1:02}'
                    for i in range(posts_per_page)
                ]
                yield posts

        async def download_file(self, task_name: str, size_kb: int) -> None:
            """Simulate downloading a file of `size_kb` KB in 50 KB chunks with its own progress task."""
            chunk_size = 50
            # Ceiling division: a partial last chunk still counts as a step.
            total_chunks = (size_kb + chunk_size - 1) // chunk_size
            download_task_id = self.reporter.create_task(task_name, total=total_chunks)

            for chunk in range(total_chunks):
                # Random 50-150 ms pause per chunk to imitate network latency.
                await asyncio.sleep(secrets.randbelow(11) / 100 + 0.05)
                self.reporter.update_task(
                    download_task_id,
                    advance=1,
                    description=f'{task_name} [{min((chunk + 1) * chunk_size, size_kb)} KB / {size_kb} KB]',
                )
            self.reporter.complete_task(download_task_id)

        async def download_all_posts(self, username: str) -> None:
            """Simulate downloading all posts for a user with progress reporting."""
            self.reporter.notice(f'Starting download for user: {username}')
            self.reporter.headline_rule()

            # The number of posts is not known up front, so the task has no total.
            total_posts = None
            download_task_id = self.reporter.create_task('posts', total=total_posts)

            downloaded_posts = 0

            async for posts in self.iterate_pages():
                self.reporter.info(f'Loaded new page with {len(posts)} posts')

                for post_title in posts:
                    self.reporter.info(f'Processing post: {post_title}')

                    # Roughly one post in ten is reported as inaccessible.
                    if secrets.randbelow(10) == 0:
                        self.reporter.warn(f'Skipping inaccessible post: {post_title}')
                        self.reporter.update_task(download_task_id, advance=1)
                        continue

                    files = {
                        'image_1': secrets.randbelow(201) + 100,  # 100-300 KB
                        'video_1': secrets.randbelow(1501) + 1000,  # 1-2.5 MB
                        'attachment_1': secrets.randbelow(301) + 200,  # 200-500 KB
                    }

                    for fname, size_kb in files.items():
                        task_name = f'{post_title}::{fname}'
                        await self.download_file(task_name, size_kb)
                        self.reporter.success(f'Finished {fname} of {post_title}')

                    downloaded_posts += 1
                    self.reporter.update_task(download_task_id, advance=1)

                    self.reporter.headline_rule()

            # Close the overall task so it does not linger in the reporter.
            self.reporter.complete_task(download_task_id)
            self.reporter.success(f'✅ Finished downloading {downloaded_posts} posts.')

    async def main() -> None:
        """Run a demonstration of the FakeDownloader with progress reporting."""
        logger = RichLogger('dumb')

        reporter = ProgressReporter(
            logger=logger.logging_logger_obj,
            console=logger.console,
        )
        async with use_reporter(reporter):
            downloader = FakeDownloader(reporter)
            await downloader.download_all_posts('demo_user')

    asyncio.run(main())
257 |
--------------------------------------------------------------------------------