├── test
    ├── unit
    │   ├── __init__.py
    │   ├── download_manager
    │   │   └── ok_video_ranking_test.py
    │   └── html_generator
    │   │   └── html_templates_test.py
    ├── integration
    │   ├── __init__.py
    │   ├── analysis
    │   │   ├── __init__.py
    │   │   └── get_author_posts_test.py
    │   ├── boosty_api
    │   │   ├── __init__.py
    │   │   └── boosty_api_test.py
    │   ├── .env.example
    │   ├── configuration.py
    │   └── fixtures.py
    └── ABOUT_TESTING.md
├── boosty_downloader
    ├── __init__.py
    └── src
    │   ├── __init__.py
    │   ├── domain
    │       ├── __init__.py
    │       ├── post.py
    │       └── post_data_chunks.py
    │   ├── application
    │       ├── __init__.py
    │       ├── di
    │       │   ├── __init__.py
    │       │   ├── download_context.py
    │       │   └── app_environment.py
    │       ├── exceptions
    │       │   ├── __init__.py
    │       │   └── application_errors.py
    │       ├── use_cases
    │       │   ├── __init__.py
    │       │   ├── check_total_posts.py
    │       │   ├── download_specific_post.py
    │       │   └── download_all_posts.py
    │       ├── mappers
    │       │   ├── image.py
    │       │   ├── file.py
    │       │   ├── external_video.py
    │       │   ├── __init__.py
    │       │   ├── ok_boosty_video.py
    │       │   ├── list.py
    │       │   ├── html_converter.py
    │       │   ├── post_mapper.py
    │       │   └── link_header_text.py
    │       ├── filtering.py
    │       └── ok_video_ranking.py
    │   ├── infrastructure
    │       ├── __init__.py
    │       ├── loggers
    │       │   ├── __init__.py
    │       │   ├── logger_instances.py
    │       │   ├── failed_downloads_logger.py
    │       │   └── base.py
    │       ├── html_reporter
    │       │   ├── __init__.py
    │       │   └── html_reporter.py
    │       ├── post_caching
    │       │   ├── __init__.py
    │       │   └── post_cache.py
    │       ├── update_checker
    │       │   ├── __init__.py
    │       │   └── pypi_checker.py
    │       ├── boosty_api
    │       │   ├── core
    │       │   │   ├── __init__.py
    │       │   │   ├── endpoints.py
    │       │   │   └── client.py
    │       │   ├── models
    │       │   │   ├── __init__.py
    │       │   │   └── post
    │       │   │   │   ├── __init__.py
    │       │   │   │   ├── post_data_types
    │       │   │   │       ├── post_data_header.py
    │       │   │   │       ├── post_data_video.py
    │       │   │   │       ├── post_data_file.py
    │       │   │   │       ├── post_data_link.py
    │       │   │   │       ├── post_data_text.py
    │       │   │   │       ├── post_data_image.py
    │       │   │   │       ├── __init__.py
    │       │   │   │       ├── post_data_list.py
    │       │   │   │       └── post_data_ok_video.py
    │       │   │   │   ├── posts_request.py
    │       │   │   │   ├── extra.py
    │       │   │   │   ├── post.py
    │       │   │   │   └── base_post_data.py
    │       │   ├── utils
    │       │   │   ├── __init__.py
    │       │   │   ├── filter_none_params.py
    │       │   │   ├── textual_post_extractor.py
    │       │   │   └── auth_parsers.py
    │       │   └── __init__.py
    │       ├── yaml_configuration
    │       │   ├── __init__.py
    │       │   ├── sample_config.py
    │       │   └── config.py
    │       ├── external_videos_downloader
    │       │   ├── __init__.py
    │       │   └── external_videos_downloader.py
    │       ├── html_generator
    │       │   ├── templates
    │       │   │   ├── image.html
    │       │   │   ├── video.html
    │       │   │   ├── list.html
    │       │   │   ├── text.html
    │       │   │   └── base.html
    │       │   ├── __init__.py
    │       │   ├── models.py
    │       │   └── renderer.py
    │       ├── path_sanitizer.py
    │       ├── human_readable_filesize.py
    │       └── file_downloader.py
    │   └── interfaces
    │       ├── __init__.py
    │       ├── help_panels.py
    │       ├── cli_options.py
    │       └── console_progress_reporter.py
├── assets
    ├── usage.png
    ├── example1.png
    ├── example2.png
    ├── auth_guide.png
    ├── config_guide.png
    ├── screenshot.png
    ├── total_check.png
    └── boosty-black-badge.png
├── .github
    ├── renovate.json
    ├── pull_request_template.md
    └── workflows
    │   ├── any-pr-validation.yaml
    │   ├── release-pr-validation.yaml
    │   └── release.yaml
├── pyrightconfig.json
├── ruff.toml
├── LICENSE
├── pyproject.toml
├── CONTRIBUTING.md
├── Makefile
├── CHANGELOG.md
├── README.md
└── .gitignore


/test/unit/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/test/integration/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/boosty_downloader/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/test/integration/analysis/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/domain/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/test/integration/boosty_api/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/application/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/application/di/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/interfaces/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/application/exceptions/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/application/use_cases/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/loggers/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/html_reporter/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/post_caching/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/update_checker/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/core/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/models/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/yaml_configuration/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/models/post/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/external_videos_downloader/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/assets/usage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Glitchy-Sheep/boosty-downloader/HEAD/assets/usage.png


--------------------------------------------------------------------------------
/assets/example1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Glitchy-Sheep/boosty-downloader/HEAD/assets/example1.png


--------------------------------------------------------------------------------
/assets/example2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Glitchy-Sheep/boosty-downloader/HEAD/assets/example2.png


--------------------------------------------------------------------------------
/assets/auth_guide.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Glitchy-Sheep/boosty-downloader/HEAD/assets/auth_guide.png


--------------------------------------------------------------------------------
/assets/config_guide.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Glitchy-Sheep/boosty-downloader/HEAD/assets/config_guide.png


--------------------------------------------------------------------------------
/assets/screenshot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Glitchy-Sheep/boosty-downloader/HEAD/assets/screenshot.png


--------------------------------------------------------------------------------
/assets/total_check.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Glitchy-Sheep/boosty-downloader/HEAD/assets/total_check.png


--------------------------------------------------------------------------------
/assets/boosty-black-badge.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Glitchy-Sheep/boosty-downloader/HEAD/assets/boosty-black-badge.png


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/html_generator/templates/image.html:
--------------------------------------------------------------------------------
1 | <img src="{{ image.url }}" alt="Image" style="max-width: 100%;">


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/__init__.py:
--------------------------------------------------------------------------------
1 | from .core.client import BoostyAPIClient
2 | 
3 | __all__ = [
4 |     'BoostyAPIClient',
5 | ]
6 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/core/endpoints.py:
--------------------------------------------------------------------------------
1 | """All constants for endpoints."""
2 | 
3 | BOOSTY_DEFAULT_BASE_URL = 'https://api.boosty.to/v1/'
4 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/html_generator/templates/video.html:
--------------------------------------------------------------------------------
1 | <video controls>
2 |     <source src="{{ video.url }}" type="video/mp4">
3 |     Your browser does not support the video tag.
4 | </video>


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/loggers/logger_instances.py:
--------------------------------------------------------------------------------
1 | """Module contains loggers for different parts of the app"""
2 | 
3 | from boosty_downloader.src.infrastructure.loggers.base import RichLogger
4 | 
5 | downloader_logger = RichLogger('Boosty_Downloader')
6 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/interfaces/help_panels.py:
--------------------------------------------------------------------------------
 1 | """Defines panels for grouping arguments in the CLI help interface."""
 2 | 
 3 | from enum import Enum
 4 | 
 5 | 
class HelpPanels(str, Enum):
    """
    Panels for grouping arguments in the CLI help.

    Members also subclass ``str``, so each member compares equal to its
    value and can be used wherever a plain string is accepted.
    """

    # Each value is the panel title string associated with the member.
    actions = 'Actions'
    filtering = 'Filtering'
    network = 'Network'
12 | 


--------------------------------------------------------------------------------
/.github/renovate.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "$schema": "https://docs.renovatebot.com/renovate-schema.json",
 3 |   "extends": [
 4 |     "config:recommended"
 5 |   ],
 6 | 
 7 |   "dependencyDashboard": true,
 8 | 
 9 |   "updateLockFiles": true,
10 | 
11 |   "automerge": false,
12 |   "autoApprove": false,
13 |   "platformAutomerge": false,
14 |   "baseBranches": ["dev"]
15 | }
16 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/yaml_configuration/sample_config.py:
--------------------------------------------------------------------------------
 1 | """Sample YAML configuration text with placeholder auth values and a default download directory."""
 2 | 
 3 | DEFAULT_YAML_CONFIG_VALUE = """
 4 | auth:
 5 |   # Insert your own cookie and auth header values here
 6 |   cookie: ''
 7 |   auth_header: ''
 8 | downloading_settings:
 9 |   target_directory: ./boosty-downloads
10 | """
11 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/models/post/post_data_types/post_data_header.py:
--------------------------------------------------------------------------------
 1 | """Header of the posts"""
 2 | 
 3 | from typing import Literal
 4 | 
 5 | from pydantic import BaseModel
 6 | 
 7 | 
class BoostyPostDataHeaderDTO(BaseModel):
    """
    Header content piece in posts.

    Validation only succeeds when ``type`` is exactly ``'header'``.
    """

    # Discriminator: must be the literal string 'header'.
    type: Literal['header']
    # Header payload as a string — NOTE(review): its inner format is not visible here.
    content: str
    # Modifier string delivered with the content; its semantics are not visible here.
    modificator: str
14 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/models/post/post_data_types/post_data_video.py:
--------------------------------------------------------------------------------
 1 | """Usual video links (on youtube and other services)"""
 2 | 
 3 | from typing import Literal
 4 | 
 5 | from pydantic import BaseModel
 6 | 
 7 | 
class BoostyPostDataExternalVideoDTO(BaseModel):
    """
    Video content piece in posts.

    Validation only succeeds when ``type`` is exactly ``'video'``.
    """

    # Discriminator: must be the literal string 'video'.
    type: Literal['video']
    # Link to the video.
    url: str
13 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/utils/filter_none_params.py:
--------------------------------------------------------------------------------
 1 | """Just a little helper to make requests"""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from typing import Any
 6 | 
 7 | 
 8 | def filter_none_params(kwargs: dict[str, Any | None]) -> dict[str, Any]:
 9 |     """Remove None values from kwargs"""
10 |     return {k: v for k, v in kwargs.items() if v is not None}
11 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/models/post/post_data_types/post_data_file.py:
--------------------------------------------------------------------------------
 1 | """The module with file representation of posts data"""
 2 | 
 3 | from typing import Literal
 4 | 
 5 | from pydantic import BaseModel
 6 | 
 7 | 
class BoostyPostDataFileDTO(BaseModel):
    """
    File content piece in posts.

    Validation only succeeds when ``type`` is exactly ``'file'``.
    """

    # Discriminator: must be the literal string 'file'.
    type: Literal['file']
    # Link to the file.
    url: str
    # Title string of the file.
    title: str
14 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/models/post/post_data_types/post_data_link.py:
--------------------------------------------------------------------------------
 1 | """Module with link representation of posts data"""
 2 | 
 3 | from typing import Literal
 4 | 
 5 | from pydantic import BaseModel
 6 | 
 7 | 
class BoostyPostDataLinkDTO(BaseModel):
    """
    Link content piece in posts.

    Validation only succeeds when ``type`` is exactly ``'link'``.
    """

    # Discriminator: must be the literal string 'link'.
    type: Literal['link']
    # Target of the link.
    url: str
    # Link payload as a string — NOTE(review): its inner format is not visible here.
    content: str
    # Boolean flag delivered with the link; its meaning is not visible here.
    explicit: bool
15 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/models/post/post_data_types/post_data_text.py:
--------------------------------------------------------------------------------
 1 | """The module with textual representation of posts data"""
 2 | 
 3 | from typing import Literal
 4 | 
 5 | from pydantic import BaseModel
 6 | 
 7 | 
class BoostyPostDataTextDTO(BaseModel):
    """
    Textual content piece in posts.

    Validation only succeeds when ``type`` is exactly ``'text'``.
    """

    # Discriminator: must be the literal string 'text'.
    type: Literal['text']

    # Text payload as a string — NOTE(review): its inner format is not visible here.
    content: str
    # Modifier string delivered with the content; its semantics are not visible here.
    modificator: str
15 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/path_sanitizer.py:
--------------------------------------------------------------------------------
 1 | """The module helps with path sanitization to make it work on different platforms"""
 2 | 
 3 | import re
 4 | 
 5 | 
 6 | def sanitize_string(string: str) -> str:
 7 |     """Remove unsafe filesystem characters from a string"""
 8 |     # Convert path to a string and sanitize it
 9 |     unsafe_chars = r'[<>:"/\\|?*]'
10 |     return re.sub(unsafe_chars, '', str(string))
11 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/models/post/post_data_types/post_data_image.py:
--------------------------------------------------------------------------------
 1 | """The module with image representation of posts data"""
 2 | 
 3 | from typing import Literal
 4 | 
 5 | from pydantic import BaseModel
 6 | 
 7 | 
class BoostyPostDataImageDTO(BaseModel):
    """
    Image content piece in posts.

    Validation only succeeds when ``type`` is exactly ``'image'``.
    """

    # Discriminator: must be the literal string 'image'.
    type: Literal['image']
    # Link to the image.
    url: str
    # Optional dimensions; both default to None when absent from the payload.
    width: int | None = None
    height: int | None = None
15 | 


--------------------------------------------------------------------------------
/pyrightconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "typeCheckingMode": "strict",
 3 |     "include": [
 4 |         "boosty_downloader"
 5 |     ],
 6 |     "exclude": [
 7 |         "**/node_modules",
 8 |         "**/__pycache__",
 9 |         "**/dist",
10 |         "**/build"
11 |     ],
12 |     "defineConstant": {
13 |         "DEBUG": true
14 |     },
15 |     "reportMissingImports": "error",
16 |     "reportMissingTypeStubs": false,
17 |     "pythonVersion": "3.10",
18 |     "pythonPlatform": "Windows"
19 | }


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/models/post/posts_request.py:
--------------------------------------------------------------------------------
 1 | """Models for posts responses to boosty.to"""
 2 | 
 3 | from pydantic import BaseModel
 4 | 
 5 | from boosty_downloader.src.infrastructure.boosty_api.models.post.extra import Extra
 6 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post import PostDTO
 7 | 
 8 | 
class PostsResponse(BaseModel):
    """Model representing a response from a posts request"""

    # Posts contained in this response.
    posts: list[PostDTO]
    # Meta info that accompanies the posts (see the Extra model).
    extra: Extra
14 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/models/post/extra.py:
--------------------------------------------------------------------------------
 1 | """Models for meta info about posts or requests to boosty.to"""
 2 | 
 3 | from pydantic import BaseModel, ConfigDict
 4 | from pydantic.alias_generators import to_camel
 5 | 
 6 | 
class Extra(BaseModel):
    """
    Meta info for posts request, can be used for pagination mainly.

    Field aliases are generated with ``to_camel``, so ``is_last`` is also
    accepted under the alias ``isLast``.
    """

    # Boolean flag; by its name it marks the final page — TODO confirm against the API.
    is_last: bool
    # Offset as a string; presumably fed into the next request — verify against caller.
    offset: str

    model_config = ConfigDict(
        # Derive each field's alias by camel-casing its name.
        alias_generator=to_camel,
        # Accept the Python field names in addition to the generated aliases.
        populate_by_name=True,
        # Allow validation from an object's attributes, not only from mappings.
        from_attributes=True,
    )
18 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/application/mappers/image.py:
--------------------------------------------------------------------------------
 1 | """Image content mapper module to transform Boosty API DTO to domain model."""
 2 | 
 3 | from boosty_downloader.src.domain.post import PostDataChunkImage
 4 | from boosty_downloader.src.infrastructure.boosty_api.models.post.base_post_data import (
 5 |     BoostyPostDataImageDTO,
 6 | )
 7 | 
 8 | 
 9 | def to_domain_image_chunk(api_image: BoostyPostDataImageDTO) -> PostDataChunkImage:
10 |     """Convert API PostDataImage to domain PostDataChunkImage."""
11 |     return PostDataChunkImage(
12 |         url=api_image.url,
13 |     )
14 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/application/mappers/file.py:
--------------------------------------------------------------------------------
 1 | """Mapping functions for converting API PostDataFile objects to domain PostDataChunkFile objects."""
 2 | 
 3 | from boosty_downloader.src.domain.post import PostDataChunkFile
 4 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post_data_types import (
 5 |     BoostyPostDataFileDTO,
 6 | )
 7 | 
 8 | 
 9 | def to_domain_file_chunk(
10 |     api_file: BoostyPostDataFileDTO, signed_query: str
11 | ) -> PostDataChunkFile:
12 |     """Convert API PostDataFile to domain PostDataChunkFile."""
13 |     return PostDataChunkFile(
14 |         url=api_file.url + signed_query,
15 |         filename=api_file.title,
16 |     )
17 | 


--------------------------------------------------------------------------------
/test/integration/.env.example:
--------------------------------------------------------------------------------
 1 | 
 2 | # Valid Boosty authentication token
 3 | BOOSTY_TOKEN=your_boosty_token_here
 4 | 
 5 | # Cookies for Boosty authentication (if required)
 6 | BOOSTY_COOKIES=your_boosty_cookies_here
 7 | 
 8 | # URL or ID of a post that is publicly accessible
 9 | BOOSTY_AVAILABLE_POST=https://boosty.to/author/posts/12345
10 | 
11 | # URL or ID of a post that exists but is behind a paywall or private
12 | BOOSTY_UNAVAILABLE_POST=https://boosty.to/author/posts/67890
13 | 
14 | # Username of an author that doesn't exist
15 | BOOSTY_NONEXISTENT_AUTHOR=nonexistent_author_username
16 | 
17 | # Username of an existing author with public content
18 | BOOSTY_EXISTING_AUTHOR=existing_author_username
19 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/utils/textual_post_extractor.py:
--------------------------------------------------------------------------------
 1 | """Module to extract textual content from a post by its chunks"""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | import json
 6 | from io import StringIO
 7 | 
 8 | 
 9 | def extract_textual_content(
10 |     content: str,
11 | ) -> str:
12 |     """Extract textual content from a post chunk Link/Text"""
13 |     buffer = StringIO()
14 | 
15 |     # Merge all the text and link fragments into one file
16 |     try:
17 |         json_data: list[str] = json.loads(content)
18 |     except json.JSONDecodeError:
19 |         return buffer.getvalue()
20 | 
21 |     if len(json_data) == 0:
22 |         return buffer.getvalue()
23 | 
24 |     clean_text = str(json_data[0])
25 | 
26 |     buffer.write(clean_text)
27 | 
28 |     return buffer.getvalue()
29 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/application/mappers/external_video.py:
--------------------------------------------------------------------------------
 1 | """Mapping functions for converting external video API DTOs to domain objects."""
 2 | 
 3 | from boosty_downloader.src.domain.post import PostDataChunkExternalVideo
 4 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post_data_types import (
 5 |     BoostyPostDataExternalVideoDTO,
 6 | )
 7 | 
 8 | 
 9 | def to_external_video_content(
10 |     api_video_dto: BoostyPostDataExternalVideoDTO,
11 | ) -> PostDataChunkExternalVideo:
12 |     """
13 |     Convert API video data to domain external video content object.
14 | 
15 |     It uses the PostDataVideo DTO to extract the video URL and other metadata
16 |     to create a domain external video content object.
17 |     """
18 |     return PostDataChunkExternalVideo(
19 |         url=api_video_dto.url,
20 |     )
21 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/models/post/post_data_types/__init__.py:
--------------------------------------------------------------------------------
 1 | from .post_data_file import BoostyPostDataFileDTO
 2 | from .post_data_header import BoostyPostDataHeaderDTO
 3 | from .post_data_image import BoostyPostDataImageDTO
 4 | from .post_data_link import BoostyPostDataLinkDTO
 5 | from .post_data_list import BoostyPostDataListDTO
 6 | from .post_data_ok_video import BoostyPostDataOkVideoDTO
 7 | from .post_data_text import BoostyPostDataTextDTO
 8 | from .post_data_video import BoostyPostDataExternalVideoDTO
 9 | 
10 | __all__ = [
11 |     'BoostyPostDataExternalVideoDTO',
12 |     'BoostyPostDataFileDTO',
13 |     'BoostyPostDataHeaderDTO',
14 |     'BoostyPostDataImageDTO',
15 |     'BoostyPostDataLinkDTO',
16 |     'BoostyPostDataListDTO',
17 |     'BoostyPostDataOkVideoDTO',
18 |     'BoostyPostDataTextDTO',
19 | ]
20 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/html_generator/__init__.py:
--------------------------------------------------------------------------------
 1 | """HTML generator module for independent HTML generation."""
 2 | 
 3 | from .models import (
 4 |     HtmlGenChunk,
 5 |     HtmlGenFile,
 6 |     HtmlGenImage,
 7 |     HtmlGenList,
 8 |     HtmlGenText,
 9 |     HtmlGenVideo,
10 |     HtmlListItem,
11 |     HtmlListStyle,
12 |     HtmlTextFragment,
13 |     HtmlTextStyle,
14 | )
15 | from .renderer import (
16 |     render_html,
17 |     render_html_chunk,
18 |     render_html_to_file,
19 | )
20 | 
21 | __all__ = [
22 |     'HtmlGenChunk',
23 |     'HtmlGenFile',
24 |     'HtmlGenImage',
25 |     'HtmlGenList',
26 |     'HtmlGenText',
27 |     'HtmlGenVideo',
28 |     'HtmlListItem',
29 |     'HtmlListStyle',
30 |     'HtmlTextFragment',
31 |     'HtmlTextStyle',
32 |     'render_html',
33 |     'render_html_chunk',
34 |     'render_html_to_file',
35 | ]
36 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/human_readable_filesize.py:
--------------------------------------------------------------------------------
 1 | """Module with functions for human-readable file size representation"""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | 
 6 | def human_readable_size(size: float | None, decimal_places: int = 2) -> str:
 7 |     """
 8 |     Return a human-readable string representing the size of a file.
 9 | 
10 |     Usage example:
11 |         path = Path("example.txt")
12 | 
13 |         file_size = path.stat().st_size  # Get file size in bytes
14 |         print(human_readable_size(file_size))
15 |     """
16 |     if size is None:
17 |         return 'N/A'
18 | 
19 |     kb_size = 1024
20 | 
21 |     for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
22 |         if size < kb_size:
23 |             return f'{size:.{decimal_places}f} {unit}'
24 |         size /= kb_size
25 |     return f'{size:.{decimal_places}f} PB'
26 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/utils/auth_parsers.py:
--------------------------------------------------------------------------------
 1 | """Cookie and authorization parser module for raw-browser-data parsing"""
 2 | 
 3 | from http.cookies import SimpleCookie
 4 | 
 5 | import aiohttp
 6 | 
 7 | 
 8 | def parse_session_cookie(cookie_string: str) -> aiohttp.CookieJar:
 9 |     """Parse the session cookie and return a dictionary with auth data for aiohttp client."""
10 |     if cookie_string.lower().startswith('cookie: '):
11 |         cookie_string = cookie_string[8:].strip()
12 | 
13 |     cookie = SimpleCookie()
14 |     cookie.load(cookie_string)
15 | 
16 |     jar = aiohttp.CookieJar()
17 |     for key, morsel in cookie.items():
18 |         jar.update_cookies({key: morsel.value})
19 | 
20 |     return jar
21 | 
22 | 
def parse_auth_header(header: str) -> dict[str, str]:
    """
    Wrap a raw authorization value in a headers mapping.

    The value is stored unchanged under the ``Authorization`` key; it is
    neither parsed nor validated.
    """
    headers: dict[str, str] = {}
    headers['Authorization'] = header
    return headers
26 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/html_generator/templates/list.html:
--------------------------------------------------------------------------------
{# Renders a post list chunk (`lst`) as nested <ol>/<ul> markup.
   `render_item` emits one <li>: every entry of `item.data` goes through
   `render_chunk`, then `item.nested_items` (if any) are rendered recursively.
   Nested lists reuse the top-level `lst.style` to choose <ol> vs <ul>. -#}
{% macro render_item(item) -%}
<li>
    {% for txt in item.data %}
    {{ render_chunk(txt) | safe }}
    {% endfor %}
    {% if item.nested_items %}
    {% if lst.style.value == 'ordered' %}
    <ol>
        {% else %}
        <ul>
            {% endif %}
            {% for nested in item.nested_items %}
            {{ render_item(nested) }}
            {% endfor %}
            {% if lst.style.value == 'ordered' %}
    </ol>
    {% else %}
    </ul>
    {% endif %}
    {% endif %}
</li>
{%- endmacro %}

{% if lst.style.value == 'ordered' %}
<ol>
    {% else %}
    <ul>
        {% endif %}
        {% for item in lst.items %}
        {{ render_item(item) }}
        {% endfor %}
        {% if lst.style.value == 'ordered' %}
</ol>
{% else %}
</ul>
{% endif %}


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/models/post/post_data_types/post_data_list.py:
--------------------------------------------------------------------------------
 1 | """The module with list representation of posts data"""
 2 | 
 3 | from typing import Literal
 4 | 
 5 | from pydantic import BaseModel
 6 | 
 7 | 
class BoostyPostDataListDataItemDTO(BaseModel):
    """Represents a single data item in a list of post data chunks."""

    # Kind of the data item; kept as a free-form string (not a Literal).
    type: str
    # Optional modifier; defaults to an empty string and also accepts None.
    modificator: str | None = ''
    # Raw content of the item as delivered by the API.
    content: str
14 | 
15 | 
class BoostyPostDataListItemDTO(BaseModel):
    """
    Represents a single item in a list of post data chunks.

    The model is recursive: an item may contain nested items of the same type.
    """

    # Nested child items; empty when the item has no sub-list.
    items: list['BoostyPostDataListItemDTO'] = []
    # Data fragments that make up this item's own content.
    data: list[BoostyPostDataListDataItemDTO] = []


# Resolve the self-referencing 'BoostyPostDataListItemDTO' forward reference.
BoostyPostDataListItemDTO.model_rebuild()
24 | 
25 | 
class BoostyPostDataListDTO(BaseModel):
    """Represents a list of post data chunks."""

    # Fixed tag value; used as the discriminator when parsing post data.
    type: Literal['list']
    # Top-level items of the list.
    items: list[BoostyPostDataListItemDTO]
    # List style; None when the value is absent.
    style: Literal['ordered', 'unordered'] | None = None
32 | 


--------------------------------------------------------------------------------
/test/integration/configuration.py:
--------------------------------------------------------------------------------
 1 | from pydantic import Field
 2 | from pydantic_settings import BaseSettings, SettingsConfigDict
 3 | 
 4 | 
class IntegrationTestConfig(BaseSettings):
    """
    Loads and validates integration test config from environment variables.

    Values come from the environment or from a local `.env` file. Every field
    is required, and unknown variables are ignored.
    """

    # Credentials used to talk to Boosty.
    boosty_auth_token: str = Field(..., alias='BOOSTY_TOKEN')
    boosty_cookies: str = Field(..., alias='BOOSTY_COOKIES')

    # Targets the integration tests run against.
    boosty_available_post_url: str = Field(..., alias='BOOSTY_AVAILABLE_POST')
    boosty_unavailable_post_url: str = Field(..., alias='BOOSTY_UNAVAILABLE_POST')
    boosty_nonexistent_author: str = Field(..., alias='BOOSTY_NONEXISTENT_AUTHOR')
    boosty_existing_author: str = Field(..., alias='BOOSTY_EXISTING_AUTHOR')

    model_config = SettingsConfigDict(env_file='.env', extra='ignore')

    def summary(self) -> str:
        """
        Return a string dump of all loaded config fields for debug purposes.

        NOTE: the dump contains the auth token and cookies verbatim, so avoid
        writing it to shared logs.
        """
        return str(self.model_dump())
25 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/application/mappers/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | These modules contain mappers for converting Boosty API -> domain objects.
 3 | 
 4 | This is the main entry point for data transformation.
 5 | """
 6 | 
 7 | from boosty_downloader.src.application.ok_video_ranking import (
 8 |     get_best_video,
 9 |     get_quality_ranking,
10 | )
11 | 
12 | from .external_video import to_external_video_content
13 | from .file import to_domain_file_chunk
14 | from .image import to_domain_image_chunk
15 | from .link_header_text import to_domain_text_chunk
16 | from .list import to_domain_list_chunk
17 | from .ok_boosty_video import to_ok_boosty_video_content
18 | from .post_mapper import map_post_dto_to_domain
19 | 
20 | __all__ = [
21 |     'get_best_video',
22 |     'get_quality_ranking',
23 |     'map_post_dto_to_domain',
24 |     'to_domain_file_chunk',
25 |     'to_domain_image_chunk',
26 |     'to_domain_list_chunk',
27 |     'to_domain_text_chunk',
28 |     'to_external_video_content',
29 |     'to_ok_boosty_video_content',
30 | ]
31 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/models/post/post.py:
--------------------------------------------------------------------------------
 1 | """The module describes the form of a post of a user on boosty.to"""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from datetime import datetime  # noqa: TC003 Pydantic should know this type fully
 6 | 
 7 | from pydantic import ConfigDict
 8 | from pydantic.alias_generators import to_camel
 9 | from pydantic.main import BaseModel
10 | 
11 | from boosty_downloader.src.infrastructure.boosty_api.models.post.base_post_data import (
12 |     BasePostData,  # noqa: TC001 Pydantic should know this type fully
13 | )
14 | 
15 | 
class PostDTO(BaseModel):
    """
    Post on boosty.to together with its data pieces.

    Fields are declared in snake_case; the camelCase alias generator lets the
    model be populated from camelCase keys as well as by field name.
    """

    id: str
    title: str
    created_at: datetime
    updated_at: datetime
    has_access: bool

    signed_query: str

    # Content pieces; each element is validated as one of the BasePostData variants.
    data: list[BasePostData]

    model_config = ConfigDict(
        alias_generator=to_camel,
        populate_by_name=True,
        from_attributes=True,
    )
34 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/models/post/base_post_data.py:
--------------------------------------------------------------------------------
 1 | """
 2 | The module contains a model for boosty 'post' data.
 3 | 
 4 | Only essentials fields defined for parsing purposes.
 5 | """
 6 | 
 7 | from __future__ import annotations
 8 | 
 9 | from typing import Annotated
10 | 
11 | from pydantic import Field
12 | 
13 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post_data_types import (
14 |     BoostyPostDataExternalVideoDTO,
15 |     BoostyPostDataFileDTO,
16 |     BoostyPostDataHeaderDTO,
17 |     BoostyPostDataImageDTO,
18 |     BoostyPostDataLinkDTO,
19 |     BoostyPostDataListDTO,
20 |     BoostyPostDataOkVideoDTO,
21 |     BoostyPostDataTextDTO,
22 | )
23 | 
# Union of every supported post content DTO.
# Pydantic selects the concrete model from the value of the `type` field.
BasePostData = Annotated[
    BoostyPostDataTextDTO
    | BoostyPostDataImageDTO
    | BoostyPostDataLinkDTO
    | BoostyPostDataFileDTO
    | BoostyPostDataExternalVideoDTO
    | BoostyPostDataOkVideoDTO
    | BoostyPostDataHeaderDTO
    | BoostyPostDataListDTO,
    Field(
        discriminator='type',
    ),
]
37 | 


--------------------------------------------------------------------------------
/ruff.toml:
--------------------------------------------------------------------------------
 1 | # match black
 2 | 
 3 | line-length = 88
 4 | lint.select = [
 5 |     "ALL", # include all the rules, including new ones
 6 | ]
 7 | lint.ignore = [
 8 |     "E501",   # line too long    
 9 |     "D102",   # missing docstring in public method
10 |     "D212",   # multiline docstring should start at the first line (personal preference)
11 |     "D107",   # missing docstring in __init__ (lol why)
12 |     "D400",   # first line should end with a period (sometimes mess with markdown or code blocks)
 13 |     "D415",   # first line should end with a period, question mark, or exclamation point (same reason as D400)
 14 |     "RUF001", # ambiguous unicode character in string (non-ASCII look-alike characters are allowed)
15 |     "G004",   # don't log f-strings (personal preference)
 16 |     "D203",   # blank line required before class docstring (incompatible with D211)
17 |     "COM812", # missing trailing comma (formatter conflicts with this)
18 | ]
19 | 
20 | [lint.per-file-ignores]
21 | "test/*" = ["D", "ANN201", "S101", "PLR2004", "INP001"]
22 | "__init__.py" = ["D104"]
23 | 
24 | [format]
25 | quote-style = "single"
26 | 
27 | [lint.flake8-quotes]
28 | inline-quotes = "single"
29 | 


--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
 1 | # 📌 Task  <!-- Rename this to some general header, for example: "Fix bug X" -->
 2 | 
 3 | ## 📝 Description  
 4 | 
 5 | Describe in detail what has changed, why it is needed, and what problems this PR solves.  
 6 | 
 7 | ## 🔄 Changelog  
 8 | 
 9 | <!-- Please provide a list of general changes made in this PR -->
10 | 
11 | - **✨ Added:** …  
12 | - **🛠 Fixed:** …  
13 | - **🔄 Changed:** …  
14 | - **🗑 Removed:** …  
15 | 
16 | ## 🎯 Related Issue  
17 | <!-- Reference related issues, e.g., Closes #123 -->  
18 | 
19 | ## 📷 Screenshots (if applicable)  
20 | <!-- 
21 |     Add screenshots if the changes affect UI or visuals. 
22 | 
23 |     Or remove this section if not applicable.
24 | -->  
25 | 
26 | ## ✅ Checklist  
27 | 
28 | - [ ] Locally tested (`make test` and your own judgment)
29 | - [ ] Documentation updated (if necessary) 
30 | - [ ] Code follows the project's style guidelines (`make lint && make format`)
31 | 
32 | ## ⚠ Notes  
33 | <!-- 
34 |     Any important notes about the PR. 
35 |     
36 |     Or remove this section if not applicable.
37 | -->  
38 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2025 Roman Berezkin
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/domain/post.py:
--------------------------------------------------------------------------------
 1 | """Module define the Post domain model for further downloading."""
 2 | 
 3 | from dataclasses import dataclass
 4 | from datetime import datetime
 5 | 
 6 | from boosty_downloader.src.domain.post_data_chunks import (
 7 |     PostDataChunkBoostyVideo,
 8 |     PostDataChunkExternalVideo,
 9 |     PostDataChunkFile,
10 |     PostDataChunkImage,
11 |     PostDataChunkText,
12 |     PostDataChunkTextualList,
13 | )
14 | 
# Any single chunk type that a post can contain.
PostDataAllChunks = (
    PostDataChunkImage
    | PostDataChunkText
    | PostDataChunkBoostyVideo
    | PostDataChunkExternalVideo
    | PostDataChunkFile
    | PostDataChunkTextualList
)

# Ordered collection of chunks of any supported type.
PostDataAllChunksList = list[PostDataAllChunks]

# Narrower list type restricted to text, image and textual-list chunks.
PostDataPostOnlyChunksList = list[
    PostDataChunkText | PostDataChunkImage | PostDataChunkTextualList
]
29 | 
30 | 
@dataclass
class Post:
    """Post on boosty.to which have different kinds of content (images, text, videos, etc.)"""

    # Identity and metadata of the post.
    uuid: str
    title: str
    created_at: datetime
    updated_at: datetime
    has_access: bool

    signed_query: str

    # Content of the post as a list of typed chunks.
    post_data_chunks: PostDataAllChunksList
44 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/html_generator/templates/text.html:
--------------------------------------------------------------------------------
{# Renders every fragment of `text.text_fragments`:
   - header_level > 0 becomes a heading tag (levels above 6 are clamped to 6)
   - a fragment that is exactly a newline becomes a line break
   - anything else is emitted as text, wrapped in a link when link_url is set
     and in strong/em/u tags according to frag.style. -#}
{% for frag in text.text_fragments %}
{% set lvl = frag.header_level|default(0)|int %}
{% if lvl > 0 %}
{% if lvl > 6 %}{% set lvl = 6 %}{% endif %}
<h{{ lvl }}>{{ frag.text }}</h{{ lvl }}>
{% else %}
{% if frag.text in ['\n', '\r\n'] %}
<br>
{% else %}
{% if frag.link_url %}
<a href="{{ frag.link_url|e }}">
    {% if frag.style.bold %}<strong>{% endif %}
        {% if frag.style.italic %}<em>{% endif %}
            {% if frag.style.underline %}<u>{% endif %}
                {{ frag.text }}
                {% if frag.style.underline %}</u>{% endif %}
            {% if frag.style.italic %}</em>{% endif %}
        {% if frag.style.bold %}</strong>{% endif %}
</a>
{% else %}
{% if frag.style.bold %}<strong>{% endif %}
    {% if frag.style.italic %}<em>{% endif %}
        {% if frag.style.underline %}<u>{% endif %}
            {{ frag.text }}
            {% if frag.style.underline %}</u>{% endif %}
        {% if frag.style.italic %}</em>{% endif %}
    {% if frag.style.bold %}</strong>{% endif %}
{% endif %}
{% endif %}
{% endif %}
{% endfor %}


--------------------------------------------------------------------------------
/boosty_downloader/src/application/di/download_context.py:
--------------------------------------------------------------------------------
 1 | """Define the DownloadContext dataclass and its dependencies for the download workflow."""
 2 | 
 3 | from dataclasses import dataclass
 4 | 
 5 | from aiohttp_retry import RetryClient
 6 | 
 7 | from boosty_downloader.src.application.filtering import (
 8 |     BoostyOkVideoType,
 9 |     DownloadContentTypeFilter,
10 | )
11 | from boosty_downloader.src.infrastructure.external_videos_downloader.external_videos_downloader import (
12 |     ExternalVideosDownloader,
13 | )
14 | from boosty_downloader.src.infrastructure.loggers.failed_downloads_logger import (
15 |     FailedDownloadsLogger,
16 | )
17 | from boosty_downloader.src.infrastructure.post_caching.post_cache import SQLitePostCache
18 | from boosty_downloader.src.interfaces.console_progress_reporter import ProgressReporter
19 | 
20 | 
@dataclass
class DownloadContext:
    """Aggregates dependencies and configuration for the download workflow."""

    # Author whose posts are being downloaded.
    author_name: str
    # HTTP client with retry support used for downloads.
    downloader_session: RetryClient
    external_videos_downloader: ExternalVideosDownloader
    post_cache: SQLitePostCache
    # Content type filters limiting which parts of a post are downloaded.
    filters: list[DownloadContentTypeFilter]
    preferred_video_quality: BoostyOkVideoType
    progress_reporter: ProgressReporter
    # Sink for recording downloads that failed.
    failed_logger: FailedDownloadsLogger
33 | 


--------------------------------------------------------------------------------
/test/ABOUT_TESTING.md:
--------------------------------------------------------------------------------
 1 | # Structure 
 2 | 
 3 | Tests structure doesn't mirror the application structure, but rather groups tests by their functionality or "domain":
 4 | 
 5 | ```
 6 | test/
 7 | ├── analysis     - Tests used ONLY to analyze responses from known endpoints
 8 | │   └── ...
 9 | │ 
10 | ├── unit         - Unit tests for the application, grouped by "domains"
11 | │   └── ...
12 | │ 
13 | └── integration  - Integration tests for the application, grouped by "domains"
14 | ```
15 | 
16 | # Add a new test 
17 | 
18 | **If you want to add a new test:**
19 | 1. *Decide whether it is a unit test or an integration test.*
20 |     - **Integration** tests depend on external services (Boosty) or the network and can be configured.
21 |     - **Unit** tests are isolated and can be run any time without configuration or setup.
22 | 2. *Decide which "domain" it belongs to*
23 |     - For example ok_video_ranking is the boosty_downloader's domain.
24 | 3. *Create test file, following the naming convention `<filename>_test.py`.*
25 | 4. Test some functionality with `test_<functionality>` function name.
26 |     - Use `assert` statements to check expected outcomes.
27 | 5. *Run the test using `make test` for unit tests or `make test-integration` for integration tests.*
28 | 6. *Make a pull request with your changes.* (see [CONTRIBUTING.md](../CONTRIBUTING.md) for more details)
29 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/application/mappers/ok_boosty_video.py:
--------------------------------------------------------------------------------
 1 | """Mapper for converting Boosty API video DTOs to domain video content objects."""
 2 | 
 3 | from boosty_downloader.src.application.ok_video_ranking import (
 4 |     get_best_video,
 5 | )
 6 | from boosty_downloader.src.domain.post import PostDataChunkBoostyVideo
 7 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post_data_types import (
 8 |     BoostyPostDataOkVideoDTO,
 9 | )
10 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post_data_types.post_data_ok_video import (
11 |     BoostyOkVideoType,
12 | )
13 | 
14 | 
def to_ok_boosty_video_content(
    api_video_dto: BoostyPostDataOkVideoDTO, preferred_quality: BoostyOkVideoType
) -> PostDataChunkBoostyVideo | None:
    """
    Map an API ok-video DTO to the domain Boosty video chunk.

    The player URL is picked by ``get_best_video`` from the DTO's
    ``player_urls`` using the preferred quality. Returns ``None`` when
    no URL could be selected.
    """
    selection = get_best_video(
        preferred_quality=preferred_quality,
        video_urls=api_video_dto.player_urls,
    )

    if selection is not None:
        selected_url, selected_quality = selection
        return PostDataChunkBoostyVideo(
            url=selected_url.url,
            title=api_video_dto.title,
            quality=selected_quality.name,
        )

    return None
39 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "boosty-downloader"
 3 | version = "2.0.1"
 4 | description = ""
 5 | authors = [
 6 |     { name = "Roman Berezkin", email = "Glitchy-Sheep@users.noreply.github.com" },
 7 | ]
 8 | readme = "README.md"
 9 | requires-python = ">=3.10,<4"
10 | dependencies = [
11 |     "asyncio (>=3.4.3,<4.0.0)",
12 |     "aiofiles (>=24.1.0,<25.0.0)",
13 |     "aiohttp (>=3.11.12,<4.0.0)",
14 |     "pydantic (>=2.10.6,<3.0.0)",
15 |     "rich (>=14.0.0,<14.1.0)",
16 |     "pydantic-settings[yaml] (>=2.7.1,<3.0.0)",
17 |     "typer (>=0.16.0,<0.17.0)",
18 |     "yt-dlp (>=2025.1.26,<2026.0.0)",
19 |     "jinja2 (>=3.1.5,<4.0.0)",
20 |     "aiohttp-retry (>=2.9.1,<3.0.0)",
21 |     "yarl (>=1.18.3,<2.0.0)",
22 |     "sqlalchemy (>=2.0.42,<3.0.0)",
23 |     "aiolimiter (>=1.2.1,<2.0.0)",
24 |     "packaging (>=25.0,<26.0)",
25 | ]
26 | 
27 | [project.scripts]
28 | boosty-downloader = "boosty_downloader.main:entry_point"
29 | 
30 | [build-system]
31 | requires = ["poetry-core>=2.0.0,<3.0.0"]
32 | build-backend = "poetry.core.masonry.api"
33 | 
34 | [tool.poetry.group.dev.dependencies]
35 | ruff = ">=0.9.6,<0.13.0"
36 | pyright = "^1.1.394"
37 | pytest = "^8.3.4"
38 | pytest-asyncio = "^1.1.0"
39 | 
40 | 
41 | [tool.poetry]
42 | name = "boosty-downloader"
43 | version = "2.0.1"
44 | description = "Download any type of content from boosty.to"
45 | authors = ["Roman Berezkin"]
46 | readme = "README.md"
47 | 
48 | packages = [{ include = "boosty_downloader" }]
49 | 


--------------------------------------------------------------------------------
/.github/workflows/any-pr-validation.yaml:
--------------------------------------------------------------------------------
 1 | # This workflow triggers on pull requests targeting main or dev, and on pushes to dev, feature/** and hotfix/** branches
 2 | name: 🔍 PR Code Health Checks (linters / type checks / tests)
 3 | 
 4 | on:
 5 |   pull_request:
 6 |     branches:
 7 |       - main
 8 |       - dev
 9 |   push:
10 |     branches:
11 |       - dev
12 |       - 'feature/**'
13 |       - 'hotfix/**'
14 | 
15 | env:
16 |   PACKAGE_NAME: "boosty-downloader"
17 | 
18 | jobs:
19 |   lint-test-build:
20 |     name: 🧪 Code Quality & Build
21 |     runs-on: ubuntu-latest
22 |     steps:
23 |       - uses: actions/checkout@v4
24 | 
25 |       - name: 🐍 Set up Python
26 |         uses: actions/setup-python@v5
27 |         with:
28 |           python-version: "3.12"
29 | 
30 |       - name: 📦 Install Poetry if missing
31 |         uses: snok/install-poetry@v1
32 |         with:
33 |           version: 'latest'
34 |           
35 |       - name: 📥 Install dependencies
36 |         run: poetry sync
37 | 
38 |       - name: 🔍 Run ruff linting
39 |         run: make lint-check
40 | 
41 |       - name: 🎨 Run ruff formatting check
42 |         run: make format-check
43 | 
44 |       - name: 🔎 Run type checking
45 |         run: make types
46 | 
47 |       - name: 🧪 Run tests
48 |         run: |
49 |           make test-verbose
50 |           make test-api-verbose
51 |         timeout-minutes: 5
52 | 
53 |       - name: 🏗️ Build package
54 |         run: make build
55 | 
56 |       - name: ✅ Verify build artifacts
57 |         run: |
58 |           ls -la dist/
59 |           if [ ! -f dist/*.whl ] || [ ! -f dist/*.tar.gz ]; then
60 |             echo "❌ Build artifacts missing"
61 |             exit 1
62 |           fi
63 |           echo "✅ Build artifacts created successfully"
64 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/models/post/post_data_types/post_data_ok_video.py:
--------------------------------------------------------------------------------
 1 | """Module with ok video representation of posts data"""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from datetime import timedelta  # noqa: TC003 Pydantic should know this type fully
 6 | from enum import Enum
 7 | from typing import Literal
 8 | 
 9 | from pydantic import BaseModel, ConfigDict
10 | from pydantic.alias_generators import to_camel
11 | 
12 | 
class BoostyOkVideoType(Enum):
    """
    All the types which boosty provides for ok video

    Each member's value equals its name, so the enum can be built directly
    from the string found in the API payload.
    """

    # NOTE(review): the first two groups look like streaming/manifest formats
    # and the last group like fixed quality levels — confirm against the API.
    live_playback_dash = 'live_playback_dash'
    live_playback_hls = 'live_playback_hls'
    live_ondemand_hls = 'live_ondemand_hls'

    live_dash = 'live_dash'
    live_hls = 'live_hls'
    hls = 'hls'
    dash = 'dash'
    dash_uni = 'dash_uni'
    live_cmaf = 'live_cmaf'

    ultra_hd = 'ultra_hd'
    quad_hd = 'quad_hd'
    full_hd = 'full_hd'
    high = 'high'
    medium = 'medium'
    low = 'low'
    tiny = 'tiny'
    lowest = 'lowest'
35 | 
36 | 
class BoostyOkVideoUrl(BaseModel):
    """Link to video with specific format (link can be empty for some formats)"""

    # May be an empty string when the format is not available.
    url: str
    # Format/quality this URL corresponds to.
    type: BoostyOkVideoType
42 | 
43 | 
class BoostyPostDataOkVideoDTO(BaseModel):
    """
    Ok video content piece in posts

    Fields are declared in snake_case; the camelCase alias generator lets the
    model be populated from camelCase keys as well as by field name.
    """

    # Fixed tag value; used as the discriminator when parsing post data.
    type: Literal['ok_video']

    title: str
    failover_host: str
    duration: timedelta

    upload_status: str
    complete: bool
    # One entry per format/quality offered for this video.
    player_urls: list[BoostyOkVideoUrl]

    model_config = ConfigDict(
        alias_generator=to_camel,
        populate_by_name=True,
        from_attributes=True,
    )
62 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/application/exceptions/application_errors.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Custom exceptions for application-level download errors.
 3 | 
 4 | These classes standardize handling of any download failures from posts or resources,
 5 | wrapping lower-level errors into a unified application-level form.
 6 | """
 7 | 
 8 | 
 9 | class ApplicationBaseDownloadError(Exception):
10 |     """
11 |     Base class for all application-level download errors.
12 | 
13 |     Each error instance is bound to a specific post that triggered it.
14 | 
15 |     Attributes
16 |     ----------
17 |     post_uuid : str
18 |         Unique identifier of the post related to the error.
19 | 
20 |     """
21 | 
22 |     def __init__(self, post_uuid: str) -> None:
23 |         super().__init__()
24 |         self.post_uuid = post_uuid
25 | 
26 | 
27 | class ApplicationFailedDownloadError(ApplicationBaseDownloadError):
28 |     """
29 |     Raised when downloading a specific resource from a post fails.
30 | 
31 |     Causes may include network errors, invalid URLs, or resource unavailability
32 |     (e.g., a YouTube video becoming private).
33 | 
34 |     Attributes
35 |     ----------
36 |     resource : str
37 |         Identifier or description of the resource that failed to download.
38 |     message : str
39 |         Human-readable details about the failure.
40 | 
41 |     """
42 | 
43 |     def __init__(self, post_uuid: str, resource: str, message: str) -> None:
44 |         super().__init__(post_uuid)
45 |         self.resource = resource
46 |         self.message = message
47 | 
48 | 
49 | class ApplicationCancelledError(ApplicationBaseDownloadError):
50 |     """
51 |     Raised when a download for a specific post is cancelled by the user.
52 | 
53 |     Typically stops the entire download process.
54 |     """
55 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/application/filtering.py:
--------------------------------------------------------------------------------
 1 | """Content type filters for the download manager."""
 2 | 
 3 | from enum import Enum
 4 | 
 5 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post_data_types.post_data_ok_video import (
 6 |     BoostyOkVideoType,
 7 | )
 8 | 
 9 | 
class DownloadContentTypeFilter(Enum):
    """
    Content type filters for the download manager.

    They can be used to download only specific parts of content.
    Each member's value equals its name.
    """

    # -------------------------------------------------------------------
    # --------------------------- WARNING !!! ---------------------------
    # -------------------------------------------------------------------
    #
    # If you add any new content type filters here, please ensure that:
    # 1. You updated cache logic accordingly
    # 2. You updated all the use cases that use this filter
    # 3. You checked all other places in which those filters were used before

    boosty_videos = 'boosty_videos'
    external_videos = 'external_videos'
    post_content = 'post_content'
    files = 'files'
30 | 
31 | 
32 | class VideoQualityOption(str, Enum):
33 |     """Preferred video quality option for cli"""
34 | 
35 |     smallest_size = 'smallest_size'
36 |     low = 'low'
37 |     medium = 'medium'
38 |     high = 'high'
39 |     highest = 'highest'
40 | 
41 |     def to_ok_video_type(self) -> BoostyOkVideoType:
42 |         mapping = {
43 |             VideoQualityOption.smallest_size: BoostyOkVideoType.lowest,
44 |             VideoQualityOption.low: BoostyOkVideoType.low,
45 |             VideoQualityOption.medium: BoostyOkVideoType.medium,
46 |             VideoQualityOption.high: BoostyOkVideoType.high,
47 |             VideoQualityOption.highest: BoostyOkVideoType.ultra_hd,
48 |         }
49 |         return mapping[self]
50 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/update_checker/pypi_checker.py:
--------------------------------------------------------------------------------
 1 | """
 2 | PyPI update checker
 3 | 
 4 | Provides functions and data structures to check for updates of any package on PyPI.
 5 | """
 6 | 
 7 | import json
 8 | from dataclasses import dataclass
 9 | from enum import Enum, auto
10 | from urllib.request import urlopen
11 | 
12 | from packaging import version
13 | 
14 | 
class UpdateCheckStatus(Enum):
    """
    Represents the status of an update check.

    NOTE(review): nothing else in this module references this enum; the check
    functions return the UpdateResult dataclasses instead — confirm it is
    still needed.
    """

    NO_UPDATE = auto()
    UPDATE_AVAILABLE = auto()
    CHECK_FAILED = auto()
21 | 
22 | 
@dataclass
class UpdateAvailable:
    """Update is available: the latest published version is newer than the current one."""

    # Both values hold the string form of the parsed versions.
    current_version: str
    latest_version: str
29 | 
30 | 
@dataclass
class NoUpdate:
    """No update available: the latest published version is not newer than the current one."""
34 | 
35 | 
@dataclass
class CheckFailed:
    """Update check failed: the latest version could not be fetched or a version string was invalid."""
39 | 
40 | 
# Every possible outcome of an update check; callers distinguish them by type.
UpdateResult = UpdateAvailable | NoUpdate | CheckFailed
42 | 
43 | 
def get_pypi_latest_version(package_name: str, timeout: float = 10.0) -> str | None:
    """
    Fetch the latest version string of a package from PyPI.

    Queries the PyPI JSON endpoint for `package_name` and returns the value of
    `info.version`. The request is bounded by `timeout` (seconds) so that a
    stalled connection cannot block the caller indefinitely.

    Returns None on any failure (network error, timeout, HTTP error,
    malformed or unexpected response).
    """
    try:
        with urlopen(
            f'https://pypi.org/pypi/{package_name}/json', timeout=timeout
        ) as resp:
            data = json.load(resp)
            return data['info']['version']
    except Exception:  # noqa: BLE001 It doesn't matter what exception is raised, we just need to 100% catch it
        return None
52 | 
53 | 
def check_for_updates(current_version: str, package_name: str) -> UpdateResult:
    """
    Compare the given version with the latest one published on PyPI.

    Returns CheckFailed when the latest version cannot be fetched or either
    version string is invalid, UpdateAvailable when PyPI has a newer version,
    and NoUpdate otherwise.
    """
    published_raw = get_pypi_latest_version(package_name)
    if published_raw is None:
        return CheckFailed()

    try:
        installed, published = (
            version.parse(raw) for raw in (current_version, published_raw)
        )
    except version.InvalidVersion:
        return CheckFailed()

    if not published > installed:
        return NoUpdate()

    return UpdateAvailable(
        current_version=str(installed),
        latest_version=str(published),
    )
73 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/loggers/failed_downloads_logger.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Deduplicating file logger for failed downloads.
 3 | 
 4 | Format: "[<id>]: <message>"; duplicates are suppressed by <id>.
 5 | The log file and its parent directory are created on demand; writes append.
 6 | """
 7 | 
 8 | import re
 9 | from pathlib import Path
10 | 
11 | import aiofiles
12 | 
13 | 
class FailedDownloadsLogger:
    """
    Append-only deduplicating logger keyed by error id.

    Will write to a log file created on demand.
    Each error id is unique and will be written only once, including when
    several `add_error` calls for the same id run concurrently.
    """

    # Matches the "[<id>]:" prefix of a log line and captures <id>.
    _ID_PATTERN = re.compile(r'^\[(?P<id>[^\]]+)\]:')

    def __init__(self, log_file_path: Path) -> None:
        self.file_path = log_file_path
        # Only the parent directory is created here; the file itself appears on first write.
        self.file_path.parent.mkdir(parents=True, exist_ok=True)
        self._seen_ids: set[str] = set()
        self._loaded = False

    async def _ensure_loaded(self) -> None:
        """Read already-logged ids from the file once, so earlier runs are deduplicated too."""
        if self._loaded:
            return
        if not self.file_path.exists():
            self._loaded = True
            return

        async with aiofiles.open(self.file_path, encoding='utf-8') as f:
            async for line in f:
                m = self._ID_PATTERN.match(line.strip())
                if m:
                    self._seen_ids.add(m.group('id'))
        self._loaded = True

    async def _write_line(self, line: str) -> None:
        """Append a single line (trailing whitespace removed) to the log file."""
        async with aiofiles.open(self.file_path, 'a', encoding='utf-8') as f:
            await f.write(line.rstrip() + '\n')

    async def add_error(self, error_id: str, message: str) -> None:
        """
        Add a failed download error to the log.

        If the error ID is already logged, the message will be suppressed.
        """
        error_id = error_id.strip()
        message = message.strip()

        await self._ensure_loaded()
        if error_id in self._seen_ids:
            return

        # Reserve the id BEFORE awaiting the write: another task calling add_error
        # with the same id while the write is in flight must see it as already logged.
        self._seen_ids.add(error_id)
        try:
            await self._write_line(f'[{error_id}]: {message}')
        except BaseException:
            # Nothing was logged, so release the id to allow a later retry.
            self._seen_ids.discard(error_id)
            raise
62 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/html_generator/models.py:
--------------------------------------------------------------------------------
 1 | """HTML generator models that are independent from domain types."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from dataclasses import dataclass, field
 6 | from enum import Enum
 7 | 
 8 | 
@dataclass
class HtmlTextStyle:
    """Text styling options for HTML generation; every flag is off by default."""

    # Independent on/off flags; any combination may be set at once.
    bold: bool = False
    italic: bool = False
    underline: bool = False
16 | 
17 | 
@dataclass
class HtmlTextFragment:
    """A text fragment with optional styling and links."""

    text: str
    # Optional link target for this fragment; None by default.
    link_url: str | None = None
    header_level: int = 0  # 0 means no header, 1-6 for h1-h6
    # default_factory gives every fragment its own HtmlTextStyle instance
    # instead of sharing one mutable default object.
    style: HtmlTextStyle = field(default_factory=HtmlTextStyle)
26 | 
27 | 
@dataclass
class HtmlGenText:
    """Text content for HTML generation: a list of text fragments."""

    # Required; there is no default, so callers always pass the list explicitly.
    text_fragments: list[HtmlTextFragment]
33 | 
34 | 
@dataclass
class HtmlGenImage:
    """Image content for HTML generation."""

    url: str
    alt: str = 'Image'  # generic fallback alt text used when none is given
    # Optional dimensions; None by default.
    width: int | None = None
    height: int | None = None
43 | 
44 | 
@dataclass
class HtmlGenVideo:
    """Video content for HTML generation."""

    url: str
    # Both optional; None by default.
    title: str | None = None
    poster: str | None = None  # presumably a preview image location - confirm against the video template
52 | 
53 | 
class HtmlListStyle(Enum):
    """List style for HTML generation: ordered or unordered."""

    ORDERED = 'ordered'
    UNORDERED = 'unordered'
59 | 
60 | 
@dataclass
class HtmlListItem:
    """A single item in an HTML list; items may nest to any depth."""

    # Text chunks that make up the item's own content.
    data: list[HtmlGenText]
    # Child items. The parameterised alias `list['HtmlListItem']` is callable
    # and returns a fresh empty list, so each item gets its own list while the
    # factory stays precisely typed for the type checker.
    nested_items: list[HtmlListItem] = field(default_factory=list['HtmlListItem'])
67 | 
68 | 
@dataclass
class HtmlGenList:
    """List content for HTML generation."""

    # Top-level items of the list.
    items: list[HtmlListItem]
    # Lists are unordered unless a style is passed explicitly.
    style: HtmlListStyle = HtmlListStyle.UNORDERED
75 | 
76 | 
@dataclass
class HtmlGenFile:
    """File content for HTML generation."""

    url: str
    filename: str
    title: str | None = None  # optional; None by default
84 | 
85 | 
# Union of the five chunk models defined above (text, image, video, list, file).
# Code that accepts "any HTML chunk" annotates its parameter with this alias.
HtmlGenChunk = HtmlGenText | HtmlGenImage | HtmlGenVideo | HtmlGenList | HtmlGenFile
88 | 


--------------------------------------------------------------------------------
/test/integration/analysis/get_author_posts_test.py:
--------------------------------------------------------------------------------
 1 | from typing import Any
 2 | 
 3 | import pytest
 4 | import rich
 5 | from aiohttp_retry import RetryClient
 6 | 
 7 | from boosty_downloader.src.infrastructure.boosty_api.core.endpoints import (
 8 |     BOOSTY_DEFAULT_BASE_URL,
 9 | )
10 | from boosty_downloader.src.infrastructure.boosty_api.utils.filter_none_params import (
11 |     filter_none_params,
12 | )
13 | from integration.configuration import IntegrationTestConfig
14 | 
# Load the shared integration fixtures module as a pytest plugin.
# Presumably this is where `authorized_retry_client` and `integration_config`
# used below are defined - confirm in integration/fixtures.py.
pytest_plugins = [
    'integration.fixtures',
]
18 | 
19 | 
@pytest.mark.asyncio
async def test_get_author_posts(
    authorized_retry_client: RetryClient, integration_config: IntegrationTestConfig
) -> None:
    """Request up to 10 posts of the configured existing author and print the JSON payload."""
    author = integration_config.boosty_existing_author
    posts_url = f'{BOOSTY_DEFAULT_BASE_URL}blog/{author}/post/'

    response = await authorized_retry_client.get(
        posts_url,
        params=filter_none_params({'limit': 10}),
    )
    payload = await response.json()

    # The test only checks that a JSON body was returned at all.
    assert payload is not None

    # Dump the payload for manual inspection (visible when pytest runs with -s).
    rich.print_json(data=payload)
40 | 
41 | 
@pytest.mark.asyncio
async def test_all_data_chunk_types(
    authorized_retry_client: RetryClient,
    integration_config: IntegrationTestConfig,
) -> None:
    """Print one sample chunk for every distinct chunk type found in up to 25 posts of the author."""
    author = integration_config.boosty_existing_author
    posts_url = f'{BOOSTY_DEFAULT_BASE_URL}blog/{author}/post/'

    response = await authorized_retry_client.get(
        posts_url,
        params=filter_none_params({'limit': 25}),
    )
    payload = await response.json()

    assert payload is not None

    # chunk type -> first chunk seen with that type
    samples_by_type: dict[str, Any] = {}

    for post in payload['data']:
        rich.print(post)
        for chunk in post['data']:
            # setdefault keeps the first sample and ignores later ones.
            samples_by_type.setdefault(chunk['type'], chunk)

    rich.print_json(data=samples_by_type)
71 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/html_generator/renderer.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Module provides functions to render HTML content from structured data.
 3 | 
 4 | You can also dump the rendered HTML to a file.
 5 | 
 6 | Current implementation uses Jinja2 templates to render HTML with a little styling.
 7 | """
 8 | 
 9 | from pathlib import Path
10 | 
11 | from jinja2 import Environment, PackageLoader, select_autoescape
12 | 
13 | from boosty_downloader.src.infrastructure.html_generator.models import (
14 |     HtmlGenChunk,
15 |     HtmlGenFile,
16 |     HtmlGenImage,
17 |     HtmlGenList,
18 |     HtmlGenText,
19 |     HtmlGenVideo,
20 | )
21 | 
# Templates are loaded as package data from the `templates` directory of the
# `boosty_downloader.src.infrastructure.html_generator` package, so ANY change
# to that package path or directory name must be reflected here.
# There is also a test that checks the templates are available and render correctly.
env = Environment(
    loader=PackageLoader(
        'boosty_downloader.src.infrastructure.html_generator', 'templates'
    ),
    # Autoescaping is enabled for templates with the `html` extension.
    autoescape=select_autoescape(['html']),
)
31 | 
32 | 
33 | def render_html_chunk(chunk: HtmlGenChunk) -> str:
34 |     """Render a single HtmlGenChunk to its HTML representation."""
35 |     match chunk:
36 |         case HtmlGenText():
37 |             return env.get_template('text.html').render(text=chunk)
38 |         case HtmlGenImage():
39 |             return env.get_template('image.html').render(image=chunk)
40 |         case HtmlGenVideo():
41 |             chunk.url = str(chunk.url).replace('\\', '/')
42 |             return env.get_template('video.html').render(video=chunk)
43 |         case HtmlGenList():
44 |             return env.get_template('list.html').render(
45 |                 lst=chunk, render_chunk=render_html_chunk
46 |             )
47 |         case HtmlGenFile():
48 |             return f'<a href="{chunk.url}" download>{chunk.filename}</a>'
49 | 
50 | 
def render_html(chunks: list[HtmlGenChunk]) -> str:
    """Render every chunk, join the results with newlines and wrap them in the base template."""
    body = '\n'.join(map(render_html_chunk, chunks))
    page_template = env.get_template('base.html')
    return page_template.render(content=body)
55 | 
56 | 
def render_html_to_file(chunks: list[HtmlGenChunk], out_path: Path) -> None:
    """Render the chunks to a full HTML page and write it to `out_path` as UTF-8."""
    document = render_html(chunks)

    # Create missing parent directories before writing.
    target_dir = out_path.parent
    target_dir.mkdir(parents=True, exist_ok=True)

    out_path.write_text(document, encoding='utf-8')
62 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/application/use_cases/check_total_posts.py:
--------------------------------------------------------------------------------
 1 | """Use case for reporting the total number of posts and their accessibility for a given Boosty author."""
 2 | 
 3 | from boosty_downloader.src.infrastructure.boosty_api.core.client import (
 4 |     BoostyAPIClient,
 5 | )
 6 | from boosty_downloader.src.infrastructure.loggers.logger_instances import RichLogger
 7 | 
 8 | 
 9 | class ReportTotalPostsCountUseCase:
10 |     """
11 |     Reports the total number of posts and their accessibility for a given Boosty author.
12 | 
13 |     This use case iterates over all posts for the specified author, counts accessible and inaccessible posts,
14 |     and reports the results using the provided ProgressReporter.
15 |     """
16 | 
17 |     def __init__(
18 |         self,
19 |         author_name: str,
20 |         logger: RichLogger,
21 |         boosty_api: BoostyAPIClient,
22 |     ) -> None:
23 |         self.author_name = author_name
24 |         self.logger = logger
25 |         self.boosty_api = boosty_api
26 | 
27 |     async def execute(self) -> None:
28 |         current_page = 0
29 |         total_posts = 0
30 | 
31 |         accessible_posts_count = 0
32 |         inaccessible_posts_count = 0
33 |         inaccessible_posts_names: list[str] = []
34 | 
35 |         async for page in self.boosty_api.iterate_over_posts(
36 |             self.author_name, posts_per_page=100
37 |         ):
38 |             current_page += 1
39 |             total_posts += len(page.posts)
40 | 
41 |             self.logger.info(
42 |                 f'Processing page [bold]{current_page}[/bold]'
43 |                 ' | '
44 |                 f'Total posts so far: [bold]{total_posts}[/bold]'
45 |             )
46 | 
47 |             for post in page.posts:
48 |                 if post.has_access:
49 |                     accessible_posts_count += 1
50 |                 else:
51 |                     inaccessible_posts_count += 1
52 |                     inaccessible_posts_names.append('     - ' + post.title + '\n')
53 | 
54 |         inaccessible_titles_str = ''.join(inaccessible_posts_names)
55 | 
56 |         self.logger.success(
57 |             f'Total posts: [bold]{total_posts}[/bold]\n'
58 |             f'Accessible posts: [bold]{accessible_posts_count}[/bold]\n'
59 |             f'Inaccessible posts: [bold]{inaccessible_posts_count}[/bold] (need higher tier subscription) see their titles:\n'
60 |             '\n'
61 |             f'[bold]{inaccessible_titles_str}[/bold]'
62 |         )
63 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/application/mappers/list.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Contains the mapper function for converting Boosty API post data lists.
 3 | 
 4 | This module is responsible for transforming the Boosty API's list representation
 5 | to the domain's PostDataChunkTextualList object:
 6 | 
 7 | - unordered list example
 8 |     - one
 9 |     - two
10 | - ...
11 | 
12 | 1. ordered list example
13 |     1. one
14 |     2. two
15 | 2. ...
16 | """
17 | 
18 | from boosty_downloader.src.application.mappers.link_header_text import (
19 |     to_domain_text_chunk,
20 | )
21 | from boosty_downloader.src.domain.post_data_chunks import (
22 |     PostDataChunkText,
23 |     PostDataChunkTextualList,
24 | )
25 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post_data_types.post_data_list import (
26 |     BoostyPostDataListDTO,
27 |     BoostyPostDataListItemDTO,
28 | )
29 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post_data_types.post_data_text import (
30 |     BoostyPostDataTextDTO,
31 | )
32 | 
33 | 
def to_domain_list_chunk(post_list: BoostyPostDataListDTO) -> PostDataChunkTextualList:
    """Map an API list DTO, including all nested items, to a domain PostDataChunkTextualList."""

    def _text_chunks(api_item: BoostyPostDataListItemDTO) -> list[PostDataChunkText]:
        """Build domain text chunks from the item's `text` entries; other entry types are skipped."""
        return [
            PostDataChunkText(
                text_fragments=to_domain_text_chunk(
                    # The text mapper expects a text DTO, so wrap the entry in one.
                    BoostyPostDataTextDTO(
                        type='text',
                        content=entry.content,
                        modificator=entry.modificator or '',
                    )
                )
            )
            for entry in api_item.data
            if entry.type == 'text'
        ]

    def _to_domain_item(
        api_item: BoostyPostDataListItemDTO,
    ) -> PostDataChunkTextualList.ListItem:
        """Convert one API list item and, recursively, its nested items."""
        return PostDataChunkTextualList.ListItem(
            data=_text_chunks(api_item),
            nested_items=[_to_domain_item(child) for child in api_item.items],
        )

    return PostDataChunkTextualList(
        items=[_to_domain_item(top_level) for top_level in post_list.items]
    )
70 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/application/mappers/html_converter.py:
--------------------------------------------------------------------------------
 1 | """Converters from domain models to HTML generator models."""
 2 | 
 3 | from boosty_downloader.src.domain.post import (
 4 |     PostDataChunkImage,
 5 |     PostDataChunkText,
 6 |     PostDataChunkTextualList,
 7 | )
 8 | from boosty_downloader.src.domain.post_data_chunks import (
 9 |     PostDataChunkFile,
10 | )
11 | from boosty_downloader.src.infrastructure.html_generator.models import (
12 |     HtmlGenFile,
13 |     HtmlGenImage,
14 |     HtmlGenList,
15 |     HtmlGenText,
16 |     HtmlGenVideo,
17 |     HtmlListItem,
18 |     HtmlListStyle,
19 |     HtmlTextFragment,
20 |     HtmlTextStyle,
21 | )
22 | 
23 | 
def convert_text_to_html(chunk: PostDataChunkText) -> HtmlGenText:
    """Copy every fragment of a domain text chunk into the HTML generator's text model."""
    return HtmlGenText(
        text_fragments=[
            HtmlTextFragment(
                text=fragment.text,
                link_url=fragment.link_url,
                header_level=fragment.header_level,
                # Styles are copied flag by flag into the HTML-side style model.
                style=HtmlTextStyle(
                    bold=fragment.style.bold,
                    italic=fragment.style.italic,
                    underline=fragment.style.underline,
                ),
            )
            for fragment in chunk.text_fragments
        ]
    )
42 | 
43 | 
def convert_image_to_html(chunk: PostDataChunkImage) -> HtmlGenImage:
    """Build an HTML image model from the domain image chunk's URL."""
    image_url = chunk.url
    return HtmlGenImage(url=image_url)
47 | 
48 | 
def convert_video_to_html(src: str, title: str) -> HtmlGenVideo:
    """Build an HTML video model from a video source location and its title."""
    video = HtmlGenVideo(url=src, title=title)
    return video
52 | 
53 | 
def convert_file_to_html(chunk: PostDataChunkFile) -> HtmlGenFile:
    """Build an HTML file model from the domain file chunk's URL and file name."""
    file_url, file_name = chunk.url, chunk.filename
    return HtmlGenFile(url=file_url, filename=file_name)
57 | 
58 | 
def convert_list_to_html(chunk: PostDataChunkTextualList) -> HtmlGenList:
    """Convert a domain list chunk, including nested items, to the HTML list model."""

    def _to_html_item(item: PostDataChunkTextualList.ListItem) -> HtmlListItem:
        """Convert one item: its text chunks first, then its children recursively."""
        return HtmlListItem(
            data=list(map(convert_text_to_html, item.data)),
            nested_items=list(map(_to_html_item, item.nested_items)),
        )

    html_items = list(map(_to_html_item, chunk.items))

    # The domain model carries no list style, so every list is rendered unordered.
    return HtmlGenList(items=html_items, style=HtmlListStyle.UNORDERED)
72 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # 💖 Contributing to Boosty Downloader
 2 | 
 3 | Hello, I'm glad you find this project useful and I appreciate your willingness to contribute.
 4 | 
 5 | I created this note to help you understand the way you can help improve the project.
 6 | 
 7 | 
 8 | ## 👩‍💻 Development Process
 9 | 
10 | <div align="center">
11 | <img src="/assets/dev-process.excalidraw.svg" height="400" alt="Development Process" style="border-radius: 12px;">
12 | </div>
13 | 
14 | ### 🔧 Quick Start
15 | 
16 | 1. Fork and clone the repository
17 | 2. Install dependencies: `poetry install`
18 | 3. Create a feature branch and make your changes
19 | 4. Run tests: `poetry run pytest`
20 | 5. Don't forget to bump the version with `poetry version patch` (or minor/major) and update `CHANGELOG.md`
21 | 6. Open a pull request and describe changes and why they are needed
22 | 
23 | **Most of the needed/handy commands are available via `make`.**
24 | To see available commands, run:
25 | ```bash
26 | make help
27 | ```
28 | 
29 | ### 🩺 Code Quality
30 | 
31 | We use:
32 | - **Ruff** for linting and formatting
33 | - **Pyright** for type checking
34 | - **pytest** for testing
35 | 
36 | *Please ensure your IDE is configured to use these tools for a smooth development experience.*
37 | 
38 | 
39 | ### 📝 Writing Good Commit Messages
40 | 
41 | **We use**:
42 | - [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) for commit messages.
43 | - [GitMoji](https://gitmoji.dev/) for visual representation of commit types. (**OPTIONAL**)
44 | - Describe not only the change but also **why** it was made.
45 | 
46 | 
47 | So a generic commit message would look like this:
48 | ```
49 | feat: ✨ Add hyperspace drive support
50 |          The hyperspace drive allows faster travel between galaxies.
51 | 
52 | fix: 🐛 Fix formatting.
53 | ```
54 | 
55 | **To make it even easier for you, use VS Code extension:** 
56 | - [VSCode Conventional Commits](https://marketplace.visualstudio.com/items?itemName=vivaxy.vscode-conventional-commits) - it speeds up writing commit messages in our format.
57 | 
58 | 
59 | ### ✅ Pull Requests CI Checks
60 | 
61 | **The project currently uses GitHub Actions to:**
62 | - Check PRs for code quality (linting, type checking, tests)
63 | - Check `dev -> main` PRs for version bump 
64 | - Automatically create releases on `main` merge (PyPI and GitHub Releases)
65 | 
66 | 
67 | ### 🔨 Other HOW TOs:
68 | 
69 | <details>
70 | <summary>🏁 Making a Release</summary>
71 | 
72 | 1. **Prepare in `dev` branch:**
73 |    ```bash
74 |    poetry version patch  # or minor/major
75 |    # Update CHANGELOG.md
76 |    git commit -am "chore: bump version to X.Y.Z"
77 |    git push origin dev
78 |    ```
79 | 
80 | 2. **Create PR:** `dev` → `main`
81 | 
82 | 3. **Merge PR** → Automatic release! 🎉
83 | </details>
84 | 
85 | <details>
86 | <summary>🐛 Hotfix</summary>
87 | 
88 | 1. **From main:**
89 |    ```bash
90 |    git checkout -b hotfix/fix-name
91 |    poetry version patch
92 |    # Fix bug, update changelog
93 |    ```
94 | 
95 | 2. **PR:** `hotfix/*` → `main`
96 | </details>
97 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/application/mappers/post_mapper.py:
--------------------------------------------------------------------------------
 1 | """Mapping logic for converting Boosty API post DTOs to domain Post objects."""
 2 | 
 3 | from boosty_downloader.src.application import mappers
 4 | from boosty_downloader.src.domain.post import Post
 5 | from boosty_downloader.src.domain.post_data_chunks import PostDataChunkText
 6 | from boosty_downloader.src.infrastructure.boosty_api.models.post.base_post_data import (
 7 |     BoostyPostDataExternalVideoDTO,
 8 | )
 9 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post import PostDTO
10 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post_data_types import (
11 |     BoostyPostDataFileDTO,
12 |     BoostyPostDataHeaderDTO,
13 |     BoostyPostDataImageDTO,
14 |     BoostyPostDataLinkDTO,
15 |     BoostyPostDataListDTO,
16 |     BoostyPostDataOkVideoDTO,
17 |     BoostyPostDataTextDTO,
18 | )
19 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post_data_types.post_data_ok_video import (
20 |     BoostyOkVideoType,
21 | )
22 | 
23 | 
def map_post_dto_to_domain(
    post_dto: PostDTO, preferred_video_quality: BoostyOkVideoType
) -> Post:
    """
    Build a domain Post from a Boosty API PostDTO.

    Post metadata is copied over and every recognised data chunk is mapped to
    its domain representation, in the original order. Chunks of any other type
    are skipped, as are Boosty videos for which the video mapper returns None.
    """
    post = Post(
        uuid=post_dto.id,
        title=post_dto.title,
        created_at=post_dto.created_at,
        updated_at=post_dto.updated_at,
        has_access=post_dto.has_access,
        signed_query=post_dto.signed_query,
        post_data_chunks=[],
    )
    add_chunk = post.post_data_chunks.append

    # Header, link and plain-text DTOs all go through the same text mapper.
    textual_dto_types = (
        BoostyPostDataHeaderDTO,
        BoostyPostDataLinkDTO,
        BoostyPostDataTextDTO,
    )

    for data_chunk in post_dto.data:
        if isinstance(data_chunk, BoostyPostDataImageDTO):
            add_chunk(mappers.to_domain_image_chunk(data_chunk))
        elif isinstance(data_chunk, textual_dto_types):
            fragments = mappers.to_domain_text_chunk(data_chunk)
            add_chunk(PostDataChunkText(text_fragments=fragments))
        elif isinstance(data_chunk, BoostyPostDataListDTO):
            add_chunk(mappers.to_domain_list_chunk(data_chunk))
        elif isinstance(data_chunk, BoostyPostDataFileDTO):
            # The file mapper also receives the post's signed query.
            add_chunk(mappers.to_domain_file_chunk(data_chunk, post.signed_query))
        elif isinstance(data_chunk, BoostyPostDataOkVideoDTO):
            video = mappers.to_ok_boosty_video_content(
                data_chunk, preferred_quality=preferred_video_quality
            )
            if video is not None:
                add_chunk(video)
        elif isinstance(data_chunk, BoostyPostDataExternalVideoDTO):
            add_chunk(mappers.to_external_video_content(data_chunk))

    return post
68 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/interfaces/cli_options.py:
--------------------------------------------------------------------------------
  1 | """CLI option definitions for Boosty Downloader."""
  2 | 
  3 | from pathlib import Path
  4 | from typing import Annotated
  5 | 
  6 | import typer
  7 | 
  8 | from boosty_downloader.src.application.filtering import (
  9 |     DownloadContentTypeFilter,
 10 |     VideoQualityOption,
 11 | )
 12 | from boosty_downloader.src.interfaces.help_panels import HelpPanels
 13 | 
# Each alias below bundles a parameter type with its typer.Option metadata,
# so a command can declare a parameter simply as `name: SomeOption`.

# Author whose posts are processed; no default is declared here.
UsernameOption = Annotated[
    str,
    typer.Option(
        '--username',
        '-u',
        help='Username to download posts from.',
    ),
]

# Pause between API requests; values below 1 second are rejected (min=1).
RequestDelaySecondsOption = Annotated[
    float,
    typer.Option(
        '--request-delay-seconds',
        '-d',
        help='Delay between requests to the API, in seconds',
        min=1,
        rich_help_panel=HelpPanels.network,
    ),
]


# Repeatable filter; per the help text, leaving it unset means "all content types".
# `metavar` is used to list the accepted values in the help output.
ContentTypeFilterOption = Annotated[
    list[DownloadContentTypeFilter] | None,
    typer.Option(
        '--content-type-filter',
        '-f',
        help='Choose what content you want to download\n\n(default: ALL SET)',
        metavar='Available options:\n- files\n- post_content\n- boosty_videos\n- external_videos\n',
        show_default=False,
        rich_help_panel=HelpPanels.filtering,
    ),
]


# Preferred video quality; the accepted values are listed through `metavar`.
PreferredVideoQualityOption = Annotated[
    VideoQualityOption,
    typer.Option(
        '--preferred-video-quality',
        '-q',
        help='Preferred video quality. If not available, the best quality will be used.',
        metavar='Available options:\n- smallest_size\n- low\n- medium\n- high\n- highest',
        rich_help_panel=HelpPanels.filtering,
    ),
]

# Optional URL of a single post to download instead of all posts.
PostUrlOption = Annotated[
    str | None,
    typer.Option(
        '--post-url',
        '-p',
        help='Download only the specified post if possible',
        metavar='URL',
        show_default=False,
        rich_help_panel=HelpPanels.actions,
    ),
]

# Flag: only report post counts and exit without downloading (see help text).
CheckTotalCountOption = Annotated[
    bool,
    typer.Option(
        '--only-check-total',
        '-t',
        help='Check total count of accessible/inaccessible(+names) posts and exit, no download',
        rich_help_panel=HelpPanels.actions,
    ),
]

# Flag: remove the whole posts cache of the selected username (see help text).
CleanCacheOption = Annotated[
    bool,
    typer.Option(
        '--clean-cache',
        '-c',
        help='Remove posts cache for selected username [italic]completely[/italic], use with caution',
        rich_help_panel=HelpPanels.actions,
    ),
]

# Optional output directory: must be a directory (file_okay=False) and is
# resolved to an absolute path by typer (resolve_path=True).
DestinationDirectoryOption = Annotated[
    Path | None,
    typer.Option(
        '--destination-directory',
        '-o',
        help='Directory to save downloaded posts',
        dir_okay=True,
        file_okay=False,
        resolve_path=True,
        rich_help_panel=HelpPanels.actions,
        show_default=False,
    ),
]
104 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: build test posts-example
 2 | 
 3 | # Ensure that all the pipe-like commands work correctly.
 4 | export PYTHONIOENCODING = utf-8
 5 | 
 6 | help:
 7 | 	@echo ------------------------- To run locally: ----------------------------
 8 | 	@echo Run make deps to install dependencies
 9 | 	@echo And to run current project locally without installation:
10 | 	@echo   poetry run python -m boosty_downloader.main
11 | 	@echo .                                                                    .
12 | 	@echo ------------------------- Available commands: ------------------------
13 | 	@echo Building:
14 | 	@echo   deps             - Install project dependencies using poetry
15 | 	@echo   build            - Build the project whl file 
16 | 	@echo ----------------------------------------------------------------------
17 | 	@echo Code Health:
18 | 	@echo   dev-fix          - Try to fix code issues, show problems if any
19 | 	@echo   ci-check         - Run CI checks (linter/formatter/type checks)
20 | 	@echo   types            - Code type checks using pyright 
21 | 	@echo   format-check     - Code format check using ruff
22 | 	@echo   format-fix       - Code format using ruff 
23 | 	@echo   lint-check       - Code linting (only check)
24 | 	@echo   lint-fix         - Code linting (try to fix)
25 | 	@echo ----------------------------------------------------------------------
26 | 	@echo Testing:
27 | 	@echo   test             - Run the project unit tests
28 | 	@echo   test-verbose     - Run the project unit tests
29 | 	@echo   test-api         - Run the project API integration tests
30 | 	@echo   test-api-verbose - Run the project API integration tests with verbose output
31 | 	@echo ----------------------------------------------------------------------
32 | 	@echo Endpoints Analysis (Only work if integration tests config available):
33 | 	@echo   posts_example    - Show posts json for defined author 
34 | 
35 | 
36 | 
37 | # ------------------------------------------------------------------------------
38 | # 📦 Distribution 
39 | 
40 | deps:
41 | 	poetry sync --no-interaction
42 | 
43 | build:
44 | 	poetry build --no-cache
45 | 	@echo Build complete at /dist/
46 | 
47 | # ------------------------------------------------------------------------------
48 | # 🩺 Code Health Checks
49 | 
50 | dev-fix: lint-fix format-fix types
51 | ci-check: lint-check types format-check
52 | 
53 | lint-check:
54 | 	poetry run ruff check .
55 | 
56 | lint-fix:
57 | 	poetry run ruff check --fix .
58 | 
59 | format-check:
60 | 	poetry run ruff format --check .
61 | 
62 | format-fix:
63 | 	poetry run ruff format .
64 | 	
65 | types:
66 | 	poetry run pyright
67 | 
68 | 
69 | # ------------------------------------------------------------------------------
70 | # 🧪 Testing 
71 | 
72 | test:
73 | 	poetry run pytest test/unit/ 
74 | 
75 | test-verbose:
76 | 	poetry run pytest -v test/unit/
77 | 
78 | test-api:
79 | 	poetry run pytest test/integration/
80 | 
81 | test-api-verbose:
82 | 	poetry run pytest -v test/integration/ 
83 | 
84 | # ------------------------------------------------------------------------------
85 | # 🔍 Endpoints analysis
86 | 
87 | posts-example:
88 | 	poetry run pytest ./test/integration/analysis/get_author_posts_test.py::test_get_author_posts -s -q
89 | 
90 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/yaml_configuration/config.py:
--------------------------------------------------------------------------------
 1 | """Configuration for the whole application"""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | import sys
 6 | from pathlib import Path
 7 | 
 8 | from pydantic import BaseModel, Field, ValidationError
 9 | from pydantic_settings import (
10 |     BaseSettings,
11 |     PydanticBaseSettingsSource,
12 |     SettingsConfigDict,
13 |     YamlConfigSettingsSource,
14 | )
15 | 
16 | from boosty_downloader.src.infrastructure.loggers import logger_instances
17 | from boosty_downloader.src.infrastructure.yaml_configuration.sample_config import (
18 |     DEFAULT_YAML_CONFIG_VALUE,
19 | )
20 | 
21 | 
class DownloadSettings(BaseModel):
    """Settings for the script downloading process"""

    # Relative default, so it resolves against the current working directory.
    target_directory: Path = Path('./boosty-downloads')
26 | 
27 | 
class AuthSettings(BaseModel):
    """Configuration for authentication (cookies and authorization headers)"""

    # Values supplied by the user must be non-empty (min_length=1).
    # NOTE(review): pydantic does not validate defaults unless validate_default
    # is enabled, so the empty default itself is not rejected - confirm that a
    # config without these keys is caught as intended.
    cookie: str = Field(default='', min_length=1)
    auth_header: str = Field(default='', min_length=1)
33 | 
34 | 
# Relative path: the config file is looked up (and created) in the current
# working directory.
CONFIG_LOCATION: Path = Path('config.yaml')
36 | 
37 | 
class Config(BaseSettings):
    """General script configuration with subsections"""

    # The YAML source reads CONFIG_LOCATION as UTF-8.
    model_config = SettingsConfigDict(
        yaml_file=CONFIG_LOCATION,
        yaml_file_encoding='utf-8',
    )

    auth: AuthSettings = AuthSettings()
    downloading_settings: DownloadSettings = DownloadSettings()

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ) -> tuple[PydanticBaseSettingsSource, ...]:
        """
        Add the YAML file as a settings source, placed first in the tuple.

        pydantic-settings gives earlier sources priority, so values from the
        YAML file win over init arguments, environment variables, dotenv files
        and file secrets.
        """
        return (
            YamlConfigSettingsSource(settings_cls),
            init_settings,
            env_settings,
            dotenv_settings,
            file_secret_settings,
        )
65 | 
66 | 
def create_sample_config_file() -> None:
    """Write the sample configuration to ``CONFIG_LOCATION``.

    Any existing file at that location is overwritten. The file is written as
    UTF-8 explicitly so that it matches the ``yaml_file_encoding`` used when the
    configuration is read back, instead of depending on the platform's default
    text encoding.
    """
    CONFIG_LOCATION.write_text(DEFAULT_YAML_CONFIG_VALUE, encoding='utf-8')
71 | 
72 | 
def init_config() -> Config:
    """Load the application config, writing a sample file when it is missing or invalid.

    Returns:
        The parsed configuration.

    The process exits with status 1, after (re)creating the sample config file,
    when the config file does not exist or when it fails validation.
    """
    if not CONFIG_LOCATION.exists():
        log = logger_instances.downloader_logger
        create_sample_config_file()
        log.error("Config doesn't exist")
        log.success(
            f'Created a sample config file at {CONFIG_LOCATION.absolute()}, please fill `auth_header` and `cookie` with yours before running the app',
        )
        sys.exit(1)

    try:
        return Config()
    except ValidationError:
        # The file exists but its content could not be validated.
        log = logger_instances.downloader_logger
        create_sample_config_file()
        log.error('Config is invalid (could not be parsed)')
        log.error(
            '[bold yellow]Make sure you fill `auth_header` and `cookie` with yours, they are required[/bold yellow]',
        )
        log.success(
            f'Recreated it at [green bold]{CONFIG_LOCATION.absolute()}[/green bold]',
        )
        sys.exit(1)
97 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | ## 2.0.1 
 2 | 
 3 | - 🐛 Fixed image data so posts download even when width/height is missing
 4 | - 🐛 Fixed download process to stop automatically after the chosen post
 5 | 
 6 | ## 2.0.0
 7 | 
 8 | ### ⛔ BREAKING CHANGES ⛔
 9 | 
10 | - Because of the new caching system, the cache database changed.
11 |   If you have an existing cache, you may need to clean it first to avoid issues.
12 | 
13 |   The utility will automatically detect cache inconsistencies and prompt you to clean it though.
14 | 
15 |   I tried to figure out some sort of db migration, but it is too complex for the current state of the project, so I decided to just make it a breaking change for now.
16 | 
17 |   If you know how I can keep migrating the cache given the fact that dbs are 
18 |   scattered across multiple author directories, and even possibly have different versions 
19 |   please let me know with an issue!
20 | 
21 | - Some options were renamed but their functionality remains the same
22 | 
23 | ### 🔔 New Features
24 | 
25 | - 🔔 **Automatic Update Checker**  
26 |   You'll now be notified when a new version is available on PyPI.
27 | 
28 | - 📦 **Improved Caching Layer**
29 |   - Only the requested parts are cached to avoid unnecessary re-downloads/skips (before this change the post was cached entirely not just the requested parts), so now partial updates are possible.
30 |   - Cache is properly **invalidated** if a post is updated by its author (will be re-downloaded).
31 |   - More **robust and accurate** caching system: better handling of missing post parts.
32 | 
33 | - **HTML Generation Enhancements**
34 |   - New **HTML generator engine** with support for **Dark/Light modes**. 🦉
35 |   - Added support for **headings and lists** in HTML output.
36 |   - Added better support for styling (italic/bold/etc)
37 |   - `post_content` now includes both **images AND videos** (offline only).
38 | 
39 | - **Improved CLI UX**
40 |   - New destination option that allows overriding config values.
41 |   - Better help descriptions with logical **option grouping**.
42 |   - More informative **post counter**: displays both accessible and inaccessible posts, with names listed for all inaccessible posts.
43 |   - Enhanced **logging and error handling** for a more readable and helpful output.
44 | 
45 | - **Retry Logic**
46 |   - If post download fails, it will be retried up to 5 times with exponential backoff.
47 |   - After 5 failed attempts, the post will be skipped and not cached.
48 | 
49 | ### 🐛 Fixes
50 | 
51 | - Fixed duplication problem [#12](https://github.com/Glitchy-Sheep/boosty-downloader/issues/12) (now posts are cached by UUID and have it as part of the filename, so duplication is no longer an issue)
52 | - Fixed external video downloading for unsupported formats (now format >=720p is preferred, less otherwise).
53 | - Fixed HTML generation for posts with **no content**, now it won't be created.
54 | - Resolved issues with **newline handling** in some HTML outputs.
55 | - Fixed **Ctrl+C interruption** handling with proper cleanup and messaging.
56 | - Prevented creation of **empty directories** for posts with no downloadable content;
57 |   now the utility creates a directory only if there is something to download.
58 |     
59 | ### 🧹 Miscellaneous
60 | 
61 | - Internal **project structure refactored** for better maintainability and scalability.
62 | 
63 | ## 1.0.1
64 | - Fix: 🐛 Support new boosty API response schema (as a placeholder)
65 | 
66 | ## 1.0.0
67 | 
68 | - First stable release
69 | - Main downloader functions such as video/post/external_video/files
70 | - Added CLI interface with typer (with customizable options)
71 | 


--------------------------------------------------------------------------------
/test/unit/download_manager/ok_video_ranking_test.py:
--------------------------------------------------------------------------------
  1 | from boosty_downloader.src.application.mappers import (
  2 |     get_best_video,
  3 |     get_quality_ranking,
  4 | )
  5 | from boosty_downloader.src.application.ok_video_ranking import (
  6 |     BoostyOkVideoType,
  7 |     BoostyOkVideoUrl,
  8 |     RankingDict,
  9 | )
 10 | 
 11 | 
 12 | def test_ranking_dict_basic_operations():
 13 |     ranking = RankingDict[str]()
 14 |     ranking['a'] = 10
 15 |     ranking['b'] = 20
 16 |     ranking['c'] = 15
 17 | 
 18 |     assert ranking['a'] == 10
 19 |     assert ranking['b'] == 20
 20 |     assert ranking['c'] == 15
 21 | 
 22 |     assert ranking.pop_max() == ('b', 20)
 23 |     assert ranking.pop_max() == ('c', 15)
 24 |     assert ranking.pop_max() == ('a', 10)
 25 |     assert ranking.pop_max() is None
 26 | 
 27 | 
 28 | def test_ranking_dict_delete():
 29 |     ranking = RankingDict[str]()
 30 |     ranking['x'] = 5
 31 |     ranking['y'] = 10
 32 | 
 33 |     del ranking['x']
 34 |     assert 'x' not in ranking.data
 35 |     assert ranking.pop_max() == ('y', 10)
 36 |     assert ranking.pop_max() is None
 37 | 
 38 | 
 39 | def test_get_quality_ranking():
 40 |     ranking = get_quality_ranking()
 41 |     assert ranking[BoostyOkVideoType.ultra_hd] == 17
 42 |     assert ranking[BoostyOkVideoType.lowest] == 10
 43 |     assert ranking.pop_max() == (BoostyOkVideoType.ultra_hd, 17)
 44 |     assert ranking.pop_max() == (BoostyOkVideoType.quad_hd, 16)
 45 |     assert ranking.pop_max() == (BoostyOkVideoType.full_hd, 15)
 46 | 
 47 | 
 48 | def test_get_best_video():
 49 |     video_urls = [
 50 |         BoostyOkVideoUrl(type=BoostyOkVideoType.low, url='low.mp4'),
 51 |         BoostyOkVideoUrl(type=BoostyOkVideoType.medium, url='medium.mp4'),
 52 |         BoostyOkVideoUrl(type=BoostyOkVideoType.full_hd, url='full_hd.mp4'),
 53 |     ]
 54 | 
 55 |     best_video_info = get_best_video(video_urls)
 56 |     best_video = best_video_info[0] if best_video_info else None
 57 |     assert best_video is not None
 58 |     assert best_video.type == BoostyOkVideoType.medium  # Default preference
 59 |     assert best_video.url == 'medium.mp4'
 60 | 
 61 | 
 62 | def test_get_best_video_with_preference():
 63 |     video_urls = [
 64 |         BoostyOkVideoUrl(type=BoostyOkVideoType.low, url='low.mp4'),
 65 |         BoostyOkVideoUrl(type=BoostyOkVideoType.full_hd, url='full_hd.mp4'),
 66 |     ]
 67 | 
 68 |     best_video_info = get_best_video(
 69 |         video_urls, preferred_quality=BoostyOkVideoType.full_hd
 70 |     )
 71 | 
 72 |     best_video = best_video_info[0] if best_video_info else None
 73 | 
 74 |     assert best_video is not None
 75 |     assert best_video.type == BoostyOkVideoType.full_hd
 76 |     assert best_video.url == 'full_hd.mp4'
 77 | 
 78 | 
 79 | def test_get_best_video_no_available():
 80 |     video_urls = [
 81 |         BoostyOkVideoUrl(type=BoostyOkVideoType.low, url=''),  # No valid URL
 82 |         BoostyOkVideoUrl(type=BoostyOkVideoType.medium, url=''),
 83 |     ]
 84 | 
 85 |     best_video = get_best_video(video_urls)
 86 |     assert best_video is None
 87 | 
 88 | 
 89 | def test_get_best_video_empty_list():
 90 |     best_video = get_best_video([])
 91 |     assert best_video is None
 92 | 
 93 | 
 94 | def test_ranking_dict_with_duplicate_entries():
 95 |     ranking = RankingDict[str]()
 96 |     ranking['a'] = 10
 97 |     ranking['b'] = 20
 98 |     ranking['a'] = 30  # Overwriting "a" with a higher value
 99 | 
100 |     assert ranking.pop_max() == ('a', 30)
101 |     assert ranking.pop_max() == ('b', 20)
102 |     assert ranking.pop_max() is None
103 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/loggers/base.py:
--------------------------------------------------------------------------------
  1 | """Logger for the application."""
  2 | 
  3 | import io
  4 | import logging
  5 | import sys
  6 | 
  7 | from rich.logging import RichHandler
  8 | 
# Detect if running in a terminal
is_terminal = sys.stdout.isatty()

# Ensure proper UTF-8 handling in non-interactive environments.
# stdout is left untouched when pytest has been imported.
if not is_terminal and 'pytest' not in sys.modules:
    # Re-wrap the underlying binary buffer so that text is encoded as UTF-8
    # and flushed at every newline.
    sys.stdout = io.TextIOWrapper(
        sys.stdout.buffer,
        encoding='utf-8',
        line_buffering=True,
    )
 19 | 
 20 | 
 21 | class RichLogger:
 22 |     """Enhanced logger with Rich for colorful output while keeping severity levels."""
 23 | 
 24 |     def __init__(self, prefix: str) -> None:
 25 |         self.prefix = prefix
 26 | 
 27 |         # Avoid adding duplicate handlers
 28 |         handler = RichHandler(
 29 |             log_time_format='[%H:%M:%S]',
 30 |             markup=True,
 31 |             show_time=True,
 32 |             rich_tracebacks=True,
 33 |             show_path=False,
 34 |             show_level=False,
 35 |         )
 36 | 
 37 |         self._handler = handler
 38 |         self._log = logging.getLogger(prefix)
 39 |         self._log.setLevel(logging.DEBUG)
 40 |         self._log.addHandler(handler)
 41 |         self.console = self._handler.console
 42 |         self.logging_logger_obj = self._log
 43 | 
 44 |     def _log_message(
 45 |         self,
 46 |         level: int,
 47 |         msg: str,
 48 |         *,
 49 |         highlight: bool = True,
 50 |         tab_level: int = 0,
 51 |     ) -> None:
 52 |         if highlight:
 53 |             self._log.log(level, '\t' * tab_level + msg)
 54 |         else:
 55 |             self._handler.console.log('\t' * tab_level + msg, highlight=False)
 56 | 
 57 |     def info(self, msg: str, *, highlight: bool = True, tab_level: int = 0) -> None:
 58 |         prefix = f'[cyan]{self.prefix}[/cyan][blue].INFO 🔹[/blue]:'
 59 |         self._log_message(
 60 |             logging.INFO,
 61 |             f'{prefix} {msg}',
 62 |             highlight=highlight,
 63 |             tab_level=tab_level,
 64 |         )
 65 | 
 66 |     def success(self, msg: str, *, highlight: bool = True, tab_level: int = 0) -> None:
 67 |         prefix = f'[cyan]{self.prefix}[/cyan][green].SUCCESS ✔[/green]:'
 68 |         self._log_message(
 69 |             logging.INFO,
 70 |             f'{prefix} {msg}',
 71 |             highlight=highlight,
 72 |             tab_level=tab_level,
 73 |         )
 74 | 
 75 |     def error(self, msg: str, *, highlight: bool = True, tab_level: int = 0) -> None:
 76 |         prefix = f'[cyan]{self.prefix}[/cyan][bold red].ERROR ❌[/bold red]:'
 77 |         self._log_message(
 78 |             logging.ERROR,
 79 |             f'{prefix} {msg}',
 80 |             highlight=highlight,
 81 |             tab_level=tab_level,
 82 |         )
 83 | 
 84 |     def wait(self, msg: str, *, highlight: bool = True, tab_level: int = 0) -> None:
 85 |         prefix = f'[cyan]{self.prefix}[/cyan][yellow].WAIT ⏳[/yellow]:'
 86 |         self._log_message(
 87 |             logging.INFO,
 88 |             f'{prefix} {msg}',
 89 |             highlight=highlight,
 90 |             tab_level=tab_level,
 91 |         )
 92 | 
 93 |     def warning(self, msg: str, *, highlight: bool = True, tab_level: int = 0) -> None:
 94 |         prefix = f'[cyan]{self.prefix}[/cyan][bold yellow].WARNING ⚠ [/bold yellow]:'
 95 |         self._log_message(
 96 |             logging.WARNING,
 97 |             f'{prefix} {msg}',
 98 |             highlight=highlight,
 99 |             tab_level=tab_level,
100 |         )
101 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/application/ok_video_ranking.py:
--------------------------------------------------------------------------------
 1 | """The module provides tools to work with ok video links (selecting them) by quality."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | import heapq
 6 | from typing import Generic, TypeVar
 7 | 
 8 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post_data_types.post_data_ok_video import (
 9 |     BoostyOkVideoType,
10 |     BoostyOkVideoUrl,
11 | )
12 | 
KT = TypeVar('KT')


class RankingDict(Generic[KT]):
    """A key -> score mapping that can also pop the entry with the highest score.

    Scores live in ``data``; ``max_heap`` is a heap with lazy deletion, so
    overwritten or deleted keys leave stale heap items behind that ``pop_max``
    skips. Entries with equal scores are popped in insertion order. It's not
    thread-safe.
    """

    def __init__(self) -> None:
        self.data: dict[KT, float] = {}
        # Heap items are (-score, sequence, key). The negated score turns
        # heapq's min-heap into a max-heap, and the unique sequence number
        # breaks ties so that keys themselves are never compared (keys need
        # not be orderable).
        self.max_heap: list[tuple[float, int, KT]] = []
        # The heap item that is currently valid for each live key.
        self.entries: dict[KT, tuple[float, int, KT]] = {}
        self._next_sequence = 0

    def __getitem__(self, key: KT) -> float:
        """Get the value associated with the key (raises KeyError if absent)"""
        return self.data[key]

    def __setitem__(self, key: KT, value: float) -> None:
        """Set the value associated with the key, superseding any previous one"""
        entry = (-value, self._next_sequence, key)
        self._next_sequence += 1
        self.data[key] = value
        self.entries[key] = entry
        heapq.heappush(self.max_heap, entry)

    def __delitem__(self, key: KT) -> None:
        """Remove the key and its value; a missing key is silently ignored"""
        self.data.pop(key, None)
        # Dropping the entry invalidates the key's heap item, which pop_max
        # then discards when it surfaces.
        self.entries.pop(key, None)

    def pop_max(self) -> tuple[KT, float] | None:
        """Remove and return ``(key, value)`` with the highest value, or None if empty"""
        while self.max_heap:
            entry = heapq.heappop(self.max_heap)
            key = entry[2]
            # Only the most recently pushed item of a live key is valid.
            if self.entries.get(key) is entry:
                del self.data[key]
                del self.entries[key]
                return key, -entry[0]  # Convert back to positive
        return None
51 | 
52 | 
def get_quality_ranking() -> RankingDict[BoostyOkVideoType]:
    """Build the default ranking of video types, from most to least preferred.

    The first type listed receives the highest score (17) and every following
    type scores one less, down to 1 for the last.
    """
    best_to_worst = (
        BoostyOkVideoType.ultra_hd,
        BoostyOkVideoType.quad_hd,
        BoostyOkVideoType.full_hd,
        BoostyOkVideoType.high,
        BoostyOkVideoType.medium,
        BoostyOkVideoType.low,
        BoostyOkVideoType.tiny,
        BoostyOkVideoType.lowest,
        BoostyOkVideoType.live_playback_dash,
        BoostyOkVideoType.live_playback_hls,
        BoostyOkVideoType.live_ondemand_hls,
        BoostyOkVideoType.live_dash,
        BoostyOkVideoType.live_hls,
        BoostyOkVideoType.hls,
        BoostyOkVideoType.dash,
        BoostyOkVideoType.dash_uni,
        BoostyOkVideoType.live_cmaf,
    )

    ranking = RankingDict[BoostyOkVideoType]()
    top_score = len(best_to_worst)
    for position, video_type in enumerate(best_to_worst):
        ranking[video_type] = top_score - position
    return ranking
75 | 
76 | 
def get_best_video(
    video_urls: list[BoostyOkVideoUrl],
    preferred_quality: BoostyOkVideoType = BoostyOkVideoType.medium,
) -> tuple[BoostyOkVideoUrl, BoostyOkVideoType] | None:
    """Pick the video to download, favouring the preferred quality.

    The preferred quality is tried first; if it has no usable URL, the
    remaining qualities are tried from the highest ranked downwards.

    Returns:
        The chosen video entry together with its type, or None when no entry
        has a non-empty URL.
    """
    ranking: RankingDict[BoostyOkVideoType] = get_quality_ranking()
    # An infinite score makes the preferred quality the first one popped.
    ranking[preferred_quality] = float('inf')

    videos_by_type = {}
    for video in video_urls:
        videos_by_type[video.type] = video

    while top_ranked := ranking.pop_max():
        video_type = top_ranked[0]
        candidate = videos_by_type.get(video_type)
        if candidate and candidate.url:
            return candidate, video_type

    return None
95 | 


--------------------------------------------------------------------------------
/test/integration/boosty_api/boosty_api_test.py:
--------------------------------------------------------------------------------
  1 | """Integration tests for Boosty API client.
  2 | 
  3 | These tests make real requests to the Boosty API and require proper configuration.
  4 | 
  5 | Please see test/ABOUT_TESTING.md for more details.
  6 | """
  7 | 
  8 | import pytest
  9 | 
 10 | from boosty_downloader.src.infrastructure.boosty_api import (
 11 |     BoostyAPIClient,
 12 | )
 13 | from boosty_downloader.src.infrastructure.boosty_api.core.client import (
 14 |     BoostyAPINoUsernameError,
 15 |     BoostyAPIUnauthorizedError,
 16 | )
 17 | from integration.configuration import IntegrationTestConfig
 18 | 
# For automatic fixture discovery: the listed module is loaded as a pytest
# plugin, which makes its fixtures available to the tests in this file.
pytest_plugins = [
    'integration.fixtures',
]
 23 | 
 24 | 
 25 | @pytest.mark.asyncio
 26 | async def test_get_posts_existing_author_success(
 27 |     authorized_boosty_client: BoostyAPIClient, integration_config: IntegrationTestConfig
 28 | ) -> None:
 29 |     """Test successful retrieval of posts from an existing author."""
 30 |     response = await authorized_boosty_client.get_author_posts(
 31 |         author_name=integration_config.boosty_existing_author, limit=5
 32 |     )
 33 | 
 34 |     assert response.posts is not None
 35 |     assert response.extra is not None
 36 |     assert len(response.posts) >= 0
 37 | 
 38 | 
 39 | @pytest.mark.asyncio
 40 | async def test_get_posts_nonexistent_author_raises_error(
 41 |     authorized_boosty_client: BoostyAPIClient, integration_config: IntegrationTestConfig
 42 | ) -> None:
 43 |     """Test that requesting posts from non-existent author raises BoostyAPINoUsernameError."""
 44 |     with pytest.raises(BoostyAPINoUsernameError):
 45 |         await authorized_boosty_client.get_author_posts(
 46 |             author_name=integration_config.boosty_nonexistent_author, limit=5
 47 |         )
 48 | 
 49 | 
 50 | @pytest.mark.asyncio
 51 | async def test_get_posts_with_pagination(
 52 |     authorized_boosty_client: BoostyAPIClient, integration_config: IntegrationTestConfig
 53 | ) -> None:
 54 |     """Test pagination functionality for author posts."""
 55 |     first_page = await authorized_boosty_client.get_author_posts(
 56 |         author_name=integration_config.boosty_existing_author, limit=2
 57 |     )
 58 | 
 59 |     if not first_page.extra.is_last and first_page.extra.offset:
 60 |         second_page = await authorized_boosty_client.get_author_posts(
 61 |             author_name=integration_config.boosty_existing_author,
 62 |             limit=2,
 63 |             offset=first_page.extra.offset,
 64 |         )
 65 | 
 66 |         # Posts should be different between pages (assuming author has more than 2 posts)
 67 |         first_page_ids = {post.id for post in first_page.posts}
 68 |         second_page_ids = {post.id for post in second_page.posts}
 69 |         assert first_page_ids.isdisjoint(second_page_ids), (
 70 |             'Pages should contain different posts'
 71 |         )
 72 | 
 73 | 
 74 | @pytest.mark.asyncio
 75 | async def test_iterate_over_posts(
 76 |     authorized_boosty_client: BoostyAPIClient, integration_config: IntegrationTestConfig
 77 | ) -> None:
 78 |     """Test the async generator for iterating over all author posts."""
 79 |     pages_count = 0
 80 |     total_posts = 0
 81 | 
 82 |     async for response in authorized_boosty_client.iterate_over_posts(
 83 |         author_name=integration_config.boosty_existing_author,
 84 |         posts_per_page=2,
 85 |     ):
 86 |         pages_count += 1
 87 |         total_posts += len(response.posts)
 88 | 
 89 |         # Limit iteration to avoid running too long in tests
 90 |         if pages_count >= 3:
 91 |             break
 92 | 
 93 |     assert pages_count > 0, 'Should retrieve at least one page'
 94 |     assert total_posts >= 0, 'Should count posts correctly'
 95 | 
 96 | 
 97 | @pytest.mark.asyncio
 98 | async def test_unathoirized_raises_error(
 99 |     invalid_auth_boosty_client: BoostyAPIClient,
100 |     integration_config: IntegrationTestConfig,
101 | ) -> None:
102 |     """Test that unauthorized access raises an error."""
103 |     with pytest.raises(BoostyAPIUnauthorizedError):
104 |         await invalid_auth_boosty_client.get_author_posts(
105 |             author_name=integration_config.boosty_existing_author, limit=5
106 |         )
107 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | <p align="center">
  3 |     <img src="https://raw.githubusercontent.com/Glitchy-Sheep/boosty-downloader/main/assets/boosty-black-badge.png" style="width: 80%; "/>
  4 | </p>
  5 | 
  6 | # 🖥️ About
  7 | 
  8 | Welcome to the **Boosty Downloader** project! 
  9 | 
 10 | This CLI tool allows you to download most of the content from Boosty.to in bulk.
 11 | The post content itself is saved in html with a little bit of styling.
 12 | 
 13 | **You can download:**
 14 | - Boosty Videos
 15 | - External Videos (YouTube, Vimeo)
 16 | - Files
 17 | - Full Post content (including photos and links)
 18 | 
 19 | ## 📑 Table of Contents
 20 | - [🖥️ About](#️-about)
 21 |   - [📑 Table of Contents](#-table-of-contents)
 22 |   - [✨ Features](#-features)
 23 |   - [📸 Screenshots \& Usage](#-screenshots--usage)
 24 |   - [🛠️ Installation](#️-installation)
 25 |   - [🚀 Configuration for Usage](#-configuration-for-usage)
 26 |     - [Step 1: Get the auth cookie and auth header](#step-1-get-the-auth-cookie-and-auth-header)
 27 |     - [Step 2: Paste the cookie and auth header into the config file](#step-2-paste-the-cookie-and-auth-header-into-the-config-file)
 28 |     - [Step 3: Run the utility](#step-3-run-the-utility)
 29 |   - [💖 Contributing](#-contributing)
 30 |   - [📜 License](#-license)
 31 | 
 32 | 
 33 | 
 34 | ## ✨ Features
 35 | 
 36 | - 📦 **Bulk download**: Download all available content from your favorite creator.
 37 | - 🔎 **Total checker**: See how many posts are available to you, and which are not.
 38 | - 📂 **Content type filters**: Download only the content you need (videos, images, etc), choose what you really want with flags (see below).
 39 | - 📄 **Download specific posts**: Download post by url and username.
 40 | - 🔃 **Sync content seamlessly**: The utility keeps cache of already downloaded posts, so you can resume your download at any time or get new content after a while.
 41 | - 📼 **Choose your video quality**: You can choose preferred video quality to download (for boosty videos)
 42 | - 🎨 **Beautiful post previews**: You can view post content as rendered offline HTML files with a dark/light theme switch.
 43 | - 📊 **Order matters**: Posts have dates in names, so you can just sort it by name in your file explorer and see them in the correct chronological order.
 44 | - 🆙 **App update checker**: If new updates are available, you'll be notified when you use the application next time.
 45 | 
 46 | 
 47 | ## 📸 Screenshots & Usage
 48 | 
 49 | <img src="https://raw.githubusercontent.com/Glitchy-Sheep/boosty-downloader/refs/heads/dev/assets/usage.png">
 50 | <img src="https://raw.githubusercontent.com/Glitchy-Sheep/boosty-downloader/refs/heads/dev/assets/total_check.png">
 51 | <img src="https://raw.githubusercontent.com/Glitchy-Sheep/boosty-downloader/refs/heads/dev/assets/example1.png">
 52 | <img src="https://raw.githubusercontent.com/Glitchy-Sheep/boosty-downloader/refs/heads/dev/assets/example2.png">
 53 | 
 54 | 
 55 | 
 56 | ## 🛠️ Installation
 57 | 
 58 | 1. **Install python**:
 59 |    - Windows:
 60 |       ```bash
 61 |       winget install Python.Python.3.13
 62 |       ```
 63 |    - Linux:
 64 |       ```bash
 65 |       sudo apt-get install python3
 66 |       ```
 67 |    - macOS:
 68 |       ```bash
 69 |       brew install python
 70 |       ```
 71 | 
 72 | 2. **Install the boosty-downloader package:**
 73 |    ```bash
 74 |    pip install boosty-downloader
 75 |    ```
 76 | 
 77 | 3. **Run the application:**
 78 |    ```bash
 79 |    boosty-downloader --help
 80 |    ```
 81 | 
 82 | ## 🚀 Configuration for Usage
 83 | 
 84 | ### Step 1: Get the auth cookie and auth header
 85 | 
 86 | 1. Open the [Boosty](https://boosty.to) website.
 87 | 2. Click the "Sign in" button and fill in your credentials.
 88 | 3. Navigate to any author you have access to and scroll through the posts a little.
 89 | 4. Copy auth token and cookie from browser network tab.
 90 | 
 91 | <img src="https://raw.githubusercontent.com/Glitchy-Sheep/boosty-downloader/main/assets/auth_guide.png">
 92 | 
 93 | ### Step 2: Paste the cookie and auth header into the config file
 94 | 
 95 | This config will be created during first run of the app in the current working directory.
 96 | 
 97 | <img src="https://raw.githubusercontent.com/Glitchy-Sheep/boosty-downloader/main/assets/config_guide.png">
 98 | 
 99 | ### Step 3: Run the utility
100 | 
101 | Now you can just download your content with the following command:
102 | 
103 | ```bash
104 | boosty-downloader --username YOUR_CREATOR_NAME
105 | ```
106 | 
107 | ## 💖 Contributing
108 | 
109 | If you want to contribute to this project, please see the [CONTRIBUTING.md](CONTRIBUTING.md).
110 | 
111 | ## 📜 License
112 | 
113 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
114 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Created by https://www.toptal.com/developers/gitignore/api/python
  2 | # Edit at https://www.toptal.com/developers/gitignore?templates=python
  3 | 
  4 | test/data
  5 | 
  6 | # ------------- USER DEFINED --------------- #
  7 | lab/
  8 | 
  9 | # For local downloading tests
 10 | boosty-downloads/
 11 | 
 12 | # Credentials
 13 | config.yaml
 14 | 
 15 | 
 16 | ### Python ###
 17 | # Byte-compiled / optimized / DLL files
 18 | __pycache__/
 19 | *.py[cod]
 20 | *$py.class
 21 | 
 22 | # C extensions
 23 | *.so
 24 | 
 25 | # Distribution / packaging
 26 | .Python
 27 | build/
 28 | develop-eggs/
 29 | dist/
 30 | downloads/
 31 | eggs/
 32 | .eggs/
 33 | lib/
 34 | lib64/
 35 | parts/
 36 | sdist/
 37 | var/
 38 | wheels/
 39 | share/python-wheels/
 40 | *.egg-info/
 41 | .installed.cfg
 42 | *.egg
 43 | MANIFEST
 44 | 
 45 | # PyInstaller
 46 | #  Usually these files are written by a python script from a template
 47 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 48 | *.manifest
 49 | *.spec
 50 | 
 51 | # Installer logs
 52 | pip-log.txt
 53 | pip-delete-this-directory.txt
 54 | 
 55 | # Unit test / coverage reports
 56 | htmlcov/
 57 | .tox/
 58 | .nox/
 59 | .coverage
 60 | .coverage.*
 61 | .cache
 62 | nosetests.xml
 63 | coverage.xml
 64 | *.cover
 65 | *.py,cover
 66 | .hypothesis/
 67 | .pytest_cache/
 68 | cover/
 69 | 
 70 | # Translations
 71 | *.mo
 72 | *.pot
 73 | 
 74 | # Django stuff:
 75 | *.log
 76 | local_settings.py
 77 | db.sqlite3
 78 | db.sqlite3-journal
 79 | 
 80 | # Flask stuff:
 81 | instance/
 82 | .webassets-cache
 83 | 
 84 | # Scrapy stuff:
 85 | .scrapy
 86 | 
 87 | # Sphinx documentation
 88 | docs/_build/
 89 | 
 90 | # PyBuilder
 91 | .pybuilder/
 92 | target/
 93 | 
 94 | # Jupyter Notebook
 95 | .ipynb_checkpoints
 96 | 
 97 | # IPython
 98 | profile_default/
 99 | ipython_config.py
100 | 
101 | # pyenv
102 | #   For a library or package, you might want to ignore these files since the code is
103 | #   intended to run in multiple environments; otherwise, check them in:
104 | # .python-version
105 | 
106 | # pipenv
107 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
108 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
109 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
110 | #   install all needed dependencies.
111 | #Pipfile.lock
112 | 
113 | # poetry
114 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
115 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
116 | #   commonly ignored for libraries.
117 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
118 | #poetry.lock
119 | 
120 | # pdm
121 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
122 | #pdm.lock
123 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
124 | #   in version control.
125 | #   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
126 | .pdm.toml
127 | .pdm-python
128 | .pdm-build/
129 | 
130 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
131 | __pypackages__/
132 | 
133 | # Celery stuff
134 | celerybeat-schedule
135 | celerybeat.pid
136 | 
137 | # SageMath parsed files
138 | *.sage.py
139 | 
140 | # Environments
141 | .env
142 | .venv
143 | env/
144 | venv/
145 | ENV/
146 | env.bak/
147 | venv.bak/
148 | 
149 | # Spyder project settings
150 | .spyderproject
151 | .spyproject
152 | 
153 | # Rope project settings
154 | .ropeproject
155 | 
156 | # mkdocs documentation
157 | /site
158 | 
159 | # mypy
160 | .mypy_cache/
161 | .dmypy.json
162 | dmypy.json
163 | 
164 | # Pyre type checker
165 | .pyre/
166 | 
167 | # pytype static type analyzer
168 | .pytype/
169 | 
170 | # Cython debug symbols
171 | cython_debug/
172 | 
173 | # PyCharm
174 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
175 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
176 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
177 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
178 | #.idea/
179 | 
180 | ### Python Patch ###
181 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
182 | poetry.toml
183 | 
184 | # ruff
185 | .ruff_cache/
186 | 
187 | # LSP config files
188 | # pyrightconfig.json is deliberately NOT ignored: its rules are crucial to the project's quality
189 | 
190 | # End of https://www.toptal.com/developers/gitignore/api/python
191 | 
192 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/application/di/app_environment.py:
--------------------------------------------------------------------------------
  1 | """Defines the application environment and dependency injection context for resource management."""
  2 | 
  3 | from contextlib import AsyncExitStack
  4 | from dataclasses import dataclass
  5 | from pathlib import Path
  6 | from types import TracebackType
  7 | 
  8 | import aiohttp
  9 | from aiohttp.typedefs import LooseHeaders
 10 | from aiohttp_retry import RetryClient, RetryOptionsBase
 11 | 
 12 | from boosty_downloader.src.infrastructure.boosty_api.core.client import BoostyAPIClient
 13 | from boosty_downloader.src.infrastructure.loggers.logger_instances import RichLogger
 14 | from boosty_downloader.src.infrastructure.post_caching.post_cache import SQLitePostCache
 15 | from boosty_downloader.src.interfaces.console_progress_reporter import (
 16 |     ProgressReporter,
 17 |     use_reporter,
 18 | )
 19 | 
 20 | 
 21 | class AppEnvironment:
 22 |     """Manages the application's resource initialization and cleanup, providing an async context for dependency injection."""
 23 | 
 24 |     @dataclass
 25 |     class Environment:
 26 |         """Holds initialized application resources for use within the app context."""
 27 | 
 28 |         boosty_api_client: BoostyAPIClient
 29 |         downloading_retry_client: RetryClient
 30 |         progress_reporter: ProgressReporter
 31 |         destination_directory: Path
 32 |         post_cache: SQLitePostCache
 33 | 
 34 |     @dataclass
 35 |     class AppConfig:
 36 |         """Configuration for the application environment."""
 37 | 
 38 |         author_name: str
 39 |         target_directory: Path
 40 |         boosty_headers: LooseHeaders
 41 |         boosty_cookies_jar: aiohttp.CookieJar
 42 |         retry_options: RetryOptionsBase
 43 |         request_delay_seconds: float
 44 |         logger: RichLogger
 45 | 
 46 |     def __init__(
 47 |         self,
 48 |         config: AppConfig,
 49 |     ) -> None:
 50 |         self.author_name = config.author_name
 51 |         self.target_directory = config.target_directory
 52 |         self.boosty_headers = config.boosty_headers
 53 |         self.boosty_cookies_jar = config.boosty_cookies_jar
 54 |         self.logger = config.logger
 55 |         self.retry_options = config.retry_options
 56 |         self._request_delay_seconds = config.request_delay_seconds
 57 | 
 58 |     async def __aenter__(self) -> 'Environment':
 59 |         """Enter the async context and initialize resources."""
 60 |         self._exit_stack = AsyncExitStack()
 61 |         await self._exit_stack.__aenter__()
 62 | 
 63 |         authorized_boosty_session = await self._exit_stack.enter_async_context(
 64 |             # Don't: set BASE_URL here, the BoostyAPIClient will handle it internally.
 65 |             # Why: this session will be used for both downloading and API requests with different bases.
 66 |             aiohttp.ClientSession(
 67 |                 headers=self.boosty_headers,
 68 |                 cookie_jar=self.boosty_cookies_jar,
 69 |                 timeout=aiohttp.ClientTimeout(total=None),
 70 |                 trust_env=True,
 71 |             )
 72 |         )
 73 | 
 74 |         progress_reporter = await self._exit_stack.enter_async_context(
 75 |             use_reporter(
 76 |                 reporter=ProgressReporter(
 77 |                     logger=self.logger.logging_logger_obj,
 78 |                     console=self.logger.console,
 79 |                 )
 80 |             )
 81 |         )
 82 | 
 83 |         authorized_retry_client = RetryClient(
 84 |             authorized_boosty_session, retry_options=self.retry_options
 85 |         )
 86 | 
 87 |         boosty_api_client = BoostyAPIClient(
 88 |             authorized_retry_client,
 89 |             request_delay_seconds=self._request_delay_seconds,
 90 |         )
 91 | 
 92 |         post_cache = SQLitePostCache(
 93 |             destination=self.target_directory / self.author_name,
 94 |             logger=self.logger,
 95 |         )
 96 |         post_cache.__enter__()  # sync context manager
 97 |         self._exit_stack.callback(post_cache.__exit__, None, None, None)
 98 | 
 99 |         return self.Environment(
100 |             boosty_api_client=boosty_api_client,
101 |             downloading_retry_client=authorized_retry_client,
102 |             progress_reporter=progress_reporter,
103 |             destination_directory=self.target_directory / self.author_name,
104 |             post_cache=post_cache,
105 |         )
106 | 
107 |     async def __aexit__(
108 |         self,
109 |         exc_type: type[BaseException] | None,
110 |         exc_val: BaseException | None,
111 |         exc_tb: TracebackType | None,
112 |     ) -> None:
113 |         """Exit the async context and clean up resources"""
114 |         await self._exit_stack.__aexit__(exc_type, exc_val, exc_tb)
115 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/domain/post_data_chunks.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Module contains domain models for post data chunks.
  3 | 
  4 | These are used to represent different parts of a post, such as text, images, etc.
  5 | """
  6 | 
  7 | from dataclasses import dataclass, field
  8 | from enum import Enum
  9 | 
 10 | 
 11 | @dataclass
 12 | class PostDataChunkImage:
 13 |     """Represent an image data chunk within a post."""
 14 | 
 15 |     url: str
 16 | 
 17 | 
 18 | @dataclass
 19 | class PostDataChunkText:
 20 |     """
 21 |     Represent a textual data chunk within a post.
 22 | 
 23 |     It can contain multiple text fragments, each with optional styling and links.
 24 | 
 25 |     For example:
 26 |         - PostDataChunkText(
 27 |             text_fragments=[
 28 |                 PostDataChunkText.TextFragment(text="Hello, world!", bold=True),
 29 |                 PostDataChunkText.TextFragment(text="Visit Boosty", link_data="https://boosty.com", header_level=1),
 30 |                 PostDataChunkText.TextFragment(text="This is a normal text."),
 31 |                 PostDataChunkText.TextFragment(text="<NEW_LINE_SYMBOL>"),
 32 |             ]
 33 |     """
 34 | 
 35 |     @dataclass
 36 |     class TextFragment:
 37 |         """
 38 |         Represent a text fragment within a post with possibly additional styling.
 39 | 
 40 |         It also can contain a link to external resources (if link_data == None - it's just a text).
 41 |         """
 42 | 
 43 |         @dataclass
 44 |         class TextStyle:
 45 |             """Represent text styling options."""
 46 | 
 47 |             bold: bool = False
 48 |             italic: bool = False
 49 |             underline: bool = False
 50 | 
 51 |         text: str
 52 |         link_url: str | None = None
 53 |         header_level: int = 0  # Header level (0-6), 0 means no header
 54 |         style: TextStyle = field(default_factory=TextStyle)
 55 | 
 56 |     text_fragments: list[TextFragment]
 57 | 
 58 | 
 59 | @dataclass
 60 | class PostDataChunkBoostyVideo:
 61 |     """Represent a Boosty video data chunk within a post."""
 62 | 
 63 |     title: str
 64 |     url: str
 65 |     quality: str
 66 | 
 67 | 
 68 | @dataclass
 69 | class PostDataChunkExternalVideo:
 70 |     """
 71 |     Represent an external video data chunk within a post.
 72 | 
 73 |     Can be from: YouTube, Vimeo, etc.
 74 |     """
 75 | 
 76 |     url: str
 77 | 
 78 | 
 79 | @dataclass
 80 | class PostDataChunkFile:
 81 |     """Represent a file data chunk within a post."""
 82 | 
 83 |     url: str
 84 |     filename: str
 85 | 
 86 | 
 87 | @dataclass
 88 | class PostDataChunkTextualList:
 89 |     """
 90 |     Represent a list of text items within a post.
 91 | 
 92 |     Each item can be a simple text or a more complex structure with optional styling.
 93 |     """
 94 | 
 95 |     """ 📃 About this creepy structure:
 96 | 
 97 |     Lists can be nested, so we use a union type for items
 98 |     each level of nesting means a new list of items:
 99 | 
100 |     ----------------------------------------------------------------------------
101 |     # For example this:
102 |     ----------------------------------------------------------------------------
103 | 
104 |     PostDataChunkTextualList(
105 |         items=[
106 |             PostDataChunkTextualList.ListItem(
107 |                 data=[PostDataChunkText(text="Item 1")],
108 |                 nested_items=[]
109 |             ),
110 |             PostDataChunkTextualList.ListItem(
111 |                 data=[PostDataChunkText(text="Nested list:")],
112 |                 nested_items=[
113 |                     PostDataChunkTextualList.ListItem(
114 |                         data=[PostDataChunkText(text="Item 2")],
115 |                         nested_items=[]
116 |                     ),
117 |                     PostDataChunkTextualList.ListItem(
118 |                         data=[PostDataChunkText(text="Item 3")],
119 |                         nested_items=[]
120 |                     )
121 |                 ]
122 |             )
123 |         ]
124 |     )
125 | 
126 |     ----------------------------------------------------------------------------
127 |     # Becomes this:
128 |     ----------------------------------------------------------------------------
129 | 
130 |     - Item 1
131 |     - Nested list:
132 |       - Item 2
133 |       - Item 3
134 |     """
135 | 
136 |     @dataclass
137 |     class ListItem:
138 |         """'Represent a single item in a textual list."""
139 | 
140 |         data: list['PostDataChunkText']
141 |         nested_items: list['PostDataChunkTextualList.ListItem']
142 | 
143 |     class ListStyle(Enum):
144 |         """Style of the list, can be ordered or unordered."""
145 | 
146 |         ordered = 'ordered'
147 |         unordered = 'unordered'
148 | 
149 |     items: list[ListItem]
150 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/application/use_cases/download_specific_post.py:
--------------------------------------------------------------------------------
  1 | """Use case for downloading a specific Boosty post by URL."""
  2 | 
  3 | from pathlib import Path
  4 | 
  5 | from boosty_downloader.src.application.di.download_context import DownloadContext
  6 | from boosty_downloader.src.application.exceptions.application_errors import (
  7 |     ApplicationCancelledError,
  8 | )
  9 | from boosty_downloader.src.application.use_cases.check_total_posts import (
 10 |     BoostyAPIClient,
 11 | )
 12 | from boosty_downloader.src.application.use_cases.download_single_post import (
 13 |     ApplicationFailedDownloadError,
 14 |     DownloadSinglePostUseCase,
 15 | )
 16 | from boosty_downloader.src.infrastructure.file_downloader import sanitize_string
 17 | 
 18 | 
 19 | class DownloadPostByUrlUseCase:
 20 |     """
 21 |     Handles downloading a specific Boosty post given its URL.
 22 | 
 23 |     Right now it just iterates over the post and downloads it if UUID matches.
 24 |     Because I can't find a way to get post by URL directly at this moment.
 25 | 
 26 |     If you know how to do it, please open an issue on GitHub or PR with this functionality.
 27 |     """
 28 | 
 29 |     def __init__(
 30 |         self,
 31 |         post_url: str,
 32 |         boosty_api: BoostyAPIClient,
 33 |         destination: Path,
 34 |         download_context: DownloadContext,
 35 |     ) -> None:
 36 |         self.post_url = post_url
 37 |         self.boosty_api = boosty_api
 38 |         self.destination = destination
 39 |         self.context = download_context
 40 | 
 41 |     def extract_author_and_uuid_from_url(self) -> tuple[str | None, str | None]:
 42 |         """
 43 |         Parse Boosty post URL and returns (author_name, post_uuid) if possible.
 44 | 
 45 |         Expects URLs like: https://boosty.to/author_name/posts/post_uuid
 46 |         Returns None if parsing fails or URL is not Boosty.
 47 |         """
 48 |         url = self.post_url
 49 |         if 'boosty.to' not in url:
 50 |             self.context.progress_reporter.error(
 51 |                 "Provided URL doesn't match Boosty format (https://boosty.to/...)"
 52 |             )
 53 |             return None, None
 54 |         try:
 55 |             parts = url.split('/')
 56 |             author = parts[3]
 57 |             post_uuid = parts[5].split('?')[0]
 58 |         except (IndexError, AttributeError):
 59 |             self.context.progress_reporter.error(
 60 |                 'Failed to parse author or post UUID from the provided URL. '
 61 |             )
 62 |             return None, None
 63 |         else:
 64 |             return author, post_uuid
 65 | 
 66 |     async def execute(self) -> None:
 67 |         author_name, post_uuid = self.extract_author_and_uuid_from_url()
 68 |         if not author_name or not post_uuid:
 69 |             self.context.progress_reporter.error(
 70 |                 'Failed to extract author and UUID from the provided URL, aborting...'
 71 |             )
 72 |             return
 73 | 
 74 |         current_page = 0
 75 | 
 76 |         async for page in self.boosty_api.iterate_over_posts(
 77 |             author_name=author_name, posts_per_page=100
 78 |         ):
 79 |             current_page += 1
 80 |             self.context.progress_reporter.info(
 81 |                 f'[Page({current_page})] Searching for the post with UUID: {post_uuid}... '
 82 |             )
 83 |             for post in page.posts:
 84 |                 if post.id == post_uuid:
 85 |                     self.context.progress_reporter.success(
 86 |                         f'Found post with UUID: {post_uuid}, starting download...'
 87 |                     )
 88 | 
 89 |                     post_name = f'{post.created_at.date()} - {post.title}'
 90 |                     post_name = sanitize_string(post_name).replace('.', '').strip()
 91 | 
 92 |                     try:
 93 |                         await DownloadSinglePostUseCase(
 94 |                             post_dto=post,
 95 |                             destination=self.destination / post_name,
 96 |                             download_context=self.context,
 97 |                         ).execute()
 98 |                     except ApplicationCancelledError:
 99 |                         self.context.progress_reporter.warn(
100 |                             'Download cancelled by user. Bye!'
101 |                         )
102 |                     except ApplicationFailedDownloadError as e:
103 |                         self.context.progress_reporter.error(
104 |                             f'Failed to download post: {e.message}, RESOURCE: ({e.resource})'
105 |                         )
106 |                     else:
107 |                         return
108 | 
109 |         self.context.progress_reporter.error(
110 |             'Failed to find and download the specified post.'
111 |         )
112 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/html_generator/templates/base.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="en">
  3 | 
  4 | <head>
  5 |     <meta charset="UTF-8" />
  6 |     <title>HTML Report</title>
  7 | 
  8 |     <script>
  9 |         (function () {
 10 |             const theme = localStorage.getItem('theme') || 'light';
 11 |             document.documentElement.classList.add(theme);
 12 |         })();
 13 |     </script>
 14 | 
 15 |     <style>
 16 |         html.light {
 17 |             --bg-color: #ffffff;
 18 |             --text-color: #000000;
 19 |             --link-color: #1a0dab;
 20 |             --content-bg-color: #fff;
 21 |             --content-shadow: rgba(0, 0, 0, 0.1);
 22 |             --border-color: #ccc;
 23 |         }
 24 | 
 25 |         html.dark {
 26 |             --bg-color: #121212;
 27 |             --text-color: #f0f0f0;
 28 |             --link-color: #8ab4f8;
 29 |             --content-bg-color: #1e1e1e;
 30 |             --content-shadow: rgba(0, 0, 0, 0.7);
 31 |             --border-color: #444;
 32 |         }
 33 | 
 34 |         /* General styles */
 35 |         body {
 36 |             font-family: 'Arial', sans-serif;
 37 |             background-color: var(--bg-color);
 38 |             color: var(--text-color);
 39 |             line-height: 1.6;
 40 |             margin: 0;
 41 |             padding: 0;
 42 |             max-width: 90%;
 43 |             width: 950px;
 44 |             margin-left: auto;
 45 |             margin-right: auto;
 46 |         }
 47 | 
 48 |         a {
 49 |             color: var(--link-color);
 50 |             text-decoration: none;
 51 |             font-weight: bold;
 52 |         }
 53 | 
 54 |         a:hover {
 55 |             text-decoration: underline;
 56 |         }
 57 | 
 58 |         .content {
 59 |             padding: 2rem;
 60 |             background-color: var(--content-bg-color);
 61 |             box-shadow: 0 0 12px var(--content-shadow);
 62 |             margin-bottom: 3rem;
 63 |             margin-top: 1.5rem;
 64 |             border-radius: 8px;
 65 |             transition: background-color 0.3s ease, box-shadow 0.3s ease, color 0.3s ease;
 66 |         }
 67 | 
 68 |         p {
 69 |             font-size: 1.2rem;
 70 |             margin-bottom: 1.5rem;
 71 |         }
 72 | 
 73 |         h1,
 74 |         h2,
 75 |         h3,
 76 |         h4,
 77 |         h5,
 78 |         h6 {
 79 |             margin-top: 2rem;
 80 |             margin-bottom: 1rem;
 81 |             font-weight: bold;
 82 |             line-height: 1.3;
 83 |         }
 84 | 
 85 |         ul,
 86 |         ol {
 87 |             padding-left: 2rem;
 88 |             margin-bottom: 1.5rem;
 89 |         }
 90 | 
 91 |         img {
 92 |             display: block;
 93 |             max-width: 100%;
 94 |             height: auto;
 95 |             border-radius: 8px;
 96 |             margin: 20px auto;
 97 |             box-shadow: 0 2px 12px rgba(0, 0, 0, 0.05);
 98 |         }
 99 | 
100 |         video {
101 |             display: block;
102 |             margin: 2rem auto;
103 |             max-width: 100%;
104 |             border-radius: 6px;
105 |             box-shadow: 0 1px 6px rgba(0, 0, 0, 0.08);
106 |         }
107 | 
108 |         .new-paragraph {
109 |             margin-top: 2rem;
110 |         }
111 | 
112 |         #theme-toggle {
113 |             position: fixed;
114 |             top: 0.5rem;
115 |             right: 0.5rem;
116 |             padding: 0.3rem 0.6rem;
117 |             background: none;
118 |             border: 1px solid var(--border-color);
119 |             border-radius: 0.5rem;
120 |             color: var(--text-color);
121 |             cursor: pointer;
122 |             font-size: 1rem;
123 |             user-select: none;
124 |             transition: color 0.3s ease, border-color 0.3s ease;
125 |             z-index: 1000;
126 |         }
127 | 
128 |         #theme-toggle:hover {
129 |             background-color: var(--border-color);
130 |             color: var(--bg-color);
131 |         }
132 |     </style>
133 | </head>
134 | 
135 | <body>
136 |     <button id="theme-toggle" aria-label="Toggle theme">🌙</button>
137 | 
138 |     <div class="content">
139 |         {{ content | safe }}
140 |     </div>
141 | 
142 |     <script>
143 |         const toggleBtn = document.getElementById('theme-toggle');
144 |         toggleBtn.textContent = document.documentElement.classList.contains('dark') ? '🌙' : '☀';
145 | 
146 |         toggleBtn.onclick = function () {
147 |             const html = document.documentElement;
148 |             const current = html.classList.contains('dark') ? 'dark' : 'light';
149 |             const next = current === 'dark' ? 'light' : 'dark';
150 | 
151 |             html.classList.remove(current);
152 |             html.classList.add(next);
153 |             localStorage.setItem('theme', next);
154 | 
155 |             this.textContent = next === 'dark' ? '🌙' : '☀';
156 |         };
157 |     </script>
158 | </body>
159 | 
160 | </html>


--------------------------------------------------------------------------------
/boosty_downloader/src/application/use_cases/download_all_posts.py:
--------------------------------------------------------------------------------
  1 | """Implements the use case for downloading all posts from a Boosty author, applying filters and caching as needed."""
  2 | 
  3 | import asyncio
  4 | from pathlib import Path
  5 | 
  6 | from boosty_downloader.src.application.di.download_context import DownloadContext
  7 | from boosty_downloader.src.application.exceptions.application_errors import (
  8 |     ApplicationCancelledError,
  9 |     ApplicationFailedDownloadError,
 10 | )
 11 | from boosty_downloader.src.application.use_cases.download_single_post import (
 12 |     DownloadSinglePostUseCase,
 13 | )
 14 | from boosty_downloader.src.infrastructure.boosty_api.core.client import BoostyAPIClient
 15 | from boosty_downloader.src.infrastructure.path_sanitizer import (
 16 |     sanitize_string,
 17 | )
 18 | 
 19 | 
 20 | class DownloadAllPostUseCase:
 21 |     """
 22 |     Use case for downloading all user's posts.
 23 | 
 24 |     This class encapsulates the logic required to download all posts from a source.
 25 |     Initialize the use case and call its methods to perform the download operation.
 26 | 
 27 |     All the downloaded content parts will be saved under the specified destination path.
 28 |     """
 29 | 
 30 |     def __init__(
 31 |         self,
 32 |         author_name: str,
 33 |         boosty_api: BoostyAPIClient,
 34 |         destination: Path,
 35 |         download_context: DownloadContext,
 36 |     ) -> None:
 37 |         self.author_name = author_name
 38 | 
 39 |         self.boosty_api = boosty_api
 40 |         self.destination = destination
 41 |         self.context = download_context
 42 | 
 43 |     async def execute(self) -> None:
 44 |         posts_iterator = self.boosty_api.iterate_over_posts(
 45 |             author_name=self.author_name
 46 |         )
 47 | 
 48 |         current_page = 0
 49 | 
 50 |         async for page in posts_iterator:
 51 |             count = len(page.posts)
 52 |             current_page += 1
 53 | 
 54 |             page_task_id = self.context.progress_reporter.create_task(
 55 |                 f'Got new posts: [{count}]',
 56 |                 total=count,
 57 |                 indent_level=0,  # Each page prints without indentation
 58 |             )
 59 | 
 60 |             for post_dto in page.posts:
 61 |                 if not post_dto.has_access:
 62 |                     self.context.progress_reporter.warn(
 63 |                         f'Skip post ([red]no access to content[/red]): {post_dto.title}'
 64 |                     )
 65 |                     continue
 66 | 
 67 |                 # For empty titles use post ID as a fallback (first 8 chars)
 68 |                 if len(post_dto.title) == 0:
 69 |                     post_dto.title = f'Not title (id_{post_dto.id[:8]})'
 70 | 
 71 |                 post_dto.title = (
 72 |                     sanitize_string(post_dto.title).replace('.', '').strip()
 73 |                 )
 74 | 
 75 |                 # date - TITLE (UUID_PART) for deduplication in case of same names with different posts
 76 |                 full_post_title = f'{post_dto.created_at.date()} - {post_dto.title} ({post_dto.id[:8]})'
 77 | 
 78 |                 single_post_use_case = DownloadSinglePostUseCase(
 79 |                     destination=self.destination / full_post_title,
 80 |                     post_dto=post_dto,
 81 |                     download_context=self.context,
 82 |                 )
 83 | 
 84 |                 self.context.progress_reporter.update_task(
 85 |                     page_task_id,
 86 |                     advance=1,
 87 |                     description=f'Processing page [bold]{current_page}[/bold]',
 88 |                 )
 89 | 
 90 |                 max_attempts = 5
 91 |                 delay = 1.0
 92 |                 for attempt in range(1, max_attempts + 1):
 93 |                     try:
 94 |                         await single_post_use_case.execute()
 95 |                         break
 96 |                     except ApplicationCancelledError:
 97 |                         raise
 98 |                     except ApplicationFailedDownloadError as e:
 99 |                         if attempt == max_attempts:
100 |                             self.context.progress_reporter.error(
101 |                                 f'Skip post after {attempt} failed attempts: {full_post_title} ({e.message})'
102 |                             )
103 |                         else:
104 |                             self.context.progress_reporter.warn(
105 |                                 f'Attempt {attempt} failed for post: {full_post_title} ({e.message}), RESOURCE: ({e.resource})'
106 |                             )
107 |                             self.context.progress_reporter.warn(
108 |                                 f'Retrying in {delay:.1f}s... ({e.message})'
109 |                             )
110 |                             await asyncio.sleep(delay)
111 |                             delay = min(delay * 1.5, 10.0)
112 | 
113 |             self.context.progress_reporter.complete_task(page_task_id)
114 |             self.context.progress_reporter.success(
115 |                 f'--- Finished page {current_page} ---'
116 |             )
117 | 


--------------------------------------------------------------------------------
/.github/workflows/release-pr-validation.yaml:
--------------------------------------------------------------------------------
  1 | # This workflow runs only for dev -> main PRs to ensure that:
  2 | #  - CHANGELOG updated 
  3 | #  - pyproject.toml version updated
  4 | #  - Version is higher than the one on PyPI
  5 | name: 🔍 Release PR Validation (version checks)
  6 | 
  7 | on:
  8 |   pull_request:
  9 |     branches:
 10 |       - main
 11 | 
 12 | env:
 13 |   PACKAGE_NAME: "boosty-downloader"
 14 |       
 15 | jobs:
 16 |   # About Inter-step Communication:
 17 |   #    Steps share data (versions) using GitHub Actions outputs mechanism:
 18 |   #    
 19 |   #    Creating output:     echo "key=value" >> "$GITHUB_OUTPUT"
 20 |   #    Using output:        ${{ steps.STEP_ID.outputs.key }}
 21 |   #
 22 |   version-validation:
 23 |     name: 📋 Version Validation (Main Branch PRs)
 24 |     runs-on: ubuntu-latest
 25 |     # if: github.event_name == 'pull_request' && github.base_ref == 'main'
 26 |     steps:
 27 |       - uses: actions/checkout@v4
 28 |         with:
 29 |           fetch-depth: 0
 30 |           # ref: ${{ github.event.pull_request.head.sha }}
 31 | 
 32 |       - name: 🐍 Set up Python
 33 |         uses: actions/setup-python@v5
 34 |         with:
 35 |           python-version: "3.12"
 36 | 
 37 |       - name: 📦 Install Poetry if missing
 38 |         uses: snok/install-poetry@v1
 39 |         with:
 40 |           version: 'latest'
 41 |       
 42 |       - name: Get project versions (base and head)
 43 |         id: get_poetry_versions
 44 |         run: |
 45 |           HEAD_VERSION=$(poetry version --short)
 46 |           echo "head_version=$HEAD_VERSION" >> "$GITHUB_OUTPUT"
 47 |           echo "Current version: $HEAD_VERSION at $(git rev-parse --short HEAD)"
 48 | 
 49 |           git switch main
 50 |           BASE_VERSION=$(poetry version --short)
 51 |           echo "base_version=$BASE_VERSION" >> "$GITHUB_OUTPUT"
 52 |           echo "Base version: $BASE_VERSION at $(git rev-parse --short HEAD)"
 53 | 
 54 |           git switch - -d
 55 | 
 56 |       
 57 |       - name: ✅ Validate version bump in pyproject.toml
 58 |         run: |
 59 |           CURRENT_VERSION="${{ steps.get_poetry_versions.outputs.head_version }}"
 60 |           BASE_VERSION="${{ steps.get_poetry_versions.outputs.base_version }}"
 61 | 
 62 |           if [ "$CURRENT_VERSION" == "$BASE_VERSION" ]; then
 63 |             echo "❌ Version not updated! Please update version in pyproject.toml"
 64 |             echo "Current: $CURRENT_VERSION"
 65 |             echo "Base: $BASE_VERSION"
 66 |             exit 1
 67 |           fi
 68 |           
 69 |           if [ "$(printf '%s\n' "$BASE_VERSION" "$CURRENT_VERSION" | sort -rV | head -n 1)" != "$CURRENT_VERSION" ]; then
 70 |             echo "❌ Version should be higher than base version!"
 71 |             echo "Current: $CURRENT_VERSION"
 72 |             echo "Base: $BASE_VERSION"
 73 |             exit 1
 74 |           fi
 75 |           
 76 |           echo "✅ Version correctly updated: $BASE_VERSION → $CURRENT_VERSION"
 77 | 
 78 |       - name: 📝 Check for version in CHANGELOG.md
 79 |         run: |
 80 |           if [ ! -f CHANGELOG.md ]; then
 81 |             echo "❌ CHANGELOG.md not found! Please create it."
 82 |             exit 1
 83 |           fi
 84 |           VERSION="${{ steps.get_poetry_versions.outputs.head_version }}"
 85 |           if ! grep -q "$VERSION" CHANGELOG.md; then
 86 |             echo "at $(git rev-parse --short HEAD)"
 87 |             echo "❌ Version $VERSION not found in CHANGELOG.md"
 88 |             echo "Please add changelog entry for version $VERSION"
 89 |             exit 1
 90 |           fi
 91 |           echo "✅ Version $VERSION found in CHANGELOG.md"
 92 | 
 93 |       - name: 🩺 Check PyPi release version compatibility
 94 |         run: |
 95 |           PACKAGE_NAME="${{ env.PACKAGE_NAME }}"
 96 |           CURRENT_VERSION="${{ steps.get_poetry_versions.outputs.head_version }}"
 97 | 
 98 |           echo "Checking package: $PACKAGE_NAME"
 99 |           echo "Current version: $CURRENT_VERSION"
100 | 
101 |           response=$(curl -s "https://pypi.org/pypi/$PACKAGE_NAME/json" || echo "{}")
102 |           # Yields nothing when the response has no `releases` object.
103 |           pypi_version=$(echo "$response" | jq --raw-output "select(.releases != null) | .releases | keys_unsorted | last // empty")
104 | 
105 |           if [ -z "$pypi_version" ] || [ "$pypi_version" = "null" ]; then
106 |             echo "Package not found on PyPI or no releases available."
107 |             pypi_version="0.0.0"
108 |           fi
109 |           # NOTE(review): this step declares no `id`, so other steps cannot read the outputs below.
110 |           echo "Latest version on PyPI: $pypi_version"
111 |           echo "pypi_version=$pypi_version" >> "$GITHUB_OUTPUT"
112 | 
113 |           # Equal versions fail; otherwise ours must come first under `sort -rV`.
114 |           if [ "$CURRENT_VERSION" = "$pypi_version" ]; then
115 |             echo "❌ Current version equals PyPI version ($CURRENT_VERSION)"
116 |             echo "is_newer=false" >> "$GITHUB_OUTPUT"
117 |             exit 1
118 |           elif [ "$(printf '%s\n' "$pypi_version" "$CURRENT_VERSION" | sort -rV | head -n 1)" = "$CURRENT_VERSION" ]; then
119 |             echo "✅ Current version ($CURRENT_VERSION) is newer than PyPI version ($pypi_version)"
120 |             echo "is_newer=true" >> "$GITHUB_OUTPUT"
121 |           else
122 |             echo "❌ Current version ($CURRENT_VERSION) is older than PyPI version ($pypi_version)"
123 |             echo "is_newer=false" >> "$GITHUB_OUTPUT"
124 |             exit 1
125 |           fi
126 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/application/mappers/link_header_text.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Mapper for converting textual Boosty API post data chunks to domain text object.
  3 | 
  4 | If the API responses change, this mapper may need to be updated accordingly.
  5 | """
  6 | 
  7 | import json
  8 | 
  9 | from boosty_downloader.src.domain.post_data_chunks import PostDataChunkText
 10 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post_data_types import (
 11 |     BoostyPostDataHeaderDTO,
 12 |     BoostyPostDataLinkDTO,
 13 |     BoostyPostDataTextDTO,
 14 | )
 15 | 
 16 | 
 17 | def _parse_header(style_definition: str) -> int:
 18 |     r"""
 19 |     Parse header level (h1/h2/h3...) from the style definition.
 20 | 
 21 |     Style definition usually comes as a 2nd field in the "content" field of PostDataText.
 22 | 
 23 |     ```
 24 |     "content": "[\"Hello, world!\", \"unstyled\", <---- [[0, 0, 13]]"
 25 |     ```
 26 |     """
 27 |     # These values were reverse engineered from Boosty API responses.
 28 |     header_possible_values = {
 29 |         'unstyled': 0,
 30 |         'header-one': 1,
 31 |         'header-two': 2,
 32 |         'header-three': 3,
 33 |         'header-four': 4,
 34 |         'header-five': 5,
 35 |         'header-six': 6,
 36 |     }
 37 | 
 38 |     # by default (and in other cases) have no header
 39 |     return header_possible_values.get(style_definition, 0)
 40 | 
 41 | 
 42 | def _create_style_bitmap(
 43 |     text_length: int, style_array: list[list[int]]
 44 | ) -> list[set[int]]:
 45 |     """Create bitmap of styles for each character position."""
 46 |     bitmap: list[set[int]] = [set() for _ in range(text_length)]
 47 | 
 48 |     for style_desc in style_array:
 49 |         style_id, start_idx, end_idx = style_desc
 50 |         for i in range(start_idx, min(end_idx, text_length)):
 51 |             bitmap[i].add(style_id)
 52 | 
 53 |     return bitmap
 54 | 
 55 | 
 56 | def _create_text_fragments(
 57 |     text: str, style_bitmap: list[set[int]], header_level: int
 58 | ) -> list[PostDataChunkText.TextFragment]:
 59 |     """Create text fragments based on style bitmap."""
 60 |     if not text:
 61 |         return []
 62 | 
 63 |     fragments: list[PostDataChunkText.TextFragment] = []
 64 |     current_fragment_start = 0
 65 |     current_styles: set[int] = style_bitmap[0] if style_bitmap else set()
 66 | 
 67 |     for i in range(1, len(text)):
 68 |         if i >= len(style_bitmap) or style_bitmap[i] != current_styles:
 69 |             fragment_text = text[current_fragment_start:i]
 70 |             fragment = PostDataChunkText.TextFragment(fragment_text)
 71 |             fragment.header_level = header_level
 72 |             fragment.style = _convert_style_set_to_text_style(current_styles)
 73 |             fragments.append(fragment)
 74 | 
 75 |             current_fragment_start = i
 76 |             current_styles = style_bitmap[i] if i < len(style_bitmap) else set()
 77 | 
 78 |     # Add the last fragment
 79 |     fragment_text = text[current_fragment_start:]
 80 |     fragment = PostDataChunkText.TextFragment(fragment_text)
 81 |     fragment.header_level = header_level
 82 |     fragment.style = _convert_style_set_to_text_style(current_styles)
 83 |     fragments.append(fragment)
 84 | 
 85 |     return fragments
 86 | 
 87 | 
 88 | def _convert_style_set_to_text_style(
 89 |     style_set: set[int],
 90 | ) -> PostDataChunkText.TextFragment.TextStyle:
 91 |     """Convert set of style IDs to TextStyle object."""
 92 |     bold = 0
 93 |     italic = 2
 94 |     underline = 4
 95 | 
 96 |     text_style = PostDataChunkText.TextFragment.TextStyle()
 97 |     text_style.bold = bold in style_set
 98 |     text_style.italic = italic in style_set
 99 |     text_style.underline = underline in style_set
100 | 
101 |     return text_style
102 | 
103 | 
104 | def _parse_content_field(
105 |     content: str, modificator: str = ''
106 | ) -> list[PostDataChunkText.TextFragment]:
107 |     def _extract_content_field(content: str) -> tuple[str, str, list[list[int]]]:
108 |         r"""
109 |         Extract text, style info, and style array from the content field.
110 | 
111 |         Boosty API returns "content" as a JSON-encoded string like this:
112 |         "[\"Hello, world!\", \"unstyled\", [[0, 0, 13]]"
113 | 
114 |         The first part is just a text string, the other two parts are style information:
115 |         - you can read about them in the _parse_style_array and _parse_header functions above.
116 |         """
117 |         try:
118 |             parsed = json.loads(content)
119 |             text = parsed[0]
120 |             style_info = parsed[1]
121 |             style_array = parsed[2]
122 |         except json.JSONDecodeError:
123 |             return content, '', []
124 |         else:
125 |             return text, style_info, style_array
126 | 
127 |     text, style_info, styles_array = _extract_content_field(content)
128 | 
129 |     if modificator == 'BLOCK_END':
130 |         text += '\n'
131 | 
132 |     header_level = _parse_header(style_info)
133 |     style_bitmap = _create_style_bitmap(len(text), styles_array)
134 |     return _create_text_fragments(text, style_bitmap, header_level)
135 | 
136 | 
137 | def to_domain_text_chunk(
138 |     api_textual_dto: BoostyPostDataTextDTO
139 |     | BoostyPostDataHeaderDTO
140 |     | BoostyPostDataLinkDTO,
141 | ) -> list[PostDataChunkText.TextFragment]:
142 |     """
143 |     Convert API textual data chunks to domain text fragments.
144 | 
145 |     It uses the PostDataText, PostDataHeader, or PostDataLink DTOs
146 |     to extract the content and convert it to a list of domain text fragments.
147 |     """
148 |     modificator = getattr(api_textual_dto, 'modificator', '')
149 |     text_fragments = _parse_content_field(api_textual_dto.content, modificator)
150 | 
151 |     # Attach link information to the text fragments if any is present
152 |     if isinstance(api_textual_dto, BoostyPostDataLinkDTO):
153 |         for fragment in text_fragments:
154 |             fragment.link_url = api_textual_dto.url
155 | 
156 |     return text_fragments
157 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/html_reporter/html_reporter.py:
--------------------------------------------------------------------------------
  1 | """HTML Reporter for generating HTML documents"""
  2 | 
  3 | from __future__ import annotations
  4 | 
  5 | from dataclasses import dataclass
  6 | from typing import TYPE_CHECKING, TypedDict
  7 | 
  8 | from jinja2 import Template
  9 | 
 10 | if TYPE_CHECKING:
 11 |     from pathlib import Path
 12 | 
 13 | 
 14 | @dataclass
 15 | class NormalText:
 16 |     """Plain text element that can be added to the HTML document."""
 17 | 
 18 |     text: str  # the text to show
 19 | 
 20 | 
 21 | @dataclass
 22 | class HyperlinkText:
 23 |     """Hyperlink element (visible text plus target URL) for the HTML document."""
 24 | 
 25 |     text: str  # visible link text
 26 |     url: str  # link target
 27 | 
 28 | 
 29 | class TextElement(TypedDict):
 30 |     """Stored form of a text element; rendered inside a <p> tag by HTMLReport."""
 31 | 
 32 |     type: str  # HTMLReport stores 'text' here
 33 |     content: str  # the text, or '<br>' for an empty paragraph
 34 | 
 35 | 
 36 | class ImageElement(TypedDict):
 37 |     """Stored form of an image element; rendered as an <img> tag by HTMLReport."""
 38 | 
 39 |     type: str  # HTMLReport stores 'image' here
 40 |     content: str  # image path, used as the `src` attribute
 41 |     width: int  # stored by add_image; HTMLReport's template does not read it
 42 | 
 43 | 
 44 | class LinkElement(TypedDict):
 45 |     """Stored form of a link element; rendered as an <a> tag by HTMLReport."""
 46 | 
 47 |     type: str  # HTMLReport stores 'link' here
 48 |     content: str  # visible link text
 49 |     url: str  # used as the `href` attribute
 50 | 
 51 | 
 52 | class HTMLReport:
 53 |     """
 54 |     Representation of the document, which can be saved as an HTML file.
 55 | 
 56 |     You can add text/links/images to the document, they will be added one after another.
 57 |     """
 58 | 
 59 |     def __init__(self, filename: Path) -> None:
 60 |         self.filename = filename
 61 |         self.elements: list[TextElement | ImageElement | LinkElement] = []
 62 | 
 63 |     def _render_template(self) -> str:
 64 |         """Render the HTML document using Jinja2"""
 65 |         template = """
 66 |         <html>
 67 |         <head>
 68 |             <title>HTML Report</title>
 69 |             <style>
 70 |                 body {
 71 |                     font-family: 'Arial', sans-serif;
 72 |                     background-color: #f4f4f9;
 73 |                     color: #333;
 74 |                     line-height: 1.6;
 75 |                     margin: 0;
 76 |                     padding: 0;
 77 |                     max-width: 90%; /* Increased responsiveness */
 78 |                     width: 950px;
 79 |                     margin-left: auto;
 80 |                     margin-right: auto;
 81 |                 }
 82 |                 .content {
 83 |                     padding: 2rem; /* Changed to rem for better scaling */
 84 |                     background-color: #fff;
 85 |                     box-shadow: 0 0 12px rgba(0, 0, 0, 0.1);
 86 |                     margin-bottom: 3rem; /* Adjusted for better spacing */
 87 |                     margin-top: 1.5rem; /* Adjusted for consistency */
 88 |                     border-radius: 8px;
 89 |                     transition: all 0.3s ease;
 90 |                 }
 91 |                 p {
 92 |                     font-size: 1.2rem; /* Increased for better readability */
 93 |                     margin-bottom: 1.5rem; /* Adjusted for better spacing */
 94 |                 }
 95 |                 a {
 96 |                     color: #007bff;
 97 |                     text-decoration: none;
 98 |                     font-weight: bold;
 99 |                 }
100 |                 a:hover {
101 |                     text-decoration: underline;
102 |                 }
103 |                 img {
104 |                     display: block;
105 |                     max-width: 100%;
106 |                     height: auto;
107 |                     border-radius: 8px;
108 |                     margin: 0 auto;
109 |                     box-shadow: 0 2px 12px rgba(0, 0, 0, 0.05);
110 |                 }
111 |                 .new-paragraph {
112 |                     margin-top: 2rem; /* Adjusted for better spacing */
113 |                 }
114 |             </style>
115 |         </head>
116 |         <body>
117 |             <div class="content">
118 |                 {% for element in elements %}
119 |                     {% if element.type == 'text' %}
120 |                         <p>{{ element.content }}</p>
121 |                     {% elif element.type == 'image' %}
122 |                         <div style="text-align: center;">
123 |                             <img src="{{ element.content }}" width="100%">
124 |                         </div>
125 |                     {% elif element.type == 'link' %}
126 |                         <a href="{{ element.url }}" style="color:blue;">{{ element.content }}</a>
127 |                     {% endif %}
128 |                 {% endfor %}
129 |             </div>
130 |         </body>
131 |         </html>
132 |         """
133 |         jinja_template = Template(template)
134 |         return jinja_template.render(elements=self.elements)
135 | 
136 |     def new_paragraph(self) -> None:
137 |         """Add an empty line between elements"""
138 |         # Append a new paragraph using a proper TextElement type
139 |         self.elements.append(TextElement(type='text', content='<br>'))
140 | 
141 |     def add_text(self, text: NormalText) -> None:
142 |         """Add a text to the report right after the last added element"""
143 |         # Append text content using TextElement
144 |         self.elements.append(TextElement(type='text', content=text.text))
145 | 
146 |     def add_image(self, image_path: str, width: int = 600) -> None:
147 |         """
148 |         Add an image to the report right after the last added element
149 | 
150 |         - width 600 is usually enough for most HTML pages
151 |         """
152 |         # Append image content using ImageElement
153 |         self.elements.append(
154 |             ImageElement(type='image', content=image_path, width=width),
155 |         )
156 | 
157 |     def add_link(self, text: NormalText, url: str) -> None:
158 |         """Add a link to the report right after the last added element"""
159 |         # Append link content using LinkElement
160 |         self.elements.append(LinkElement(type='link', content=text.text, url=url))
161 | 
162 |     def save(self) -> None:
163 |         """Save the whole document to the file"""
164 |         html_content = self._render_template()
165 |         with self.filename.open('w', encoding='utf-8') as file:
166 |             file.write(html_content)
167 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/file_downloader.py:
--------------------------------------------------------------------------------
  1 | """Module to download files with reporting process mechanisms"""
  2 | 
  3 | from __future__ import annotations
  4 | 
  5 | import http
  6 | import mimetypes
  7 | from asyncio import CancelledError
  8 | from dataclasses import dataclass
  9 | from typing import TYPE_CHECKING
 10 | 
 11 | import aiofiles
 12 | from aiohttp import ClientConnectionError
 13 | 
 14 | from boosty_downloader.src.infrastructure.path_sanitizer import (
 15 |     sanitize_string,
 16 | )
 17 | 
 18 | if TYPE_CHECKING:
 19 |     from collections.abc import Callable
 20 |     from pathlib import Path
 21 | 
 22 |     from aiohttp_retry import RetryClient
 23 | 
 24 | 
 25 | @dataclass
 26 | class DownloadingStatus:
 27 |     """
 28 |     Snapshot of a download's progress.
 29 | 
 30 |     download_file passes one to the status callback for every received chunk.
 31 |     """
 32 | 
 33 |     name: str  # file name (download_file passes the sanitized name)
 34 |     total_bytes: int | None  # response content length, None when not available
 35 |     total_downloaded_bytes: int  # bytes received so far, including the latest chunk
 36 |     downloaded_bytes: int = 0  # size of the latest chunk
 37 | 
 38 | 
 39 | @dataclass
 40 | class DownloadFileConfig:
 41 |     """Everything download_file needs to fetch one URL into a destination directory"""
 42 | 
 43 |     session: RetryClient  # HTTP client used for the GET request
 44 |     url: str  # resource to download
 45 | 
 46 |     filename: str  # sanitized by download_file before use
 47 |     destination: Path  # directory the file is created in
 48 |     on_status_update: Callable[[DownloadingStatus], None] = lambda _: None  # no-op by default
 49 | 
 50 |     guess_extension: bool = True  # replace the suffix based on the response content type
 51 |     chunk_size_bytes: int = 524288  # 512 KiB
 52 | 
 53 | 
 54 | class DownloadError(Exception):
 55 |     """Exception raised when the download failed for any reason"""
 56 | 
 57 |     message: str
 58 |     file: Path | None
 59 |     resource_url: str
 60 | 
 61 |     def __init__(self, message: str, file: Path | None, resource_url: str) -> None:
 62 |         super().__init__(message)
 63 |         self.file = file
 64 |         self.resource_url = resource_url
 65 | 
 66 | 
 67 | class DownloadCancelledError(DownloadError):
 68 |     """Exception raised when the download was cancelled by the user"""
 69 | 
 70 |     def __init__(self, resource_url: str, file: Path | None = None) -> None:
 71 |         super().__init__('Download cancelled by user', file, resource_url=resource_url)
 72 | 
 73 | 
 74 | class DownloadTimeoutError(DownloadError):
 75 |     """Exception raised when the download timed out"""
 76 | 
 77 |     def __init__(self, resource_url: str, file: Path | None = None) -> None:
 78 |         super().__init__(
 79 |             'Download timed out for the destination server',
 80 |             file,
 81 |             resource_url=resource_url,
 82 |         )
 83 | 
 84 | 
 85 | class DownloadConnectionError(DownloadError):
 86 |     """Exception raised when there was a connection error during the download"""
 87 | 
 88 |     def __init__(self, resource_url: str, file: Path | None = None) -> None:
 89 |         super().__init__(
 90 |             'Connection error during the download', file, resource_url=resource_url
 91 |         )
 92 | 
 93 | 
 94 | class DownloadIOFailureError(DownloadError):
 95 |     """Exception raised when there was an IOError during the download"""
 96 | 
 97 |     def __init__(self, resource_url: str, file: Path | None = None) -> None:
 98 |         super().__init__('Failed during I/O operation', file, resource_url=resource_url)
 99 | 
100 | 
101 | class DownloadUnexpectedStatusError(DownloadError):
102 |     """Exception raised when the server returned an unexpected status code"""
103 | 
104 |     status_code: int
105 |     response_message: str
106 | 
107 |     def __init__(self, status: int, response_message: str, resource_url: str) -> None:
108 |         super().__init__(
109 |             f'Unexpected status code: {status}', file=None, resource_url=resource_url
110 |         )
111 |         self.status_code = status
112 |         self.response_message = response_message
113 | 
114 | 
115 | async def download_file(
116 |     dl_config: DownloadFileConfig,
117 | ) -> Path:
118 |     """Download files and report the downloading process via callback"""
119 |     async with dl_config.session.get(dl_config.url) as response:
120 |         if response.status != http.HTTPStatus.OK:
121 |             raise DownloadUnexpectedStatusError(
122 |                 resource_url=dl_config.url,
123 |                 status=response.status,
124 |                 response_message=response.reason or 'No reason provided',
125 |             )
126 | 
127 |         filename = sanitize_string(dl_config.filename)
128 |         file_path = dl_config.destination / filename
129 | 
130 |         content_type = response.content_type
131 |         if content_type and dl_config.guess_extension:
132 |             ext = mimetypes.guess_extension(content_type)
133 |             if ext is not None:
134 |                 file_path = file_path.with_suffix(ext)
135 | 
136 |         total_downloaded = 0
137 | 
138 |         async with aiofiles.open(file_path, mode='wb') as file:
139 |             total_size = response.content_length
140 | 
141 |             try:
142 |                 async for chunk in response.content.iter_chunked(
143 |                     dl_config.chunk_size_bytes
144 |                 ):
145 |                     total_downloaded += len(chunk)
146 |                     dl_config.on_status_update(
147 |                         DownloadingStatus(
148 |                             name=filename,
149 |                             total_bytes=total_size,
150 |                             total_downloaded_bytes=total_downloaded,
151 |                             downloaded_bytes=len(chunk),
152 |                         ),
153 |                     )
154 |                     await file.write(chunk)
155 |             except (CancelledError, KeyboardInterrupt) as e:
156 |                 raise DownloadCancelledError(
157 |                     file=file_path, resource_url=dl_config.url
158 |                 ) from e
159 |             except DownloadTimeoutError as e:
160 |                 raise DownloadTimeoutError(
161 |                     file=file_path, resource_url=dl_config.url
162 |                 ) from e
163 |             except (ConnectionResetError, BrokenPipeError, ClientConnectionError) as e:
164 |                 raise DownloadConnectionError(
165 |                     file=file_path, resource_url=dl_config.url
166 |                 ) from e
167 |             except OSError as e:
168 |                 raise DownloadIOFailureError(
169 |                     file=file_path, resource_url=dl_config.url
170 |                 ) from e
171 | 
172 |         return file_path
173 | 


--------------------------------------------------------------------------------
/test/integration/fixtures.py:
--------------------------------------------------------------------------------
  1 | """Shared fixtures for Boosty API integration tests."""
  2 | 
  3 | import logging
  4 | from collections.abc import AsyncGenerator
  5 | 
  6 | import pytest
  7 | import pytest_asyncio
  8 | from aiohttp import ClientSession, CookieJar
  9 | from aiohttp.typedefs import LooseHeaders
 10 | from aiohttp_retry import ExponentialRetry, RetryClient
 11 | from pydantic import ValidationError
 12 | 
 13 | from boosty_downloader.src.infrastructure.boosty_api.core.client import BoostyAPIClient
 14 | from boosty_downloader.src.infrastructure.boosty_api.utils.auth_parsers import (
 15 |     parse_session_cookie,
 16 | )
 17 | from integration.configuration import IntegrationTestConfig
 18 | 
 19 | logger = logging.getLogger(__name__)
 20 | 
 21 | # ------------------------------------------------------------------------------
 22 | # Utilities for further fixtures
 23 | 
 24 | 
 25 | @pytest.fixture(scope='session')
 26 | def integration_config() -> IntegrationTestConfig:
 27 |     """
 28 |     Provides configuration for integration tests.
 29 | 
 30 |     It loads the configuration from the environment or a configuration file.
 31 |     If the configuration is invalid, it logs the errors and skips the tests.
 32 |     """
 33 |     try:
 34 |         return IntegrationTestConfig()  # pyright: ignore[reportCallIssue] : will be loaded automatically by pydantic_settings
 35 | 
 36 |     except ValidationError as e:
 37 |         logger.exception('❌ Failed to load integration test config:')
 38 |         for err in e.errors():
 39 |             loc = '.'.join(map(str, err['loc']))
 40 |             msg = err['msg']
 41 |             logger.exception(f'  - {loc}: {msg}')
 42 |         pytest.skip('Integration tests require valid configuration')
 43 | 
 44 | 
 45 | @pytest.fixture
 46 | def boosty_headers(integration_config: IntegrationTestConfig) -> LooseHeaders:
 47 |     """Returns headers with authorization token for Boosty API requests."""
 48 |     return {
 49 |         'Authorization': integration_config.boosty_auth_token,
 50 |         'Content-Type': 'application/json',
 51 |     }
 52 | 
 53 | 
 54 | @pytest_asyncio.fixture
 55 | async def boosty_cookies_jar_async(
 56 |     integration_config: IntegrationTestConfig,
 57 | ) -> CookieJar:
 58 |     # This avoids 'no running event loop' error by ensuring the jar is created in an async context
 59 |     return parse_session_cookie(integration_config.boosty_cookies)
 60 | 
 61 | 
 62 | # ------------------------------------------------------------------------------
 63 | # Different session setups
 64 | 
 65 | 
 66 | @pytest_asyncio.fixture
 67 | async def authorized_http_session(
 68 |     boosty_headers: LooseHeaders,
 69 |     boosty_cookies_jar_async: CookieJar,
 70 | ) -> AsyncGenerator[ClientSession, None]:
 71 |     """Creates an HTTP session for making requests."""
 72 |     session = ClientSession(
 73 |         headers=boosty_headers,
 74 |         cookie_jar=boosty_cookies_jar_async,
 75 |     )
 76 |     yield session
 77 |     await session.close()
 78 | 
 79 | 
 80 | @pytest_asyncio.fixture
 81 | async def unauthorized_http_session() -> AsyncGenerator[ClientSession, None]:
 82 |     """Creates an HTTP session without authorization headers."""
 83 |     session = ClientSession()
 84 |     yield session
 85 |     await session.close()
 86 | 
 87 | 
 88 | @pytest_asyncio.fixture
 89 | async def invalid_auth_http_session() -> AsyncGenerator[ClientSession, None]:
 90 |     session = ClientSession(
 91 |         headers={
 92 |             'Authorization': 'Bearer '
 93 |             + 'a' * 64,  # Looks valid (64 hex chars), but not actually valid
 94 |         },
 95 |     )
 96 |     yield session
 97 |     await session.close()
 98 | 
 99 | 
100 | # ------------------------------------------------------------------------------
101 | # Retry clients wrapping the HTTP sessions
102 | 
103 | 
104 | @pytest_asyncio.fixture
105 | async def authorized_retry_client(
106 |     authorized_http_session: ClientSession,
107 | ) -> AsyncGenerator[RetryClient, None]:
108 |     """Creates a retry client for handling transient failures."""
109 |     retry_options = ExponentialRetry(attempts=3, start_timeout=1.0)
110 |     client = RetryClient(
111 |         client_session=authorized_http_session,
112 |         retry_options=retry_options,
113 |     )
114 |     yield client
115 |     await client.close()
116 | 
117 | 
118 | @pytest_asyncio.fixture
119 | async def unauthorized_retry_client(
120 |     unauthorized_http_session: ClientSession,
121 | ) -> AsyncGenerator[RetryClient, None]:
122 |     """Creates a retry client without authentication for testing unauthorized scenarios."""
123 |     retry_options = ExponentialRetry(attempts=3, start_timeout=1.0)
124 |     client = RetryClient(
125 |         client_session=unauthorized_http_session,
126 |         retry_options=retry_options,
127 |     )
128 |     yield client
129 |     await client.close()
130 | 
131 | 
132 | @pytest_asyncio.fixture
133 | async def invalid_auth_retry_client(
134 |     invalid_auth_http_session: ClientSession,
135 | ) -> AsyncGenerator[RetryClient, None]:
136 |     """Creates a retry client with invalid authentication for testing error handling."""
137 |     retry_options = ExponentialRetry(attempts=3, start_timeout=1.0)
138 |     client = RetryClient(
139 |         client_session=invalid_auth_http_session,
140 |         retry_options=retry_options,
141 |     )
142 |     yield client
143 |     await client.close()
144 | 
145 | 
146 | # ------------------------------------------------------------------------------
147 | # Clients for Boosty API
148 | 
149 | 
150 | @pytest_asyncio.fixture
151 | async def authorized_boosty_client(
152 |     authorized_retry_client: RetryClient,
153 | ) -> BoostyAPIClient:
154 |     """Creates a Boosty API client configured with authentication."""
155 |     return BoostyAPIClient(session=authorized_retry_client)
156 | 
157 | 
158 | @pytest_asyncio.fixture
159 | async def unauthorized_boosty_client(
160 |     unauthorized_retry_client: RetryClient,
161 | ) -> BoostyAPIClient:
162 |     """Creates a Boosty API client without authentication for testing unauthorized scenarios."""
163 |     return BoostyAPIClient(session=unauthorized_retry_client, request_delay_seconds=1)
164 | 
165 | 
166 | @pytest_asyncio.fixture
167 | async def invalid_auth_boosty_client(
168 |     invalid_auth_retry_client: RetryClient,
169 | ) -> BoostyAPIClient:
170 |     """Creates a Boosty API client with invalid authentication for testing error handling."""
171 |     return BoostyAPIClient(session=invalid_auth_retry_client, request_delay_seconds=1)
172 | 


--------------------------------------------------------------------------------
/test/unit/html_generator/html_templates_test.py:
--------------------------------------------------------------------------------
  1 | from pathlib import Path
  2 | 
  3 | from boosty_downloader.src.infrastructure.html_generator.models import (
  4 |     HtmlGenChunk,
  5 |     HtmlGenImage,
  6 |     HtmlGenList,
  7 |     HtmlGenText,
  8 |     HtmlGenVideo,
  9 |     HtmlListItem,
 10 |     HtmlTextFragment,
 11 |     HtmlTextStyle,
 12 | )
 13 | from boosty_downloader.src.infrastructure.html_generator.renderer import (
 14 |     render_html,
 15 |     render_html_to_file,
 16 | )
 17 | 
 18 | 
 19 | def test_html_generator_templates():
 20 |     chunks: list[HtmlGenChunk] = [
 21 |         HtmlGenText(
 22 |             text_fragments=[
 23 |                 HtmlTextFragment(text='Welcome to my Boosty!', header_level=1),
 24 |                 HtmlTextFragment(
 25 |                     text='This post includes various elements: text, media, and lists.',
 26 |                 ),
 27 |                 HtmlTextFragment(text='<NEW_LINE_SYMBOL>'),
 28 |                 HtmlTextFragment(
 29 |                     text="Let's dive in below:",
 30 |                     style=HtmlTextStyle(italic=True),
 31 |                 ),
 32 |             ]
 33 |         ),
 34 |         HtmlGenText(
 35 |             text_fragments=[
 36 |                 HtmlTextFragment(text='Highlights', header_level=2),
 37 |                 HtmlTextFragment(
 38 |                     text='This paragraph contains a mix of ',
 39 |                 ),
 40 |                 HtmlTextFragment(
 41 |                     text='bold',
 42 |                     style=HtmlTextStyle(bold=True),
 43 |                 ),
 44 |                 HtmlTextFragment(text=', '),
 45 |                 HtmlTextFragment(
 46 |                     text='italic',
 47 |                     style=HtmlTextStyle(italic=True),
 48 |                 ),
 49 |                 HtmlTextFragment(text=', and '),
 50 |                 HtmlTextFragment(
 51 |                     text='underlined',
 52 |                     style=HtmlTextStyle(underline=True),
 53 |                 ),
 54 |                 HtmlTextFragment(text=' text. You can '),
 55 |                 HtmlTextFragment(
 56 |                     text='click here',
 57 |                     link_url='https://boosty.to/example',
 58 |                     style=HtmlTextStyle(underline=True),
 59 |                 ),
 60 |                 HtmlTextFragment(text=' to support me.'),
 61 |             ]
 62 |         ),
 63 |         HtmlGenList(
 64 |             items=[
 65 |                 HtmlListItem(
 66 |                     data=[
 67 |                         HtmlGenText(
 68 |                             text_fragments=[
 69 |                                 HtmlTextFragment(text="📌 What you'll get inside:")
 70 |                             ]
 71 |                         )
 72 |                     ],
 73 |                     nested_items=[
 74 |                         HtmlListItem(
 75 |                             data=[
 76 |                                 HtmlGenText(
 77 |                                     text_fragments=[
 78 |                                         HtmlTextFragment(text='High-quality images')
 79 |                                     ]
 80 |                                 )
 81 |                             ],
 82 |                             nested_items=[],
 83 |                         ),
 84 |                         HtmlListItem(
 85 |                             data=[
 86 |                                 HtmlGenText(
 87 |                                     text_fragments=[
 88 |                                         HtmlTextFragment(text='Source files (PSD, RAW)')
 89 |                                     ]
 90 |                                 )
 91 |                             ],
 92 |                             nested_items=[],
 93 |                         ),
 94 |                         HtmlListItem(
 95 |                             data=[
 96 |                                 HtmlGenText(
 97 |                                     text_fragments=[
 98 |                                         HtmlTextFragment(text='Bonus video content')
 99 |                                     ]
100 |                                 )
101 |                             ],
102 |                             nested_items=[
103 |                                 HtmlListItem(
104 |                                     data=[
105 |                                         HtmlGenText(
106 |                                             text_fragments=[
107 |                                                 HtmlTextFragment(
108 |                                                     text='Behind the scenes'
109 |                                                 )
110 |                                             ]
111 |                                         )
112 |                                     ],
113 |                                     nested_items=[],
114 |                                 ),
115 |                                 HtmlListItem(
116 |                                     data=[
117 |                                         HtmlGenText(
118 |                                             text_fragments=[
119 |                                                 HtmlTextFragment(
120 |                                                     text='Unreleased footage'
121 |                                                 )
122 |                                             ]
123 |                                         )
124 |                                     ],
125 |                                     nested_items=[],
126 |                                 ),
127 |                             ],
128 |                         ),
129 |                     ],
130 |                 )
131 |             ]
132 |         ),
133 |         HtmlGenImage(url='https://example.com/banner.jpg'),
134 |         HtmlGenVideo(
135 |             title='Exclusive Behind the Scenes',
136 |             url='https://example.com/video.mp4',
137 |         ),
138 |         HtmlGenVideo(url='https://www.youtube.com/watch?v=dQw4w9WgXcQ'),
139 |         HtmlGenText(
140 |             text_fragments=[
141 |                 HtmlTextFragment(text='<NEW_LINE_SYMBOL>'),
142 |                 HtmlTextFragment(text='Thanks for reading!', header_level=2),
143 |                 HtmlTextFragment(
144 |                     text='Feel free to leave a comment or suggestion below.',
145 |                 ),
146 |             ]
147 |         ),
148 |     ]
149 | 
150 |     data = render_html(chunks)
151 | 
152 |     test_output_file = Path('test_output.html')
153 | 
154 |     render_html_to_file(chunks, test_output_file)
155 | 
156 |     assert test_output_file.exists()
157 |     assert test_output_file.read_text(encoding='utf-8') == data
158 |     assert len(data) > 0
159 | 
160 |     test_output_file.unlink(missing_ok=True)
161 | 


--------------------------------------------------------------------------------
/.github/workflows/release.yaml:
--------------------------------------------------------------------------------
# Release workflow: extracts the package version, validates it against PyPI,
# builds the distribution, tags the commit, publishes to PyPI and creates a
# GitHub Release (see the jobs below).
name: release

# Triggered by every push to `main` and by manual dispatch.
on:
  push:
    branches:
      - main
  workflow_dispatch:

# Workflow-level environment variables, available to the shell steps of every job.
env:
  PACKAGE_NAME: "boosty-downloader"
  OWNER: "Glitchy-Sheep"

jobs:
  # Reads the project version from the `main` branch via Poetry and exposes it
  # as the job output `version`.
  extract_base_project_version:
    name: "📋 Extract pyproject.toml version from main"
    runs-on: ubuntu-latest
    outputs:
      version: ${{ steps.extract_version.outputs.version }}
    steps:
      # Ensure that we are on the main branch to get latest stable version
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: main

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install Poetry
        uses: snok/install-poetry@v1

      # `poetry version --short` prints the bare version number; it is stored
      # under the `version` key of this step's outputs.
      - name: Extract base version
        id: extract_version
        run: |
          VERSION=$(poetry version --short)
          echo "version=$VERSION" >> "$GITHUB_OUTPUT"
 39 | 
  # Reads the project version from the ref that triggered the workflow (no
  # explicit `ref` is passed to checkout) and exposes it as the job output
  # `version`, which the later jobs consume.
  extract_pushed_version:
    name: "📦 Extract pushed pyproject.toml version"
    runs-on: ubuntu-latest
    outputs:
      version: ${{ steps.extract_version.outputs.version }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0  # so that poetry can read pyproject.toml in any case

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install Poetry
        uses: snok/install-poetry@v1

      # `poetry version --short` prints the bare version number; it is stored
      # under the `version` key of this step's outputs.
      - name: Extract pushed version
        id: extract_version
        run: |
          VERSION=$(poetry version --short)
          echo "version=$VERSION" >> "$GITHUB_OUTPUT"
 63 | 
 64 | 
 65 | 
 66 |   # Check if new version is greater than the latest version on PyPI
 67 |   check_pypi:
 68 |     name: "🔍 Validate version against PyPI"
 69 |     needs: extract_pushed_version 
 70 |     runs-on: ubuntu-latest
 71 |     steps:
 72 |       - uses: actions/checkout@v4
 73 |       - name: 🩺 Check PyPi release version compatibility 
 74 |         run: |
 75 |           echo "Checking package: $PACKAGE_NAME"
 76 |           echo "Current version: $CURRENT_VERSION"
 77 | 
 78 |           PACKAGE_NAME="${{ env.PACKAGE_NAME }}"
 79 |           CURRENT_VERSION="${{ needs.extract_pushed_version.outputs.version }}"
 80 | 
 81 |           response=$(curl -s "https://pypi.org/pypi/$PACKAGE_NAME/json" || echo "{}")
 82 | 
 83 |           pypi_version=$(echo "$response" | jq --raw-output "select(.releases != null) | .releases | keys_unsorted | last // empty")
 84 | 
 85 |           if [ -z "$pypi_version" ] || [ "$pypi_version" = "null" ]; then
 86 |             echo "Package not found on PyPI or no releases available."
 87 |             pypi_version="0.0.0"
 88 |           fi
 89 | 
 90 |           echo "Latest version on PyPI: $pypi_version"
 91 |           echo "pypi_version=$pypi_version" >> "$GITHUB_OUTPUT"
 92 | 
 93 |           # Compare versions using sort -rV
 94 |           if [ "$CURRENT_VERSION" = "$pypi_version" ]; then
 95 |             echo "❌ Current version equals PyPI version ($CURRENT_VERSION)"
 96 |             echo "is_newer=false" >> "$GITHUB_OUTPUT"
 97 |             exit 1
 98 |           elif [ "$(printf '%s\n' "$pypi_version" "$CURRENT_VERSION" | sort -rV | head -n 1)" = "$CURRENT_VERSION" ]; then
 99 |             echo "✅ Current version ($CURRENT_VERSION) is newer than PyPI version ($pypi_version)"
100 |             echo "is_newer=true" >> "$GITHUB_OUTPUT"
101 |           else
102 |             echo "❌ Current version ($CURRENT_VERSION) is older than PyPI version ($pypi_version)"
103 |             echo "is_newer=false" >> "$GITHUB_OUTPUT"
104 |             exit 1
105 |           fi
106 | 
107 | 
  # Install dependencies and build distribution packages, then upload `dist/`
  # as the `dist` artifact for the publishing jobs.
  # NOTE(review): no explicit test step is visible here; tests run only if
  # `make deps` / `make build` invoke them - confirm against the Makefile.
  setup_and_build:
    name: "🔨 Build distribution packages"
    needs: [extract_pushed_version, extract_base_project_version, check_pypi]
    runs-on: ubuntu-latest
    steps:
      # Check out the ref that triggered the workflow (no explicit `ref` is set,
      # so this is `main` only when the run was triggered from `main`).
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install Poetry
        uses: snok/install-poetry@v1

      - name: Install dependencies
        run: make deps
        timeout-minutes: 10

      - name: Build source and wheel distribution
        run: make build 

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: dist
          path: dist/
139 | 
  # Creates the annotated tag `v<version>` for the pushed version and pushes it
  # to `origin`. Runs only after the build job has succeeded.
  create_tag:
    name: "🏷️ Create release tag"
    needs: [extract_pushed_version, setup_and_build]
    runs-on: ubuntu-latest
    permissions:
      # Write access to repository contents is requested for pushing the tag.
      contents: write
    steps:
      - uses: actions/checkout@v4

      - name: Create and push tag
        run: |
          VERSION=${{ needs.extract_pushed_version.outputs.version }}
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git tag -a "v$VERSION" -m "Release v$VERSION"
          git push origin "v$VERSION"
156 | 
157 | 
  # Downloads the `dist` artifact produced by `setup_and_build` and publishes
  # it to PyPI. Runs in the `release` environment after the tag is created.
  pypi_publish:
    name: "📦 Upload release to PyPI"
    needs: [setup_and_build, extract_pushed_version, create_tag]
    runs-on: ubuntu-latest
    environment:
      name: release
    permissions:
      # NOTE(review): no token/password is passed to the publish action, so this
      # presumably relies on PyPI trusted publishing (OIDC) - confirm.
      id-token: write
    steps:
      - name: Download artifacts
        uses: actions/download-artifact@v5
        with:
          name: dist
          path: dist/

      - name: Publish distribution to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
175 | 
176 | 
  # Creates the GitHub Release `v<version>` with auto-generated notes and
  # attaches every file from the downloaded `dist` artifact.
  github_release:
    name: "🚀 Create GitHub Release"
    needs: [setup_and_build, extract_pushed_version, create_tag]
    runs-on: ubuntu-latest
    permissions:
      # Write access to repository contents is requested for creating the release.
      contents: write
    steps:
      - name: Checkout Code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Download artifacts
        uses: actions/download-artifact@v5
        with:
          name: dist
          path: dist/

      - name: Create GitHub Release
        env:
          # Token used by the `gh` CLI for authentication.
          GH_TOKEN: ${{ github.token }}
        run: |
          VERSION=${{ needs.extract_pushed_version.outputs.version }}
          gh release create "v$VERSION" dist/* --title "v$VERSION" --generate-notes
201 | 
202 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/boosty_api/core/client.py:
--------------------------------------------------------------------------------
  1 | """Boosty API client for accessing content."""
  2 | 
  3 | from __future__ import annotations
  4 | 
  5 | from http import HTTPStatus
  6 | from typing import TYPE_CHECKING
  7 | 
  8 | from aiolimiter import AsyncLimiter
  9 | from pydantic import ValidationError
 10 | from yarl import URL
 11 | 
 12 | from boosty_downloader.src.infrastructure.boosty_api.core.endpoints import (
 13 |     BOOSTY_DEFAULT_BASE_URL,
 14 | )
 15 | from boosty_downloader.src.infrastructure.boosty_api.models.post.extra import Extra
 16 | from boosty_downloader.src.infrastructure.boosty_api.models.post.post import PostDTO
 17 | from boosty_downloader.src.infrastructure.boosty_api.models.post.posts_request import (
 18 |     PostsResponse,
 19 | )
 20 | from boosty_downloader.src.infrastructure.boosty_api.utils.filter_none_params import (
 21 |     filter_none_params,
 22 | )
 23 | 
 24 | if TYPE_CHECKING:
 25 |     from collections.abc import AsyncGenerator, Mapping
 26 | 
 27 |     from aiohttp import ClientResponse
 28 |     from aiohttp_retry import RetryClient
 29 |     from pydantic_core import ErrorDetails
 30 | 
 31 | 
 32 | class BoostyAPIError(Exception):
 33 |     """Base class for all Boosty API related errors."""
 34 | 
 35 | 
 36 | class BoostyAPINoUsernameError(BoostyAPIError):
 37 |     """Raised when no username is specified."""
 38 | 
 39 |     username: str
 40 | 
 41 |     def __init__(self, username: str) -> None:
 42 |         super().__init__(f'Username not found: {username}')
 43 |         self.username = username
 44 | 
 45 | 
 46 | class BoostyAPIUnauthorizedError(BoostyAPIError):
 47 |     """Raised when authorization error occurs, e.g when credentials is invalid."""
 48 | 
 49 | 
 50 | class BoostyAPIUnknownError(BoostyAPIError):
 51 |     """Raised when Boosty returns unexpected error."""
 52 | 
 53 |     details: str
 54 | 
 55 |     def __init__(self, status_code: int, details: str) -> None:
 56 |         super().__init__(f'Boosty returned unknown error[{status_code}]: {details}')
 57 |         self.details = details
 58 | 
 59 | 
 60 | class BoostyAPIValidationError(BoostyAPIError):
 61 |     """
 62 |     Raised when validation error occurs, e.g. when response data is invalid.
 63 | 
 64 |     It can happen if the API response structure changes.
 65 |     In that case the client should be updated to match the new structure.
 66 |     """
 67 | 
 68 |     errors: list[ErrorDetails]
 69 | 
 70 |     def __init__(self, errors: list[ErrorDetails]) -> None:
 71 |         super().__init__('Boosty API response validation error')
 72 |         self.errors = errors
 73 | 
 74 | 
 75 | def _create_limiter(request_delay_seconds: float) -> AsyncLimiter | None:
 76 |     # aiolimiter expects max_rate and time_period to be positive.
 77 |     # For delays <1s, we use a 1-second window and scale the rate to avoid exceptions and ensure correct throttling.
 78 |     # For delays >=1s, we allow 1 request per delay period, matching the intended throttle.
 79 |     # Without this logic, certain values (e.g. delay=0.5) would cause aiolimiter to raise or throttle incorrectly.
 80 |     if request_delay_seconds > 0:
 81 |         if request_delay_seconds < 1:
 82 |             max_rate = 1 / request_delay_seconds
 83 |             time_period = 1
 84 |         else:
 85 |             max_rate = 1
 86 |             time_period = request_delay_seconds
 87 |         return AsyncLimiter(max_rate=max_rate, time_period=time_period)
 88 |     return None
 89 | 
 90 | 
 91 | class BoostyAPIClient:
 92 |     """
 93 |     Main client class for the Boosty API.
 94 | 
 95 |     The session you provide to this class MUST NOT CONTAIN BASE URL.
 96 |     It should only contain headers and cookies. Base url is set internally.
 97 | 
 98 |     It handles the connection and makes requests to the API.
 99 |     To work with private/paid posts you need to provide valid authentication token and cookies in the session.
100 |     """
101 | 
102 |     def __init__(
103 |         self,
104 |         session: RetryClient,
105 |         request_delay_seconds: float = 0.0,
106 |         base_url: URL | None = None,
107 |     ) -> None:
108 |         self._base_url = base_url or BOOSTY_DEFAULT_BASE_URL
109 |         self.session = session
110 |         self._limiter = _create_limiter(request_delay_seconds)
111 | 
112 |     async def _throttled_get(
113 |         self,
114 |         endpoint: str,
115 |         params: Mapping[str, str] | None = None,
116 |         headers: Mapping[str, str] | None = None,
117 |     ) -> ClientResponse:
118 |         url = URL(self._base_url) / endpoint.lstrip('/')
119 | 
120 |         if self._limiter:
121 |             async with self._limiter:
122 |                 return await self.session.get(url, params=params, headers=headers)
123 |         return await self.session.get(url, params=params, headers=headers)
124 | 
125 |     async def get_author_posts(
126 |         self,
127 |         author_name: str,
128 |         limit: int,
129 |         offset: str | None = None,
130 |     ) -> PostsResponse:
131 |         """
132 |         Request to get posts from the specified author.
133 | 
134 |         The request supports pagination, so the response contains meta info.
135 |         If you want to get all posts, you need to repeat the request with the offset of previous response
136 |         until the 'is_last' field becomes True.
137 |         """
138 |         endpoint = f'blog/{author_name}/post/'
139 | 
140 |         posts_raw = await self._throttled_get(
141 |             endpoint,
142 |             params=filter_none_params(
143 |                 {
144 |                     'offset': offset,
145 |                     'limit': limit,
146 |                 },
147 |             ),
148 |         )
149 |         posts_data = await posts_raw.json()
150 | 
151 |         if posts_raw.status == HTTPStatus.NOT_FOUND:
152 |             raise BoostyAPINoUsernameError(author_name)
153 | 
154 |         # This will be returned if the user has creds but they're invalid/expired
155 |         if posts_raw.status == HTTPStatus.UNAUTHORIZED:
156 |             raise BoostyAPIUnauthorizedError
157 | 
158 |         if posts_raw.status != HTTPStatus.OK:
159 |             raise BoostyAPIUnknownError(
160 |                 posts_raw.status, f'Unexpected status code: {posts_raw.status}'
161 |             )
162 | 
163 |         try:
164 |             posts: list[PostDTO] = [
165 |                 PostDTO.model_validate(post) for post in posts_data['data']
166 |             ]
167 |             extra: Extra = Extra.model_validate(posts_data['extra'])
168 |         except ValidationError as e:
169 |             raise BoostyAPIValidationError(errors=e.errors()) from e
170 | 
171 |         return PostsResponse(
172 |             posts=posts,
173 |             extra=extra,
174 |         )
175 | 
176 |     async def iterate_over_posts(
177 |         self,
178 |         author_name: str,
179 |         posts_per_page: int = 5,
180 |     ) -> AsyncGenerator[PostsResponse, None]:
181 |         """
182 |         Infinite generator iterating over posts of the specified author.
183 | 
184 |         The generator will yield all posts of the author, paginating internally.
185 |         """
186 |         offset = None
187 |         while True:
188 |             response = await self.get_author_posts(
189 |                 author_name,
190 |                 offset=offset,
191 |                 limit=posts_per_page,
192 |             )
193 |             yield response
194 |             if response.extra.is_last:
195 |                 break
196 |             offset = response.extra.offset
197 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/external_videos_downloader/external_videos_downloader.py:
--------------------------------------------------------------------------------
  1 | """Manager for downloading external videos (e.g., YouTube, Vimeo) with progress reporting."""
  2 | # ruff: noqa: I001
  3 | 
  4 | from __future__ import annotations
  5 | 
  6 | import contextlib
  7 | from collections.abc import Callable
  8 | from dataclasses import dataclass
  9 | from pathlib import Path
 10 | from typing import Any, ClassVar, cast
 11 | 
 12 | from yt_dlp.YoutubeDL import YoutubeDL
 13 | from yt_dlp.utils import DownloadError
 14 | 
# Option mapping handed to YoutubeDL (option name -> value).
YtDlOptions = dict[str, object]
# Signature of the user-supplied progress callback: it receives one
# ExternalVideoDownloadStatus payload per progress update and returns nothing.
ExternalVideoDownloadProgressHook = Callable[['ExternalVideoDownloadStatus'], None]
 17 | 
 18 | 
 19 | class ExtVideoError(Exception):
 20 |     """Base class for external video download errors."""
 21 | 
 22 | 
 23 | class ExtVideoInfoError(ExtVideoError):
 24 |     """Raised when video information (e.g., title) cannot be extracted."""
 25 | 
 26 |     def __init__(self, url: str) -> None:
 27 |         self.video_url = url
 28 | 
 29 | 
 30 | class ExtVideoDownloadError(ExtVideoError):
 31 |     """Raised when the video download fails."""
 32 | 
 33 |     def __init__(self, url: str) -> None:
 34 |         self.video_url = url
 35 | 
 36 | 
 37 | class ExtVideoInterruptedByUserError(ExtVideoError):
 38 |     """Raised when the user interrupts the download (Ctrl+C)."""
 39 | 
 40 | 
 41 | @dataclass(slots=True)
 42 | class ExternalVideoDownloadStatus:
 43 |     """Status payload for reporting external video download progress."""
 44 | 
 45 |     name: str
 46 |     total_bytes: int | None
 47 |     downloaded_bytes: int | None
 48 |     speed: float | None
 49 |     percentage: float
 50 |     delta_bytes: int
 51 | 
 52 | 
 53 | @dataclass(slots=True)
 54 | class _HookState:
 55 |     """Internal state holder for tracking the status of an external video download."""
 56 | 
 57 |     last_downloaded: int = 0
 58 |     final_filename: Path | None = None
 59 | 
 60 | 
 61 | class _SilentLogger:
 62 |     """
 63 |     Silly hack for yt-dlp to supress any noisy logging output.
 64 | 
 65 |     For logging use ExternalVideoDownloadStatus with progress callback.
 66 |     And for errors use the downloader exceptions.
 67 |     """
 68 | 
 69 |     def debug(self, msg: str) -> None:
 70 |         pass
 71 | 
 72 |     def info(self, msg: str) -> None:
 73 |         pass
 74 | 
 75 |     def warning(self, msg: str) -> None:
 76 |         pass
 77 | 
 78 |     def error(self, msg: str) -> None:
 79 |         pass
 80 | 
 81 |     def critical(self, msg: str) -> None:
 82 |         pass
 83 | 
 84 | 
 85 | class ExternalVideosDownloader:
 86 |     """Manager for downloading external videos (YouTube, Vimeo) with a 720p preference."""
 87 | 
 88 |     # Prefer 720p when available, otherwise choose the best >720
 89 |     _default_ydl_options: ClassVar[YtDlOptions] = {
 90 |         'format': 'bv*[height=720]+ba/bv*[height>720]+ba/bv*+ba/b',
 91 |         'quiet': True,
 92 |         'no_warnings': True,
 93 |         'no_color': True,
 94 |         'noprogress': True,  # Use progress hook instead
 95 |         'logger': _SilentLogger(),  # Suppress noisy error logging
 96 |     }
 97 | 
 98 |     def download_video(
 99 |         self,
100 |         url: str,
101 |         destination_directory: Path,
102 |         progress_hook: ExternalVideoDownloadProgressHook | None = None,
103 |     ) -> Path:
104 |         """Download video using yt-dlp and repeatedly report progress via progress_hook callback until completion."""
105 |         info = self._probe_video(url)
106 |         title = info.get('title')
107 |         if not isinstance(title, str) or not title.strip():
108 |             raise ExtVideoInfoError(url)
109 | 
110 |         clean_title = self._sanitize_title(title)
111 |         destination_directory.mkdir(parents=True, exist_ok=True)
112 | 
113 |         outtmpl = self._build_outtmpl(destination_directory, clean_title)
114 | 
115 |         state = _HookState()
116 |         internal_hook = self._make_progress_hook(outtmpl, progress_hook, state)
117 | 
118 |         options: YtDlOptions = self._default_ydl_options.copy()
119 |         options['outtmpl'] = outtmpl
120 |         options['progress_hooks'] = [internal_hook]
121 | 
122 |         try:
123 |             with YoutubeDL(params=options) as ydl:
124 |                 try:
125 |                     # yt-dlp isn't typed; cast to Any and coerce to int
126 |                     errors: int = int(cast('Any', ydl).download([url]))
127 |                 except KeyboardInterrupt as e:
128 |                     raise ExtVideoInterruptedByUserError from e
129 | 
130 |             if errors != 0:
131 |                 raise ExtVideoDownloadError(url)
132 | 
133 |         except DownloadError as e:
134 |             raise ExtVideoError(url) from e
135 | 
136 |         if state.final_filename is not None:
137 |             return state.final_filename
138 | 
139 |         ext = info.get('ext')
140 |         guessed_ext = ext if isinstance(ext, str) and ext else 'mp4'
141 |         return destination_directory / f'{clean_title}.{guessed_ext}'
142 | 
143 |     def _probe_video(self, url: str) -> dict[str, Any]:
144 |         # Extract metadata without downloading to validate and fetch title/ext.
145 |         try:
146 |             with YoutubeDL({**self._default_ydl_options, 'skip_download': True}) as ydl:
147 |                 raw = cast('Any', ydl).extract_info(url, download=False)
148 |         except DownloadError as e:
149 |             raise ExtVideoInfoError(url) from e
150 | 
151 |         if not isinstance(raw, dict):
152 |             raise ExtVideoInfoError(url)
153 |         return cast('dict[str, Any]', raw)
154 | 
155 |     @staticmethod
156 |     def _sanitize_title(text: str) -> str:
157 |         # Cross-platform safe subset.
158 |         return ''.join(ch for ch in text if ch.isalnum() or ch == ' ')
159 | 
160 |     @staticmethod
161 |     def _build_outtmpl(destination_directory: Path, title: str) -> str:
162 |         return str(destination_directory / f'{title}.%(ext)s')
163 | 
164 |     def _make_progress_hook(
165 |         self,
166 |         outtmpl: str,
167 |         user_hook: ExternalVideoDownloadProgressHook | None,
168 |         state: _HookState,
169 |     ) -> Callable[[dict[str, Any]], None]:
170 |         def _hook(d: dict[str, Any]) -> None:
171 |             filename = d.get('filename') or d.get('tmpfilename') or outtmpl
172 |             name = Path(str(filename)).name
173 | 
174 |             total = d.get('total_bytes') or d.get('total_bytes_estimate')
175 |             downloaded = d.get('downloaded_bytes')
176 |             speed = d.get('speed')
177 | 
178 |             total_i = int(total) if isinstance(total, (int, float)) else None
179 |             downloaded_i = (
180 |                 int(downloaded) if isinstance(downloaded, (int, float)) else None
181 |             )
182 |             speed_f = float(speed) if isinstance(speed, (int, float)) else None
183 | 
184 |             if total_i and downloaded_i is not None and total_i > 0:
185 |                 percentage = (downloaded_i / total_i) * 100.0
186 |             else:
187 |                 percentage = 0.0
188 | 
189 |             if downloaded_i is not None:
190 |                 delta = downloaded_i - state.last_downloaded
191 |                 state.last_downloaded = downloaded_i
192 |             else:
193 |                 delta = 0
194 | 
195 |             status_payload = ExternalVideoDownloadStatus(
196 |                 name=name,
197 |                 total_bytes=total_i,
198 |                 downloaded_bytes=downloaded_i,
199 |                 speed=speed_f,
200 |                 percentage=percentage,
201 |                 delta_bytes=delta,
202 |             )
203 | 
204 |             if user_hook is not None:
205 |                 with contextlib.suppress(Exception):
206 |                     user_hook(status_payload)
207 | 
208 |             if d.get('status') in {'finished', 'postprocessing'}:
209 |                 f = d.get('filename')
210 |                 if isinstance(f, str):
211 |                     state.final_filename = Path(f)
212 | 
213 |         return _hook
214 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/infrastructure/post_caching/post_cache.py:
--------------------------------------------------------------------------------
  1 | """Implementation of a post cache using SQLAlchemy + SQLite local database."""
  2 | 
  3 | from datetime import datetime
  4 | from pathlib import Path
  5 | from types import TracebackType
  6 | 
  7 | from sqlalchemy import String, create_engine, text
  8 | from sqlalchemy.exc import DatabaseError, OperationalError
  9 | from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column, sessionmaker
 10 | 
 11 | from boosty_downloader.src.application.filtering import (
 12 |     DownloadContentTypeFilter,
 13 | )
 14 | from boosty_downloader.src.infrastructure.loggers.base import RichLogger
 15 | 
 16 | 
 17 | class Base(DeclarativeBase):
 18 |     """Base class for SQLAlchemy models."""
 19 | 
 20 | 
class _PostCacheEntryModel(Base):
    """
    Internal sqlite table structure of the caching layer.

    One row per post (keyed by its UUID) holding a flag per downloadable part
    and the timestamp of the post revision the flags refer to.
    """

    __tablename__ = 'post_cache'
    # Alias documenting that the timestamp column holds an ISO 8601 string.
    _Iso8601Datetime = str

    # Primary key: UUID of the cached post.
    post_uuid: Mapped[str] = mapped_column(String, primary_key=True)

    # Flags to see which parts of the posts were downloaded and which are not.
    files_downloaded: Mapped[bool] = mapped_column(default=False, nullable=False)
    post_content_downloaded: Mapped[bool] = mapped_column(default=False, nullable=False)
    external_videos_downloaded: Mapped[bool] = mapped_column(
        default=False, nullable=False
    )
    boosty_videos_downloaded: Mapped[bool] = mapped_column(
        default=False, nullable=False
    )

    # Timestamp of the last update of the post.
    # Useful to determine if the post is outdated and needs to be re-downloaded even if some parts were downloaded before.
    #
    # Should be in ISO 8601 format (e.g., "2023-10-01T12:00:00Z").
    # because SQLite does not have a native tz-aware datetime type.
    # The value is written by `SQLitePostCache.cache` as `datetime.isoformat()` output.
    last_updated_timestamp: Mapped[_Iso8601Datetime] = mapped_column(
        String, nullable=False
    )
 47 | 
 48 | 
 49 | class SQLitePostCache:
 50 |     """
 51 |     Post cache using SQLite with SQLAlchemy.
 52 | 
 53 |     Caches posts in a local SQLite database under a given directory.
 54 |     Automatically reinitializes the database if it's missing or corrupted.
 55 | 
 56 |     Caching mechanism is smart enough to determine which specific parts are up-to-date
 57 |     and which are not.
 58 |     """
 59 | 
 60 |     DEFAULT_CACHE_FILENAME = 'post_cache.db'
 61 | 
 62 |     def __enter__(self) -> 'SQLitePostCache':
 63 |         """Create a context manager for the SQLitePostCache."""
 64 |         return self
 65 | 
 66 |     def __exit__(
 67 |         self,
 68 |         exc_type: type[BaseException] | None,
 69 |         exc_value: BaseException | None,
 70 |         exc_tb: TracebackType | None,
 71 |     ) -> None:
 72 |         """Ensure that the database connection is closed when exiting the context."""
 73 |         self.close()
 74 | 
 75 |     def __init__(self, destination: Path, logger: RichLogger) -> None:
 76 |         """Make a connection with the SQLite database and create/init it if necessary."""
 77 |         self.logger = logger
 78 | 
 79 |         self.destination = destination
 80 |         self.db_file: Path = self.destination / self.DEFAULT_CACHE_FILENAME
 81 |         self.db_file.parent.mkdir(parents=True, exist_ok=True)
 82 | 
 83 |         self.engine = create_engine(f'sqlite:///{self.db_file}')
 84 |         Base.metadata.create_all(self.engine)
 85 | 
 86 |         self.Session = sessionmaker(bind=self.engine, expire_on_commit=False)
 87 |         self.session: Session = self.Session()
 88 |         self._dirty = False
 89 | 
 90 |     def _check_db_integrity(self) -> bool:
 91 |         """Check if post_cache table is available and the db itself is accessible."""
 92 |         try:
 93 |             # Ping the database to check if it's accessible
 94 |             self.session.execute(text('SELECT 1 FROM post_cache LIMIT 1'))
 95 |             # Ensure the expected schema (column names) is present; reinit if legacy schema is detected
 96 |             self.session.execute(text('SELECT post_uuid FROM post_cache LIMIT 1'))
 97 |         except (OperationalError, DatabaseError):
 98 |             return False
 99 |         else:
100 |             return True
101 | 
102 |     def _reinitialize_db(self) -> None:
103 |         """Reinitialize the database (recreate it from scratch) and recreate session."""
104 |         self.session.close()
105 |         self.engine.dispose()
106 | 
107 |         if self.db_file.exists():
108 |             self.db_file.unlink()  # Remove the corrupted file
109 | 
110 |         self.engine = create_engine(f'sqlite:///{self.db_file}')
111 |         Base.metadata.create_all(self.engine)
112 |         self.session = self.Session()
113 | 
114 |     def _ensure_valid(self) -> None:
115 |         """Maintenance method to ensure the database is valid before use."""
116 |         if not self._check_db_integrity():
117 |             self.logger.error(
118 |                 'Post cache database is corrupted or inaccessible. Reinitializing...'
119 |             )
120 |             self._reinitialize_db()
121 | 
122 |     def commit(self) -> None:
123 |         """
124 |         Commit any pending changes to the database if there are modifications.
125 | 
126 |         This method should be called after making changes to the database (e.g., adding,
127 |         updating, or deleting records) to ensure that the changes are persisted.
128 |         The `_dirty` flag is used to track whether there are uncommitted changes.
129 |         """
130 |         if self._dirty:
131 |             self.session.commit()
132 |             self._dirty = False
133 | 
134 |     def cache(
135 |         self,
136 |         post_uuid: str,
137 |         updated_at: datetime,
138 |         was_downloaded: list[DownloadContentTypeFilter],
139 |     ) -> None:
140 |         """Cache a post by its UUID and updated_at timestamp."""
141 |         self._ensure_valid()
142 | 
143 |         entry = self.session.get(_PostCacheEntryModel, post_uuid)
144 | 
145 |         files_downloaded = DownloadContentTypeFilter.files in was_downloaded
146 |         boosty_videos_downloaded = (
147 |             DownloadContentTypeFilter.boosty_videos in was_downloaded
148 |         )
149 |         post_content_downloaded = (
150 |             DownloadContentTypeFilter.post_content in was_downloaded
151 |         )
152 |         external_videos_downloaded = (
153 |             DownloadContentTypeFilter.external_videos in was_downloaded
154 |         )
155 | 
156 |         # If post already existed - just update False fields to True.
157 |         if entry:
158 |             entry.last_updated_timestamp = updated_at.isoformat()
159 |             entry.files_downloaded = files_downloaded or entry.files_downloaded
160 |             entry.boosty_videos_downloaded = (
161 |                 boosty_videos_downloaded or entry.boosty_videos_downloaded
162 |             )
163 |             entry.post_content_downloaded = (
164 |                 post_content_downloaded or entry.post_content_downloaded
165 |             )
166 |             entry.external_videos_downloaded = (
167 |                 external_videos_downloaded or entry.external_videos_downloaded
168 |             )
169 |         else:
170 |             entry = _PostCacheEntryModel(
171 |                 post_uuid=post_uuid,
172 |                 last_updated_timestamp=updated_at.isoformat(),
173 |                 files_downloaded=files_downloaded,
174 |                 boosty_videos_downloaded=boosty_videos_downloaded,
175 |                 post_content_downloaded=post_content_downloaded,
176 |                 external_videos_downloaded=external_videos_downloaded,
177 |             )
178 |             self.session.add(entry)
179 | 
180 |         self._dirty = True
181 | 
182 |     def get_missing_parts(
183 |         self,
184 |         post_uuid: str,
185 |         updated_at: datetime,
186 |         required: list[DownloadContentTypeFilter],
187 |     ) -> list[DownloadContentTypeFilter]:
188 |         """
189 |         Determine which parts of the post still need to be downloaded.
190 | 
191 |         Returns all required parts if the post is missing or outdated; otherwise, returns only those parts that haven't been
192 |         downloaded yet based on the current cache state.
193 |         """
194 |         self._ensure_valid()
195 |         post = self.session.get(_PostCacheEntryModel, post_uuid)
196 |         if not post:
197 |             return required
198 | 
199 |         # If cached post is outdated in general, just mark all required parts as missing.
200 |         if datetime.fromisoformat(post.last_updated_timestamp) < updated_at:
201 |             return required
202 | 
203 |         missing: list[DownloadContentTypeFilter] = [
204 |             part
205 |             for part in required
206 |             if (
207 |                 (part is DownloadContentTypeFilter.files and not post.files_downloaded)
208 |                 or (
209 |                     part is DownloadContentTypeFilter.boosty_videos
210 |                     and not post.boosty_videos_downloaded
211 |                 )
212 |                 or (
213 |                     part is DownloadContentTypeFilter.external_videos
214 |                     and not post.external_videos_downloaded
215 |                 )
216 |                 or (
217 |                     part is DownloadContentTypeFilter.post_content
218 |                     and not post.post_content_downloaded
219 |                 )
220 |             )
221 |         ]
222 | 
223 |         return missing
224 | 
225 |     def remove_cache_completely(self) -> None:
226 |         """Reinitialize the cache completely in case if user wants to start fresh."""
227 |         self._reinitialize_db()
228 | 
229 |     def close(self) -> None:
230 |         """Save and close the database connection."""
231 |         self.commit()
232 |         self.session.close()
233 |         self.engine.dispose()
234 | 


--------------------------------------------------------------------------------
/boosty_downloader/src/interfaces/console_progress_reporter.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Progress reporting and logging utilities for console-based Boosty downloader interface.
  3 | 
  4 | Includes a ProgressReporter class for rich progress bars and logging, and a FakeDownloader for demonstration/testing.
  5 | """
  6 | 
  7 | import asyncio
  8 | import logging
  9 | import secrets
 10 | import uuid
 11 | from collections.abc import AsyncGenerator, Sequence
 12 | from contextlib import asynccontextmanager
 13 | 
 14 | from rich.console import Console
 15 | from rich.logging import RichHandler
 16 | from rich.progress import (
 17 |     BarColumn,
 18 |     Progress,
 19 |     SpinnerColumn,
 20 |     TaskID,
 21 |     TaskProgressColumn,
 22 |     TimeElapsedColumn,
 23 | )
 24 | 
 25 | from boosty_downloader.src.infrastructure.loggers.base import RichLogger
 26 | 
 27 | 
 28 | class ProgressReporter:
 29 |     """
 30 |     Provides progress bar management and rich logging for console-based interfaces using the Rich library.
 31 | 
 32 |     Tasks are identified by UUIDs and can be nested using `level` to visually indent sub-tasks.
 33 |     """
 34 | 
 35 |     def __init__(
 36 |         self,
 37 |         console: Console | None = None,
 38 |         logger: logging.Logger | None = None,
 39 |     ) -> None:
 40 |         self.console = console or Console()
 41 |         self.progress = Progress(
 42 |             SpinnerColumn(),
 43 |             '[progress.description]{task.description}',
 44 |             BarColumn(),
 45 |             TaskProgressColumn(),
 46 |             TimeElapsedColumn(),
 47 |             console=self.console,
 48 |             refresh_per_second=29,
 49 |             transient=True,
 50 |         )
 51 |         self._logger = logger or self._create_default_logger()
 52 |         self._uuid_to_task_id: dict[uuid.UUID, TaskID] = {}
 53 |         self._uuid_to_level: dict[uuid.UUID, int] = {}
 54 |         self._uuid_to_name: dict[uuid.UUID, str] = {}
 55 | 
 56 |     def _create_default_logger(self) -> logging.Logger:
 57 |         logger = logging.getLogger('ProgressLogger')
 58 |         logger.setLevel(logging.INFO)
 59 |         logger.addHandler(
 60 |             RichHandler(
 61 |                 console=self.console, show_time=True, markup=True, show_path=False
 62 |             )
 63 |         )
 64 |         return logger
 65 | 
 66 |     def _format_description(self, name: str, level: int) -> str:
 67 |         indent = '  ' * level
 68 |         max_length = 80
 69 |         available = max_length - len(indent)
 70 | 
 71 |         if len(name) > available:
 72 |             name = name[: available - 1] + '…'  # use ellipsis
 73 | 
 74 |         return f'{indent}{name}'
 75 | 
 76 |     def start(self) -> None:
 77 |         self.progress.start()
 78 | 
 79 |     def stop(self) -> None:
 80 |         self.progress.stop()
 81 | 
 82 |     def create_task(
 83 |         self, name: str, total: int | None = None, indent_level: int = 0
 84 |     ) -> uuid.UUID:
 85 |         task_id = self.progress.add_task(
 86 |             self._format_description(name, indent_level), total=total
 87 |         )
 88 |         task_uuid = uuid.uuid4()
 89 |         self._uuid_to_task_id[task_uuid] = task_id
 90 |         self._uuid_to_level[task_uuid] = indent_level
 91 |         self._uuid_to_name[task_uuid] = name
 92 |         return task_uuid
 93 | 
 94 |     def update_task(
 95 |         self,
 96 |         task_uuid: uuid.UUID,
 97 |         advance: int = 1,
 98 |         total: int | None = None,
 99 |         description: str | None = None,
100 |     ) -> None:
101 |         task_id = self._uuid_to_task_id.get(task_uuid)
102 |         if task_id is not None and task_id in self.progress.task_ids:
103 |             level = self._uuid_to_level.get(task_uuid, 0)
104 |             base_name = description or self._uuid_to_name.get(task_uuid, '')
105 |             formatted_description = self._format_description(base_name, level)
106 |             self.progress.update(
107 |                 task_id,
108 |                 advance=advance,
109 |                 total=total,
110 |                 description=formatted_description,
111 |             )
112 | 
113 |     def complete_task(self, task_uuid: uuid.UUID) -> None:
114 |         task_id = self._uuid_to_task_id.get(task_uuid)
115 |         if task_id is not None and task_id in self.progress.task_ids:
116 |             total = self.progress.tasks[task_id].total
117 |             self.progress.update(task_id, completed=total, visible=False)
118 |             self._uuid_to_task_id.pop(task_uuid, None)
119 |             self._uuid_to_level.pop(task_uuid, None)
120 |             self._uuid_to_name.pop(task_uuid, None)
121 | 
122 |     def newline(self, count: int = 1) -> None:
123 |         for _ in range(count):
124 |             self.console.print()
125 | 
126 |     def headline_rule(self) -> None:
127 |         self.console.rule()
128 | 
129 |     def info(self, message: str) -> None:
130 |         self._logger.info(message)
131 | 
132 |     def success(self, message: str) -> None:
133 |         self._logger.info(f'[bold green]✔ {message}[/bold green]')
134 | 
135 |     def warn(self, message: str) -> None:
136 |         self._logger.warning(f'[bold yellow]⚠ {message}[/bold yellow]')
137 | 
138 |     def error(self, message: str) -> None:
139 |         self._logger.error(f'[bold red]✖ {message}[/bold red]')
140 | 
141 |     def notice(self, message: str) -> None:
142 |         self.console.print(
143 |             f'[bold yellow]NOTICE:[/bold yellow] {message}', highlight=False
144 |         )
145 | 
146 |     def log_list(self, title: str, items: Sequence[str]) -> None:
147 |         self.console.print(f'[bold cyan]{title}[/bold cyan]:')
148 |         for item in items:
149 |             self.console.print(f' • {item}')
150 | 
151 | 
@asynccontextmanager
async def use_reporter(
    reporter: ProgressReporter,
) -> AsyncGenerator[ProgressReporter, None]:
    """
    Run a ProgressReporter for the duration of an `async with` block.

    The reporter is started on entry and yielded to the block. It is stopped on
    exit whether the block finished normally or raised.
    """
    try:
        reporter.start()
        yield reporter
    finally:
        # Always stop the reporter, whether the body finished or raised.
        reporter.stop()
162 | 
163 | 
164 | # ------------------------------------------------------------------------------
165 | # Usage example: run it as a script to see how it works:
166 | # poetry run boosty_downloader .../console_progress_reporter.py
167 | 
if __name__ == '__main__':
    import asyncio

    class FakeDownloader:
        """Demo-only downloader: sleeps and reports progress instead of doing real work."""

        def __init__(self, reporter: ProgressReporter) -> None:
            self.reporter = reporter

        async def iterate_pages(
            self, total_pages: int = 3, posts_per_page: int = 5
        ) -> AsyncGenerator[list[str], None]:
            """Yield `total_pages` lists of made-up post names, pausing before each page."""
            for page_index in range(total_pages):
                await asyncio.sleep(0.5)
                first_number = page_index * posts_per_page + 1
                yield [
                    f'post_{number:02}'
                    for number in range(first_number, first_number + posts_per_page)
                ]

        async def download_file(self, task_name: str, size_kb: int) -> None:
            """Pretend to download `size_kb` KB in 50 KB chunks, updating a progress task."""
            chunk_size = 50
            # Ceiling division: a partially filled last chunk still counts.
            total_chunks = -(-size_kb // chunk_size)
            file_task = self.reporter.create_task(task_name, total=total_chunks)

            for chunks_done in range(1, total_chunks + 1):
                # Random pause between 0.05 and 0.15 seconds per chunk.
                await asyncio.sleep(secrets.randbelow(11) / 100 + 0.05)
                kb_done = min(chunks_done * chunk_size, size_kb)
                self.reporter.update_task(
                    file_task,
                    advance=1,
                    description=f'{task_name} [{kb_done} KB / {size_kb} KB]',
                )

            self.reporter.complete_task(file_task)

        async def download_all_posts(self, username: str) -> None:
            """Walk through all fake pages and posts, reporting progress along the way."""
            self.reporter.notice(f'Starting download for user: {username}')
            self.reporter.headline_rule()

            # The number of posts is not known up front, hence no total.
            posts_task = self.reporter.create_task('posts', total=None)
            downloaded_posts = 0

            async for page in self.iterate_pages():
                self.reporter.info(f'Loaded new page with {len(page)} posts')

                for post_title in page:
                    self.reporter.info(f'Processing post: {post_title}')

                    # Roughly one post in ten is treated as inaccessible.
                    accessible = secrets.randbelow(10) != 0
                    if accessible:
                        sizes_kb = {
                            'image_1': secrets.randbelow(201) + 100,  # 100-300 KB
                            'video_1': secrets.randbelow(1501) + 1000,  # 1-2.5 MB
                            'attachment_1': secrets.randbelow(301) + 200,  # 200-500 KB
                        }

                        for file_name, file_size in sizes_kb.items():
                            await self.download_file(
                                f'{post_title}::{file_name}', file_size
                            )
                            self.reporter.success(
                                f'Finished {file_name} of {post_title}'
                            )

                        downloaded_posts += 1
                    else:
                        self.reporter.warn(f'Skipping inaccessible post: {post_title}')

                    # Skipped posts advance the overall task as well.
                    self.reporter.update_task(posts_task, advance=1)

                self.reporter.headline_rule()

            self.reporter.success(f'✅ Finished downloading {downloaded_posts} posts.')

    async def main() -> None:
        """Wire a RichLogger into a ProgressReporter and run the fake download."""
        rich_logger = RichLogger('dumb')
        demo_reporter = ProgressReporter(
            logger=rich_logger.logging_logger_obj,
            console=rich_logger.console,
        )

        async with use_reporter(demo_reporter):
            await FakeDownloader(demo_reporter).download_all_posts('demo_user')

    asyncio.run(main())
257 | 


--------------------------------------------------------------------------------