├── src
    ├── __init__.py
    ├── config.py
    ├── database.py
    ├── client.py
    ├── users.py
    ├── channels.py
    ├── image_analysis.py
    ├── formatting.py
    ├── app.py
    ├── message_export.py
    ├── export.py
    └── media.py
├── requirements.txt
├── main.py
├── tools
    └── .gitignore
├── .env.example
├── .gitignore
├── LICENSE
├── docs
    ├── setup.md
    ├── contributing.md
    ├── faq.md
    ├── codebase.md
    ├── IMPLEMENTATION_PLAN.md
    └── SEARCH_REPLACE_FORMATTING.md
├── CLAUDE.md
└── README.md


/src/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | telethon
2 | cryptg
3 | python-dotenv
4 | requests 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
"""
Telegram Channel Saver - Main entry point.

Imports ``main`` from ``src.app`` and calls it when this file is executed
as a script.
"""
from src.app import main

# Start the application only when run directly (e.g. ``python main.py``),
# not when this module is merely imported.
if __name__ == "__main__":
    main()


--------------------------------------------------------------------------------
/tools/.gitignore:
--------------------------------------------------------------------------------
 1 | # Virtual environment
 2 | venv/
 3 | 
 4 | # Python cache
 5 | __pycache__/
 6 | *.pyc
 7 | *.pyo
 8 | 
 9 | # Test outputs
10 | *.log
11 | *.tmp
12 | 


--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
1 | # Telegram API credentials (required)
2 | # Get these from https://my.telegram.org/
3 | API_ID=your_api_id_here
4 | API_HASH=your_api_hash_here
5 | 
6 | # OpenRouter API key (optional - for AI image analysis)
7 | # Get this from https://openrouter.ai/
8 | OPENROUTER_API_KEY=your_openrouter_api_key_here


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Python
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | *.so
 6 | .Python
 7 | build/
 8 | develop-eggs/
 9 | dist/
10 | downloads/
11 | eggs/
12 | .eggs/
13 | lib/
14 | lib64/
15 | parts/
16 | sdist/
17 | var/
18 | wheels/
19 | *.egg-info/
20 | .installed.cfg
21 | *.egg
22 | 
23 | # Virtual Environment
24 | venv/
25 | ENV/
26 | env/
27 | 
28 | # IDE
29 | .idea/
30 | .vscode/
31 | *.swp
32 | *.swo
33 | 
34 | # Project specific
35 | temp/
36 | exports/
37 | *.session
38 | *.session-journal
39 | .env
40 | database.json
41 | 
42 | # OS specific
43 | .DS_Store
44 | Thumbs.db 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Sergey Bulaev
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE. 


--------------------------------------------------------------------------------
/src/config.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Configuration settings for the Telegram Channel Saver.
 3 | Contains constants and settings used throughout the application.
 4 | """
 5 | import os
 6 | import logging
 7 | 
 8 | # Configure logging
 9 | logging.basicConfig(
10 |     format='[%(levelname) 5s/%(asctime)s] %(name)s: %(message)s',
11 |     level=logging.INFO
12 | )
13 | 
14 | logger = logging.getLogger(__name__)
15 | 
16 | # Batch size and timing settings
17 | MESSAGES_BATCH_SIZE = 100  # Number of messages to process in one batch
18 | BATCH_DELAY = 2  # Delay between batches in seconds
19 | SAVE_INTERVAL = 300  # Save database every 5 minutes
20 | MAX_RETRIES = 3  # Maximum retries for failed message fetches
21 | 
22 | # Media download settings
23 | MEDIA_DOWNLOAD_DELAY = 3  # Delay between media downloads in seconds to avoid rate limits
24 | MEDIA_DOWNLOAD_TIMEOUT = 120  # Timeout for media downloads in seconds (2 minutes)
25 | MEDIA_DOWNLOAD_RETRY = 3  # Maximum number of retries for failed media downloads
26 | MEDIA_RETRY_DELAY_BASE = 5  # Base delay for retry backoff in seconds
27 | CHUNK_SIZE = 1024 * 1024  # 1MB chunk size for large downloads
28 | 
29 | # Directory settings
30 | TEMP_DIR = 'temp/channel_saver'
31 | VIDEO_TEMP_DIR = 'temp/videos'  # Directory for storing downloaded videos
32 | EXPORT_DIR = 'temp/exports'  # Directory for storing exported message files
33 | 
34 | # OpenRouter API settings
35 | OPENROUTER_API_KEY = os.getenv('OPENROUTER_API_KEY')
36 | OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
37 | OPENROUTER_MODEL = "openai/gpt-4o-mini"  # Default model for image analysis
38 | OPENROUTER_TIMEOUT = 30  # Timeout for API requests in seconds
39 | 
40 | # Create temp directories if they don't exist
41 | os.makedirs(TEMP_DIR, exist_ok=True)
42 | os.makedirs(VIDEO_TEMP_DIR, exist_ok=True)
43 | os.makedirs(EXPORT_DIR, exist_ok=True) 


--------------------------------------------------------------------------------
/src/database.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Database operations for the Telegram Channel Saver.
 3 | Handles loading, saving, and managing the JSON database.
 4 | """
 5 | import os
 6 | import json
 7 | import logging
 8 | from datetime import datetime
 9 | 
10 | from src.config import TEMP_DIR
11 | 
12 | logger = logging.getLogger(__name__)
13 | 
def load_database(db_path):
    """
    Load database from JSON file or create new if doesn't exist

    A file that exists but cannot be parsed is moved aside to
    ``<db_path>.corrupt`` before a fresh database is written, so the damaged
    data is not destroyed and can still be inspected or repaired by hand.

    Args:
        db_path: Path to the database file

    Returns:
        dict: Loaded database or new database structure
    """
    if not os.path.exists(db_path):
        return create_new_database(db_path)

    try:
        # Explicit encoding so reading does not depend on the platform locale
        with open(db_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError subclasses
        logger.warning("Corrupted database file, creating new")
        backup_path = f"{db_path}.corrupt"
        try:
            os.replace(db_path, backup_path)
            logger.warning(f"Corrupted database kept as {backup_path}")
        except OSError as e:
            # Backup is best-effort; still fall through to a fresh database
            logger.warning(f"Could not back up corrupted database: {e}")
        return create_new_database(db_path)
32 | 
def create_new_database(db_path):
    """
    Build an empty database, write it to disk and hand it back

    Args:
        db_path: Path where the new database is saved

    Returns:
        dict: The freshly created, empty database structure
    """
    empty_db = dict(
        users={},
        last_login=None,
        sessions={},
        active_channel=None,
        messages={},
        videos={},
    )
    # Persist right away so the file exists after this call
    save_database(db_path, empty_db)
    return empty_db
53 | 
def save_database(db_path, db):
    """
    Save database to JSON file

    The data is first written to a temporary sibling file (``<db_path>.tmp``)
    and then moved over ``db_path`` with ``os.replace``. If serialization or
    writing fails, the previous database file is left untouched instead of
    being truncated, and the original exception propagates to the caller.

    Args:
        db_path: Path to save the database
        db: Database dictionary to save
    """
    tmp_path = f"{db_path}.tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            # default=str stringifies values json cannot encode natively
            json.dump(db, f, indent=4, default=str)
        os.replace(tmp_path, db_path)
    finally:
        # After a successful replace the temp file is gone; after a failure
        # remove the partial file so it does not linger next to the database
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
64 | 
def get_db_path():
    """
    Build the location of the JSON database inside TEMP_DIR

    Returns:
        str: Path to the database file
    """
    db_filename = 'database.json'
    return os.path.join(TEMP_DIR, db_filename)


--------------------------------------------------------------------------------
/docs/setup.md:
--------------------------------------------------------------------------------
  1 | # Telegram Channel Saver Setup Guide
  2 | 
  3 | This document provides detailed instructions for setting up your development environment to work on the Telegram Channel Saver project.
  4 | 
  5 | ## Prerequisites
  6 | 
  7 | Before you begin, ensure that you have the following installed on your system:
  8 | 
  9 | - Python 3.8 or higher
 10 | - pip (Python package manager)
 11 | - Git
 12 | 
 13 | ## Installation Steps
 14 | 
 15 | ### 1. Clone the Repository
 16 | 
 17 | ```bash
 18 | git clone https://github.com/yourusername/telegram-channel-saver.git
 19 | cd telegram-channel-saver
 20 | ```
 21 | 
 22 | ### 2. Create a Virtual Environment
 23 | 
 24 | It's recommended to use a virtual environment to keep dependencies isolated:
 25 | 
 26 | ```bash
 27 | # Create a virtual environment
 28 | python -m venv venv
 29 | 
 30 | # Activate the virtual environment
 31 | # On Windows:
 32 | venv\Scripts\activate
 33 | # On macOS/Linux:
 34 | source venv/bin/activate
 35 | ```
 36 | 
 37 | ### 3. Install Dependencies
 38 | 
 39 | Install the required packages:
 40 | 
 41 | ```bash
 42 | pip install -r requirements.txt
 43 | 
 44 | # For development, install additional packages
 45 | pip install -r requirements-dev.txt  # if available
 46 | ```
 47 | 
 48 | ### 4. Telegram API Credentials
 49 | 
 50 | To use the Telegram API, you need to obtain API credentials:
 51 | 
 52 | 1. Visit [https://my.telegram.org/auth](https://my.telegram.org/auth) and log in with your Telegram account
 53 | 2. Go to "API Development Tools"
 54 | 3. Create a new application (if you haven't already)
 55 | 4. Note your **API ID** and **API Hash**
 56 | 
 57 | ### 5. Configuration
 58 | 
 59 | 1. Create a copy of the example environment file:
 60 | 
 61 | ```bash
 62 | cp .env.example .env
 63 | ```
 64 | 
 65 | 2. Edit `.env` with your API credentials:
 66 | 
 67 | ```ini
 68 | # Telegram API credentials (required)
 69 | # Get these from https://my.telegram.org/
 70 | API_ID=your_api_id_here
 71 | API_HASH=your_api_hash_here
 72 | 
 73 | # OpenRouter API key (optional - for AI image analysis)
 74 | OPENROUTER_API_KEY=your_openrouter_api_key_here
 75 | ```
 76 | 
 77 | ## Running the Application
 78 | 
 79 | ### Running in Development Mode
 80 | 
 81 | ```bash
 82 | python main.py
 83 | ```
 84 | 
 85 | ### Running Tests
 86 | 
 87 | ```bash
 88 | # Run all tests
 89 | pytest
 90 | 
 91 | # Run specific tests
 92 | pytest tests/test_specific_feature.py
 93 | 
 94 | # Run with coverage report
 95 | pytest --cov=src
 96 | ```
 97 | 
 98 | ## Development Tools
 99 | 
100 | ### Code Linting
101 | 
102 | We use flake8 for linting:
103 | 
104 | ```bash
105 | # Check code style
106 | flake8 src
107 | 
108 | # Auto-format code (if using Black)
109 | black src
110 | ```
111 | 
112 | ### Type Checking
113 | 
114 | We use mypy for type checking:
115 | 
116 | ```bash
117 | mypy src
118 | ```
119 | 
120 | ## Project Structure
121 | 
122 | Here's an overview of the key directories and files in the project:
123 | 
124 | ```
125 | telegram-channel-saver/
126 | ├── src/                    # Main package directory
127 | │   ├── __init__.py
128 | │   ├── app.py              # Application logic (provides main())
129 | │   ├── client.py           # Telegram client management
130 | │   ├── config.py           # Constants and settings
131 | │   └── ...                 # database, users, channels, media, export, ...
132 | ├── main.py                 # Entry point
133 | ├── docs/                   # Documentation
134 | ├── tools/                  # Testing tools and utilities
135 | ├── requirements.txt        # Production dependencies
136 | └── .env.example            # Example configuration file
137 | ```
138 | 
139 | ## Troubleshooting
140 | 
141 | ### Common Issues
142 | 
143 | #### Authentication Problems
144 | 
145 | If you encounter authentication issues:
146 | - Verify your API credentials
147 | - Check your phone number format (include country code)
148 | - Ensure you have internet connectivity
149 | 
150 | #### Import Errors
151 | 
152 | If you see import errors:
153 | - Make sure you have activated the virtual environment
154 | - Verify all dependencies are installed correctly
155 | 
156 | #### Rate Limiting
157 | 
158 | If you encounter `FloodWaitError`:
159 | - Reduce the frequency of requests
160 | - Add delays between operations
161 | - Implement exponential backoff
162 | 
163 | ### Getting Help
164 | 
165 | If you encounter any issues that aren't covered here:
166 | 1. Check the [FAQ document](faq.md)
167 | 2. Search for similar issues in the GitHub repository
168 | 3. Open a new issue with a detailed description of the problem
169 | 
170 | ## Additional Resources
171 | 
172 | - [Telethon Documentation](https://docs.telethon.dev/en/stable/) - The Python Telegram client library used in this project
173 | - [Telegram API Documentation](https://core.telegram.org/api) - Official Telegram API documentation 


--------------------------------------------------------------------------------
/CLAUDE.md:
--------------------------------------------------------------------------------
  1 | # Telegram Channel Saver
  2 | 
  3 | ## Project Description
  4 | Telegram Channel Saver is a Python tool for saving and analyzing Telegram channel content. The application connects to Telegram API using user credentials, allows browsing and selecting channels/groups, and provides functionality to download and store messages, track users, and search through saved content.
  5 | 
  6 | ### Key Features
  7 | - Save channel messages with reactions and media information
  8 | - Track channel users and their activity
  9 | - Search through saved messages by text, date, or ID
 10 | - Support for multiple Telegram accounts
 11 | - Message download with rate limiting and error handling
 12 | - Detailed statistics about saved content
 13 | 
 14 | ### File Structure
 15 | ```
 16 | /telegram-channel-saver/
 17 |   ├── LICENSE             # MIT License
 18 |   ├── README.md           # Project documentation
 19 |   ├── docs/               # Setup guide, FAQ, contributing and design notes
 20 |   ├── requirements.txt    # Project dependencies
 21 |   ├── main.py             # Entry point (calls src.app.main)
 22 |   ├── src/                # Application modules (config, database, client, ...)
 23 |   ├── CLAUDE.md           # Best practices and code guidelines
 24 |   ├── tools/              # Testing tools and utilities (Claude's workspace)
 25 |   │   ├── venv/           # Separate virtual environment for tools
 26 |   │   └── *.py            # Test scripts and utilities
 27 |   └── temp/               # Storage directory for data and sessions
 28 |       └── channel_saver/  # Application data storage location
 29 | ```
 30 | 
 31 | # Claude AI Development Rules
 32 | 
 33 | ## API Documentation
 34 | - **Always use context7** to get the most recent API documentation before implementing features
 35 | - Do not rely on training data for library APIs - fetch current docs via context7
 36 | - When working with Telethon, Telegram API, or any external library, query context7 first
 37 | 
 38 | ## Tools Directory
 39 | - Claude can create test tools and utilities in the `tools/` directory
 40 | - Use the separate venv located at `tools/venv/` for running test scripts
 41 | - Activate with: `source tools/venv/bin/activate`
 42 | - Install dependencies in tools venv: `tools/venv/bin/pip install <package>`
 43 | - Run tools with: `tools/venv/bin/python tools/<script>.py`
 44 | - Tools are for testing pipelines, debugging, and validating implementations
 45 | - Keep tools separate from main application code
 46 | 
 47 | # Clean Code Best Practices
 48 | 
 49 | ## General Principles
 50 | - Keep functions and modules small and focused on a single responsibility
 51 | - Use descriptive variable and function names
 52 | - Keep modules under 500 lines of code
 53 | - Write code that is easy to read and understand for other developers
 54 | - Avoid code duplication through proper abstraction
 55 | 
 56 | ## Function Design
 57 | - Functions should do one thing and do it well
 58 | - Keep functions short (preferably under 20 lines)
 59 | - Minimize the number of arguments (aim for 3 or fewer)
 60 | - Avoid side effects when possible
 61 | - Return early to reduce nesting
 62 | 
 63 | ## Variable Naming
 64 | - Use meaningful and pronounceable variable names
 65 | - Use consistent naming conventions
 66 | - Make sure names reflect what the variable contains
 67 | - Use nouns for variables and verbs for functions
 68 | 
 69 | ## Comments and Documentation
 70 | - Code should be self-documenting
 71 | - Comments should explain "why", not "what"
 72 | - Keep comments up-to-date with code changes
 73 | - Document public APIs and complex functions
 74 | - Use docstrings for functions and classes
 75 | 
 76 | ## Error Handling
 77 | - Handle exceptions at the appropriate level
 78 | - Never swallow exceptions without proper handling
 79 | - Use specific exception types
 80 | - Provide meaningful error messages
 81 | - Fail fast and explicitly
 82 | 
 83 | ## Code Organization
 84 | - Keep related functionality together
 85 | - Separate concerns into different modules
 86 | - Use appropriate design patterns
 87 | - Follow the principle of least surprise
 88 | - Structure code for testability
 89 | 
 90 | ## Testing
 91 | - Write tests for all new features and bug fixes
 92 | - Aim for high test coverage
 93 | - Make tests readable and maintainable
 94 | - Test edge cases and error conditions
 95 | - Use automated testing
 96 | 
 97 | ## Refactoring
 98 | - Refactor regularly to improve code quality
 99 | - Pay down technical debt incrementally
100 | - Make small, incremental changes
101 | - Maintain behavior while improving design
102 | - Use automated tests to verify refactoring
103 | 
104 | ## Version Control
105 | - Write clear, descriptive commit messages
106 | - Make small, focused commits
107 | - Use branches for new features and bug fixes
108 | - Review code before merging
109 | - Keep the main branch stable
110 | 
111 | ## Security
112 | - Validate all inputs
113 | - Never store sensitive information in code
114 | - Use proper authentication and authorization
115 | - Follow the principle of least privilege
116 | - Keep dependencies up to date


--------------------------------------------------------------------------------
/docs/contributing.md:
--------------------------------------------------------------------------------
  1 | # Contributing to Telegram Channel Saver
  2 | 
  3 | Thank you for your interest in contributing to Telegram Channel Saver! This document provides guidelines and instructions for contributing to the project.
  4 | 
  5 | ## Code of Conduct
  6 | 
  7 | Please help us maintain a positive and inclusive environment by following these guidelines:
  8 | 
  9 | - Be respectful and considerate of others
 10 | - Use inclusive language and be mindful of cultural differences
 11 | - Accept constructive criticism gracefully
 12 | - Focus on what is best for the community
 13 | - Show empathy towards other community members
 14 | 
 15 | ## Ways to Contribute
 16 | 
 17 | There are many ways you can contribute to the project:
 18 | 
 19 | 1. **Report bugs**: Submit bug reports by creating issues in the repository
 20 | 2. **Suggest features**: Propose new features or improvements
 21 | 3. **Improve documentation**: Help us improve existing documentation or add new documentation
 22 | 4. **Submit code changes**: Fix bugs or implement new features
 23 | 5. **Review code**: Review pull requests from other contributors
 24 | 
 25 | ## Development Process
 26 | 
 27 | ### Setting up the Development Environment
 28 | 
 29 | Please refer to the [Setup Guide](setup.md) for detailed instructions on setting up your development environment.
 30 | 
 31 | ### Workflow
 32 | 
 33 | 1. **Fork the repository**: Create your own fork of the project
 34 | 2. **Create a branch**: Create a branch for your changes
 35 |    ```bash
 36 |    git checkout -b feature/your-feature-name
 37 |    # or
 38 |    git checkout -b fix/your-bug-fix
 39 |    ```
 40 | 3. **Make changes**: Implement your changes
 41 | 4. **Test**: Ensure your changes work as expected
 42 | 5. **Commit**: Commit your changes with clear and descriptive commit messages
 43 | 6. **Push**: Push your changes to your fork
 44 | 7. **Pull Request**: Create a pull request from your fork to the main repository
 45 | 
 46 | ### Commit Guidelines
 47 | 
 48 | Follow these guidelines for your commit messages:
 49 | 
 50 | - Use present tense ("Add feature" not "Added feature")
 51 | - Use imperative mood ("Move cursor to..." not "Moves cursor to...")
 52 | - Start with a capital letter
 53 | - Keep the first line under 72 characters
 54 | - Reference issues and pull requests when appropriate
 55 | 
 56 | Example:
 57 | ```
 58 | Add support for downloading images from channels
 59 | 
 60 | - Implement media download functionality
 61 | - Add file type detection
 62 | - Create directory structure for saved media
 63 | 
 64 | Fixes #42
 65 | ```
 66 | 
 67 | ## Pull Request Process
 68 | 
 69 | 1. **Check existing issues and PRs**: Make sure your PR doesn't duplicate existing work
 70 | 2. **Document your changes**: Update documentation to reflect your changes
 71 | 3. **Include tests**: Add tests for new features or bug fixes
 72 | 4. **Keep it focused**: Each PR should address a single concern
 73 | 5. **Be responsive**: Respond to feedback and make requested changes
 74 | 
 75 | ## Code Style and Standards
 76 | 
 77 | ### Python Style Guidelines
 78 | 
 79 | - Follow the [PEP 8](https://peps.python.org/pep-0008/) style guide
 80 | - Use 4 spaces for indentation (no tabs)
 81 | - Keep lines under 100 characters
 82 | - Use docstrings for functions, classes, and modules
 83 | - Add type hints where appropriate
 84 | 
 85 | ### Documentation Standards
 86 | 
 87 | - Use Markdown for documentation
 88 | - Keep language simple and clear
 89 | - Include code examples where helpful
 90 | - Update documentation when making changes to the code
 91 | 
 92 | ## Testing
 93 | 
 94 | - Write unit tests for new functionality
 95 | - Ensure all tests pass before submitting a pull request
 96 | - Test your changes in different environments if possible
 97 | 
 98 | ## Issue Reporting Guidelines
 99 | 
100 | When reporting issues, please include:
101 | 
102 | 1. **Issue description**: A clear description of the issue
103 | 2. **Steps to reproduce**: Detailed steps to reproduce the problem
104 | 3. **Expected behavior**: What you expected to happen
105 | 4. **Actual behavior**: What actually happened
106 | 5. **Environment**: Python version, OS, etc.
107 | 6. **Screenshots**: If applicable
108 | 7. **Additional context**: Any other relevant information
109 | 
110 | ## Feature Request Guidelines
111 | 
112 | When suggesting features, please include:
113 | 
114 | 1. **Feature description**: A clear description of the proposed feature
115 | 2. **Use case**: Why this feature would be valuable
116 | 3. **Potential implementation**: If you have ideas on how to implement it
117 | 4. **Alternatives considered**: Any alternative solutions you've considered
118 | 
119 | ## Getting Help
120 | 
121 | If you need help with contributing:
122 | 
123 | 1. Check the [documentation](setup.md)
124 | 2. Ask questions in issues or existing discussions
125 | 3. Contact the maintainers
126 | 
127 | ## License
128 | 
129 | By contributing to Telegram Channel Saver, you agree that your contributions will be licensed under the same license as the project. 


--------------------------------------------------------------------------------
/docs/faq.md:
--------------------------------------------------------------------------------
  1 | # Frequently Asked Questions (FAQ)
  2 | 
  3 | This document addresses common questions and issues that users may encounter while using the Telegram Channel Saver.
  4 | 
  5 | ## General Questions
  6 | 
  7 | ### What is Telegram Channel Saver?
  8 | 
  9 | Telegram Channel Saver is a Python tool designed to help you save and analyze content from Telegram channels. It allows you to download messages, media, and track user activity in channels you have access to.
 10 | 
 11 | ### Is this tool official or affiliated with Telegram?
 12 | 
 13 | No, this is an independent tool and is not officially affiliated with or endorsed by Telegram. It uses the official Telegram API through the Telethon library.
 14 | 
 15 | ### Is using this tool against Telegram's Terms of Service?
 16 | 
 17 | This tool is designed for personal use and uses official Telegram API methods. However, please use it responsibly and be aware of Telegram's [Terms of Service](https://telegram.org/tos) and [API Terms of Service](https://core.telegram.org/api/terms). Misuse of the tool (such as excessive scraping or spamming) may violate these terms.
 18 | 
 19 | ## Setup and Installation
 20 | 
 21 | ### Why am I getting an error about missing dependencies?
 22 | 
 23 | Make sure you've installed all required dependencies by running:
 24 | ```bash
 25 | pip install -r requirements.txt
 26 | ```
 27 | 
 28 | ### Where do I get the API_ID and API_HASH?
 29 | 
 30 | 1. Visit [my.telegram.org/apps](https://my.telegram.org/apps)
 31 | 2. Log in with your Telegram account
 32 | 3. Create a new application if you don't have one
 33 | 4. Your API_ID and API_HASH will be displayed on the page
 34 | 
 35 | ### How do I store my API credentials securely?
 36 | 
 37 | Create a `.env` file in the project root with the following content:
 38 | ```
 39 | API_ID=your_api_id
 40 | API_HASH=your_api_hash
 41 | ```
 42 | The application will automatically load these values when started.
 43 | 
 44 | ## Usage Questions
 45 | 
 46 | ### How many messages can I download at once?
 47 | 
 48 | By default, the tool can download any number of messages, but there are rate limits to consider:
 49 | - Telegram API has rate limits that prevent too many requests in a short period
 50 | - The tool has built-in delays to respect these limits
 51 | - For very large channels (10,000+ messages), it's recommended to download in batches
 52 | 
 53 | ### Why am I getting "FloodWaitError"?
 54 | 
 55 | This error occurs when you've made too many requests to the Telegram API in a short period. The error will include a wait time (in seconds). The tool will automatically wait for the specified time before continuing. To reduce the frequency of these errors:
 56 | - Increase the delay between requests (`BATCH_DELAY`, `MEDIA_DOWNLOAD_DELAY`) in `src/config.py`
 57 | - Download fewer messages at once
 58 | - Run the tool less frequently
 59 | 
 60 | ### Can I download messages from a private channel?
 61 | 
 62 | Yes, but you must be a member of the channel. The tool can only access channels that your Telegram account has access to.
 63 | 
 64 | ### How do I search for specific messages?
 65 | 
 66 | From the main menu, select the option to search messages. You can search by:
 67 | - Text content
 68 | - Date range
 69 | - Message ID
 70 | - User messages
 71 | 
 72 | ### Where are downloaded files stored?
 73 | 
 74 | By default:
 75 | - Database files are stored in `temp/channel_saver/`
 76 | - Media files are stored in `temp/videos/` and other appropriate directories
 77 | - You can change these paths in `src/config.py`
 78 | 
 79 | ## Troubleshooting
 80 | 
 81 | ### The application crashes when downloading media
 82 | 
 83 | This could be due to several reasons:
 84 | 1. Insufficient disk space
 85 | 2. Large media files
 86 | 3. Network interruptions
 87 | 
 88 | Solutions:
 89 | - Free up disk space
 90 | - Tune the media download settings (e.g. `MEDIA_DOWNLOAD_TIMEOUT`, `MEDIA_DOWNLOAD_RETRY`) in `src/config.py`
 91 | - Check your internet connection stability
 92 | - Try downloading media in smaller batches
 93 | 
 94 | ### I can't authenticate with my Telegram account
 95 | 
 96 | Check the following:
 97 | 1. Verify your API_ID and API_HASH are correct
 98 | 2. Make sure you're entering the correct phone number (with country code)
 99 | 3. Check if your account has any restrictions
100 | 4. If using 2FA, ensure you're entering the correct password
101 | 
102 | ### The tool is running very slowly
103 | 
104 | This may be due to:
105 | 1. Rate limiting by Telegram API
106 | 2. Large number of messages being processed
107 | 3. Slow internet connection
108 | 4. Computer resource limitations
109 | 
110 | Optimization tips:
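111 | - Increase the delay parameters in `src/config.py` if you keep hitting rate limits: individual requests get slower, but you avoid the much longer forced waits that follow a `FloodWaitError`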
112 | - Download in smaller batches
113 | - Close other applications using your network
114 | - Run during off-peak hours
115 | 
116 | ### How do I reset my session?
117 | 
118 | If you need to log in with a different account or your session is corrupted:
119 | 1. Delete the session file(s) in `temp/channel_saver/`
120 | 2. Restart the application
121 | 3. You'll be prompted to log in again
122 | 
123 | ### My question isn't answered here
124 | 
125 | If you have additional questions or issues:
126 | 1. Check the documentation in the `docs/` directory
127 | 2. Look for similar issues in the project repository
128 | 3. Consider opening a new issue in the repository with details about your problem 


--------------------------------------------------------------------------------
/src/client.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Telegram client management module.
  3 | Handles client initialization, authentication, and session management.
  4 | """
  5 | import os
  6 | import logging
  7 | from datetime import datetime
  8 | from telethon import TelegramClient
  9 | from telethon.errors import SessionPasswordNeededError
 10 | import asyncio
 11 | 
 12 | from src.config import TEMP_DIR
 13 | from src.database import save_database
 14 | 
 15 | logger = logging.getLogger(__name__)
 16 | 
 17 | async def create_client(api_id, api_hash, session_path):
 18 |     """
 19 |     Create and initialize a Telegram client
 20 |     
 21 |     Args:
 22 |         api_id: Telegram API ID
 23 |         api_hash: Telegram API hash
 24 |         session_path: Path to session file
 25 |         
 26 |     Returns:
 27 |         TelegramClient: Initialized Telegram client
 28 |     """
 29 |     client = TelegramClient(session_path, api_id, api_hash)
 30 |     await client.connect()
 31 |     return client
 32 | 
 33 | async def check_authorized(client):
 34 |     """
 35 |     Check if the client is authorized
 36 |     
 37 |     Args:
 38 |         client: Telegram client
 39 |         
 40 |     Returns:
 41 |         bool: True if authorized, False otherwise
 42 |     """
 43 |     if not client:
 44 |         return False
 45 |     try:
 46 |         return await client.is_user_authorized()
 47 |     except Exception as e:
 48 |         logger.error(f"Error checking authorization: {e}")
 49 |         return False
 50 | 
 51 | async def login(client, phone, force=False):
 52 |     """
 53 |     Log in to Telegram
 54 |     
 55 |     Args:
 56 |         client: Telegram client
 57 |         phone: Phone number
 58 |         force: Force new login
 59 |         
 60 |     Returns:
 61 |         bool: True if login successful, False otherwise
 62 |     """
 63 |     if not await client.is_user_authorized():
 64 |         try:
 65 |             await client.send_code_request(phone)
 66 |             code = input('Enter the code you received: ')
 67 |             await client.sign_in(phone, code)
 68 |         except SessionPasswordNeededError:
 69 |             # 2FA is enabled
 70 |             password = input('Please enter your 2FA password: ')
 71 |             await client.sign_in(password=password)
 72 |     
 73 |     # Get user info
 74 |     me = await client.get_me()
 75 |     return me
 76 | 
 77 | def get_session_path(phone):
 78 |     """
 79 |     Get full path to session file for given phone number
 80 |     
 81 |     Args:
 82 |         phone: Phone number
 83 |         
 84 |     Returns:
 85 |         str: Path to session file
 86 |     """
 87 |     return os.path.join(TEMP_DIR, f'user_{phone}')
 88 | 
 89 | async def save_session(db, phone, me):
 90 |     """
 91 |     Save current session info to database
 92 |     
 93 |     Args:
 94 |         db: Database
 95 |         phone: Phone number
 96 |         me: User info
 97 |     """
 98 |     if not phone:
 99 |         return
100 |         
101 |     # Update sessions info
102 |     db['sessions'][phone] = {
103 |         'session_file': f'user_{phone}',
104 |         'created_at': db['sessions'].get(phone, {}).get('created_at', str(datetime.now())),
105 |         'last_used': str(datetime.now()),
106 |         'user_id': me.id,
107 |         'username': me.username,
108 |         'active': True
109 |     }
110 |     
111 |     # Update last login
112 |     db['last_login'] = {
113 |         'phone': phone,
114 |         'user_id': me.id,
115 |         'username': me.username,
116 |         'date': str(datetime.now())
117 |     }
118 |     
119 |     # Deactivate other sessions
120 |     for p in db['sessions']:
121 |         if p != phone:
122 |             db['sessions'][p]['active'] = False
123 | 
124 | async def restore_session(db, api_id, api_hash, db_path):
125 |     """
126 |     Try to restore last active session
127 |     
128 |     Args:
129 |         db: Database
130 |         api_id: Telegram API ID
131 |         api_hash: Telegram API hash
132 |         db_path: Path to database file
133 |         
134 |     Returns:
135 |         tuple: (client, phone) tuple if successful, (None, None) otherwise
136 |     """
137 |     if not db.get('sessions'):
138 |         return None, None
139 |         
140 |     # Find active session
141 |     active_session = None
142 |     active_phone = None
143 |     
144 |     for phone, session in db['sessions'].items():
145 |         if session.get('active'):
146 |             active_session = session
147 |             active_phone = phone
148 |             break
149 |     
150 |     if not active_session:
151 |         return None, None
152 |         
153 |     # Try to restore session
154 |     try:
155 |         phone = active_phone
156 |         client = TelegramClient(
157 |             get_session_path(phone),
158 |             api_id,
159 |             api_hash
160 |         )
161 |         
162 |         await client.connect()
163 |         if await check_authorized(client):
164 |             # Update last used time
165 |             me = await client.get_me()
166 |             await save_session(db, phone, me)
167 |             save_database(db_path, db)
168 |             logger.info(f"Restored session for {phone}")
169 |             return client, phone
170 |                 
171 |     except Exception as e:
172 |         logger.error(f"Failed to restore session: {e}")
173 |         if client:
174 |             await client.disconnect()
175 |             
176 |     return None, None 


--------------------------------------------------------------------------------
/src/users.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Users management module.
  3 | Handles operations related to Telegram channel users.
  4 | """
  5 | import logging
  6 | from datetime import datetime
  7 | 
  8 | from src.channels import get_active_channel
  9 | from src.database import save_database
 10 | 
 11 | logger = logging.getLogger(__name__)
 12 | 
 13 | async def save_channel_users(client, db, db_path):
 14 |     """
 15 |     Save all users from active channel to database
 16 |     
 17 |     Args:
 18 |         client: Telegram client
 19 |         db: Database
 20 |         db_path: Path to database file
 21 |         
 22 |     Returns:
 23 |         bool: True if successful, False otherwise
 24 |     """
 25 |     active = get_active_channel(db)
 26 |     if not active:
 27 |         print("\nNo active channel selected!")
 28 |         return False
 29 |         
 30 |     try:
 31 |         print(f"\nFetching users from {active['title']}...")
 32 |         
 33 |         # Initialize channel users dict if doesn't exist
 34 |         channel_id = str(active['id'])
 35 |         if 'users' not in db:
 36 |             db['users'] = {}
 37 |         if channel_id not in db['users']:
 38 |             db['users'][channel_id] = {}
 39 |         
 40 |         # Get all participants
 41 |         participants = await client.get_participants(active['id'])
 42 |         
 43 |         # Counter for progress
 44 |         total = len(participants)
 45 |         saved = 0
 46 |         updated = 0
 47 |         
 48 |         print(f"\nProcessing {total} users...")
 49 |         
 50 |         for user in participants:
 51 |             user_dict = {
 52 |                 'id': user.id,
 53 |                 'username': user.username,
 54 |                 'first_name': user.first_name,
 55 |                 'last_name': user.last_name,
 56 |                 'phone': getattr(user, 'phone', None),
 57 |                 'bot': user.bot,
 58 |                 'scam': user.scam,
 59 |                 'fake': user.fake,
 60 |                 'premium': user.premium,
 61 |                 'verified': user.verified,
 62 |                 'restricted': user.restricted,
 63 |                 'last_seen': str(datetime.now())
 64 |             }
 65 |             
 66 |             user_id = str(user.id)
 67 |             if user_id in db['users'][channel_id]:
 68 |                 # Update existing user
 69 |                 user_dict['first_seen'] = db['users'][channel_id][user_id]['first_seen']
 70 |                 db['users'][channel_id][user_id].update(user_dict)
 71 |                 updated += 1
 72 |             else:
 73 |                 # Add new user
 74 |                 user_dict['first_seen'] = str(datetime.now())
 75 |                 db['users'][channel_id][user_id] = user_dict
 76 |                 saved += 1
 77 |             
 78 |             # Show progress every 10 users
 79 |             if (saved + updated) % 10 == 0:
 80 |                 print(f"Progress: {saved + updated}/{total}")
 81 |         
 82 |         save_database(db_path, db)
 83 |         print(f"\nOperation completed!")
 84 |         print(f"New users saved: {saved}")
 85 |         print(f"Users updated: {updated}")
 86 |         return True
 87 |         
 88 |     except Exception as e:
 89 |         logger.error(f"Error saving channel users: {e}")
 90 |         print(f"\nError saving users: {str(e)}")
 91 |         return False
 92 | 
 93 | async def show_channel_users_stats(db):
 94 |     """
 95 |     Show statistics about saved users in active channel
 96 |     
 97 |     Args:
 98 |         db: Database
 99 |     """
100 |     active = get_active_channel(db)
101 |     if not active:
102 |         print("\nNo active channel selected!")
103 |         return
104 |         
105 |     channel_id = str(active['id'])
106 |     if channel_id not in db.get('users', {}):
107 |         print("\nNo saved users for this channel!")
108 |         return
109 |         
110 |     users = db['users'][channel_id]
111 |     total = len(users)
112 |     bots = sum(1 for u in users.values() if u['bot'])
113 |     premium = sum(1 for u in users.values() if u['premium'])
114 |     verified = sum(1 for u in users.values() if u['verified'])
115 |     
116 |     print(f"\nChannel Users Statistics:")
117 |     print(f"------------------------")
118 |     print(f"Total users saved: {total}")
119 |     print(f"Bots: {bots}")
120 |     print(f"Premium users: {premium}")
121 |     print(f"Verified users: {verified}")
122 |     print(f"\nLast update: {max(u['last_seen'] for u in users.values())}")
123 | 
124 | async def list_saved_users(db):
125 |     """
126 |     List users saved from active channel
127 |     
128 |     Args:
129 |         db: Database
130 |     """
131 |     active = get_active_channel(db)
132 |     if not active:
133 |         print("\nNo active channel selected!")
134 |         return
135 |         
136 |     channel_id = str(active['id'])
137 |     if channel_id not in db.get('users', {}):
138 |         print("\nNo saved users for this channel!")
139 |         return
140 |         
141 |     users = db['users'][channel_id]
142 |     if not users:
143 |         print("\nNo users found!")
144 |         return
145 |         
146 |     print("\nSaved Users:")
147 |     print("-" * 80)
148 |     print(f"{'ID':<12} | {'Username':<15} | {'Name':<20} | {'Type':<8} | {'Status'}")
149 |     print("-" * 80)
150 |     
151 |     for user_id, user in sorted(users.items(), key=lambda x: x[1].get('username') or ''):
152 |         username = f"@{user['username']}" if user['username'] else '-'
153 |         name = f"{user['first_name'] or ''} {user['last_name'] or ''}".strip() or '-'
154 |         user_type = 'Bot' if user['bot'] else 'User'
155 |         
156 |         status = []
157 |         if user['premium']: status.append('Premium')
158 |         if user['verified']: status.append('Verified')
159 |         if user['scam']: status.append('Scam')
160 |         if user['fake']: status.append('Fake')
161 |         if user['restricted']: status.append('Restricted')
162 |         
163 |         status_str = ', '.join(status) if status else '-'
164 |         
165 |         print(f"{user_id:<12} | {username:<15} | {name[:20]:<20} | {user_type:<8} | {status_str}")
166 |     
167 |     print("-" * 80)
168 |     print(f"Total Users: {len(users)}") 


--------------------------------------------------------------------------------
/docs/codebase.md:
--------------------------------------------------------------------------------
  1 | # Telegram Channel Saver - Codebase Structure
  2 | 
  3 | This document provides an overview of the modular codebase structure in Telegram Channel Saver. The application has been refactored from a monolithic script into modular components to improve maintainability and readability.
  4 | 
  5 | ## Directory Structure
  6 | 
  7 | ```
  8 | telegram-channel-saver/
  9 | ├── main.py                # Main entry point
 10 | ├── src/                   # Source code modules
 11 | │   ├── __init__.py        # Package initialization
 12 | │   ├── app.py             # Main application class
 13 | │   ├── channels.py        # Channel management functions
 14 | │   ├── client.py          # Telegram client functions
 15 | │   ├── config.py          # Configuration and constants
 16 | │   ├── database.py        # Database operations
 17 | │   ├── media.py           # Media handling functions
 18 | │   ├── messages.py        # Message operations
 19 | │   └── users.py           # User management functions
 20 | ├── temp/                  # Temporary data storage
 21 | │   ├── channel_saver/     # Database and session files
 22 | │   └── videos/            # Downloaded videos
 23 | └── docs/                  # Documentation
 24 | ```
 25 | 
 26 | ## Modules Overview
 27 | 
 28 | ### main.py
 29 | 
 30 | The entry point for the application that imports and runs the main function from `src/app.py`. This provides a clean interface for users to start the application.
 31 | 
 32 | ### src/app.py
 33 | 
 34 | The core application module containing the `ChannelSaver` class which coordinates all functionality:
 35 | - Handles Telegram client initialization
 36 | - Manages user sessions
 37 | - Provides the main menu interface
 38 | - Coordinates operations between other modules
 39 | 
 40 | ### src/config.py
 41 | 
 42 | Contains all configuration settings and constants:
 43 | - Batch sizes and timing settings
 44 | - Media download parameters
 45 | - Directory settings
 46 | - Logging configuration
 47 | 
 48 | ### src/database.py
 49 | 
 50 | Handles database operations:
 51 | - Loading and saving the JSON database
 52 | - Database schema creation
 53 | - Provides a clean interface for other modules to access data
 54 | 
 55 | ### src/client.py
 56 | 
 57 | Manages Telegram client interactions:
 58 | - Authentication and login
 59 | - Session management
 60 | - Client creation and connection
 61 | 
 62 | ### src/channels.py
 63 | 
 64 | Functions for channel management:
 65 | - Listing available channels/groups
 66 | - Displaying channel information
 67 | - Selecting active channel
 68 | - Retrieving channel statistics
 69 | 
 70 | ### src/users.py
 71 | 
 72 | User management functions:
 73 | - Saving channel users
 74 | - Displaying user statistics
 75 | - Listing saved users
 76 | 
 77 | ### src/messages.py
 78 | 
 79 | Message operations:
 80 | - Saving channel messages
 81 | - Searching through messages
 82 | - Message display and formatting
 83 | - Handling message batches with rate limiting
 84 | 
 85 | ### src/media.py
 86 | 
 87 | Media handling functionality:
 88 | - Enhanced media download mechanism with retry logic
 89 | - Video download management
 90 | - Media information display
 91 | - Chunked downloads for large files
 92 | 
 93 | ## Dependencies Between Modules
 94 | 
 95 | The codebase follows a hierarchical dependency structure:
 96 | 
 97 | 1. `config.py` - No dependencies on other modules, only standard library imports
 98 | 2. `database.py` - Depends on `config.py` for directory settings
 99 | 3. `client.py` - Depends on `config.py` and `database.py`
100 | 4. `channels.py` - Depends on `database.py` for storing channel data
101 | 5. `users.py` - Depends on `channels.py` and `database.py`
102 | 6. `media.py` - Depends on `config.py` and otherwise has minimal dependencies
103 | 7. `messages.py` - Depends on `channels.py`, `database.py`, and `media.py`
104 | 8. `app.py` - Depends on all other modules to orchestrate the application
105 | 
106 | This structure ensures that lower-level modules never depend on higher-level ones, which prevents circular dependencies.
107 | 
108 | ## Key Functionality by Module
109 | 
110 | ### Configuration (config.py)
111 | 
112 | - Sets up logging
113 | - Defines batch sizes for message downloads
114 | - Controls delay times to avoid rate limiting
115 | - Sets timeouts and retry parameters
116 | - Configures directory paths
117 | 
118 | ### Database Operations (database.py)
119 | 
120 | - Loads the JSON database, or creates a new one if none exists
121 | - Saves database state
122 | - Provides schema for users, sessions, messages, etc.
123 | 
124 | ### Client Management (client.py)
125 | 
126 | - Creates and initializes Telegram client
127 | - Authenticates users (login, 2FA)
128 | - Manages sessions (saving, restoring)
129 | 
130 | ### Channel Management (channels.py)
131 | 
132 | - Lists all available channels/groups
133 | - Displays channel information in a readable format
134 | - Selects active channel for operations
135 | - Shows channel statistics
136 | 
137 | ### User Management (users.py)
138 | 
139 | - Retrieves and saves channel participants
140 | - Tracks user information (premium status, etc.)
141 | - Displays user statistics
142 | - Lists saved users
143 | 
144 | ### Message Operations (messages.py)
145 | 
146 | - Downloads messages in batches
147 | - Handles rate limiting and retries
148 | - Processes message content (text, reactions)
149 | - Provides search functionality
150 | 
151 | ### Media Handling (media.py)
152 | 
153 | - Downloads media with progress tracking
154 | - Handles large file downloads efficiently
155 | - Manages video downloads
156 | - Provides retry mechanisms for failed downloads
157 | 
158 | ### Application Orchestration (app.py)
159 | 
160 | - Initializes application components
161 | - Provides user interface (menu)
162 | - Coordinates between modules
163 | - Handles application flow
164 | 
165 | ## How to Extend the Codebase
166 | 
167 | ### Adding New Features
168 | 
169 | 1. Identify which module should contain your feature
170 | 2. Add necessary functions to that module
171 | 3. If needed, update `app.py` to expose the feature in the UI
172 | 4. Update configuration in `config.py` if new settings are required
173 | 
174 | ### Modifying Existing Features
175 | 
176 | 1. Locate the module containing the feature
177 | 2. Make changes while keeping the module focused on its existing responsibility
178 | 3. Ensure changes don't break dependencies
179 | 4. Update documentation to reflect changes
180 | 
181 | ## Best Practices
182 | 
183 | When working with this codebase:
184 | 
185 | 1. **Maintain separation of concerns**: Keep each module focused on its specific responsibility
186 | 2. **Avoid circular dependencies**: Lower-level modules shouldn't import higher-level ones
187 | 3. **Update configuration**: Use `config.py` for constants rather than hardcoding values
188 | 4. **Follow existing patterns**: Maintain consistency with the established code style
189 | 5. **Document changes**: Update comments and documentation when making significant changes 


--------------------------------------------------------------------------------
/src/channels.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Channel management module.
  3 | Handles operations related to Telegram channels and groups.
  4 | """
  5 | import logging
  6 | from datetime import datetime
  7 | 
  8 | from src.database import save_database
  9 | 
 10 | logger = logging.getLogger(__name__)
 11 | 
 12 | async def list_channels(client):
 13 |     """
 14 |     List all channels/groups user is subscribed to
 15 |     
 16 |     Args:
 17 |         client: Telegram client
 18 |         
 19 |     Returns:
 20 |         list: List of channel information
 21 |     """
 22 |     try:
 23 |         dialogs = await client.get_dialogs()
 24 |         channels = []
 25 |         
 26 |         # Collect channel info
 27 |         for i, dialog in enumerate(dialogs, 1):
 28 |             if dialog.is_channel or dialog.is_group:
 29 |                 entity = dialog.entity
 30 |                 
 31 |                 # Store channel info
 32 |                 channel_info = {
 33 |                     'id': entity.id,
 34 |                     'title': entity.title,
 35 |                     'username': getattr(entity, 'username', None),
 36 |                     'participants_count': getattr(entity, 'participants_count', 0),
 37 |                     'type': 'Channel' if dialog.is_channel else 'Group',
 38 |                     'index': i
 39 |                 }
 40 |                 channels.append(channel_info)
 41 |         
 42 |         # Sort channels by member count (descending)
 43 |         channels.sort(key=lambda x: x['participants_count'] or 0, reverse=True)
 44 |         
 45 |         # Update indices after sorting
 46 |         for i, channel in enumerate(channels, 1):
 47 |             channel['index'] = i
 48 |         
 49 |         return channels
 50 |     except Exception as e:
 51 |         logger.error(f"Error listing channels: {e}")
 52 |         return []
 53 | 
 54 | def display_channels(channels):
 55 |     """
 56 |     Display channels in a tabular format
 57 |     
 58 |     Args:
 59 |         channels: List of channel information
 60 |     """
 61 |     if not channels:
 62 |         print("\nNo channels/groups found!")
 63 |         return
 64 |         
 65 |     # Print header
 66 |     print("\nAvailable Channels and Groups:")
 67 |     print(f"{'#':>3} | {'Members':>7} | {'Type':^7} | {'Title':<30} | {'Username':<15}")
 68 |     print("-" * 70)
 69 |     
 70 |     # Print each channel on one line
 71 |     for channel in channels:
 72 |         # Format members count
 73 |         members = f"{channel['participants_count']:,}" if channel['participants_count'] else 'N/A'
 74 |         members = members[:7]  # Limit length
 75 |         
 76 |         # Format username
 77 |         username = f"@{channel['username']}" if channel['username'] else '-'
 78 |         username = username[:15]  # Limit length
 79 |         
 80 |         # Format title (with ellipsis if too long)
 81 |         title = channel['title']
 82 |         if len(title) > 30:
 83 |             title = title[:27] + "..."
 84 |         
 85 |         print(f"{channel['index']:3} | {members:>7} | {channel['type']:<7} | {title:<30} | {username:<15}")
 86 |     
 87 |     print("-" * 70)
 88 |     print(f"Total: {len(channels)} channels/groups")
 89 | 
 90 | async def select_active_channel(client, db, db_path):
 91 |     """
 92 |     Select active channel/group
 93 |     
 94 |     Args:
 95 |         client: Telegram client
 96 |         db: Database
 97 |         db_path: Path to database file
 98 |         
 99 |     Returns:
100 |         bool: True if channel selected, False otherwise
101 |     """
102 |     channels = await list_channels(client)
103 |     display_channels(channels)
104 |     
105 |     if not channels:
106 |         print("\nNo channels/groups found!")
107 |         return False
108 |     
109 |     while True:
110 |         try:
111 |             choice = input("\nEnter channel number to select (or 0 to cancel): ")
112 |             if choice == '0':
113 |                 return False
114 |             
115 |             index = int(choice)
116 |             selected = next((c for c in channels if c['index'] == index), None)
117 |             
118 |             if selected:
119 |                 # Update active channel in database
120 |                 db['active_channel'] = selected
121 |                 save_database(db_path, db)
122 |                 print(f"\nSelected channel: {selected['title']}")
123 |                 return True
124 |             else:
125 |                 print("\nInvalid channel number!")
126 |         except ValueError:
127 |             print("\nPlease enter a valid number!")
128 | 
129 | def get_active_channel(db):
130 |     """
131 |     Get currently active channel from database
132 |     
133 |     Args:
134 |         db: Database
135 |         
136 |     Returns:
137 |         dict: Active channel information
138 |     """
139 |     return db.get('active_channel')
140 | 
141 | async def show_active_channel(client, db):
142 |     """
143 |     Display information about active channel
144 |     
145 |     Args:
146 |         client: Telegram client
147 |         db: Database
148 |     """
149 |     active = get_active_channel(db)
150 |     if active:
151 |         print("\nActive Channel/Group:")
152 |         print("--------------------")
153 |         print(f"Title: {active['title']}")
154 |         print(f"Type: {active['type']}")
155 |         if active['username']:
156 |             print(f"Username: @{active['username']}")
157 |         print(f"ID: {active['id']}")
158 |         print(f"Members: {active['participants_count']}")
159 |         
160 |         # Get message count information
161 |         try:
162 |             channel_id = str(active['id'])
163 |             
164 |             # Check if we have messages saved in DB
165 |             saved_count = 0
166 |             if 'messages' in db and channel_id in db.get('messages', {}):
167 |                 saved_count = len(db['messages'][channel_id])
168 |             
169 |             print(f"Saved Messages: {saved_count}")
170 |             
171 |             # Get total message count from the server
172 |             print("Fetching total message count from server...")
173 |             # Get first message (oldest)
174 |             first_message = None
175 |             last_message = None
176 |             async for msg in client.iter_messages(active['id'], limit=1, reverse=True):
177 |                 first_message = msg
178 |             
179 |             # Get last message (newest) 
180 |             async for msg in client.iter_messages(active['id'], limit=1):
181 |                 last_message = msg
182 |                 
183 |             if first_message and last_message:
184 |                 total = last_message.id - first_message.id + 1
185 |                 print(f"Total Messages (estimate): {total}")
186 |                 print(f"First Message ID: {first_message.id}")
187 |                 print(f"Last Message ID: {last_message.id}")
188 |                 
189 |                 # Get video counts
190 |                 saved_videos_count = 0
191 |                 if 'videos' in db and channel_id in db.get('videos', {}):
192 |                     saved_videos_count = len(db['videos'][channel_id])
193 |                 
194 |                 print(f"Saved Videos: {saved_videos_count}")
195 |                 
196 |                 # Check for media in saved messages
197 |                 media_count = 0
198 |                 if 'messages' in db and channel_id in db.get('messages', {}):
199 |                     media_count = sum(1 for msg in db['messages'][channel_id].values() 
200 |                                     if msg.get('has_media'))
201 |                 
202 |                 print(f"Messages with Media: {media_count}")
203 |             else:
204 |                 print("Unable to determine total message count.")
205 |             
206 |         except Exception as e:
207 |             logger.error(f"Error getting message count: {e}")
208 |             print("Unable to determine message count.")
209 |     else:
210 |         print("\nNo active channel selected!") 


--------------------------------------------------------------------------------
/src/image_analysis.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Image analysis module using OpenRouter API.
  3 | Provides functionality to analyze images using AI models.
  4 | """
  5 | import os
  6 | import logging
  7 | import base64
  8 | import requests
  9 | from typing import Optional, Dict, Any
 10 | 
 11 | from src.config import OPENROUTER_API_KEY, OPENROUTER_BASE_URL, OPENROUTER_MODEL, OPENROUTER_TIMEOUT
 12 | 
 13 | logger = logging.getLogger(__name__)
 14 | 
 15 | def encode_image_to_base64(image_path: str) -> str:
 16 |     """
 17 |     Encode an image file to base64 string.
 18 |     
 19 |     Args:
 20 |         image_path: Path to the image file
 21 |         
 22 |     Returns:
 23 |         Base64 encoded string of the image
 24 |     """
 25 |     with open(image_path, "rb") as image_file:
 26 |         return base64.b64encode(image_file.read()).decode('utf-8')
 27 | 
 28 | def get_image_mime_type(image_path: str) -> str:
 29 |     """
 30 |     Get the MIME type of an image based on its file extension.
 31 |     
 32 |     Args:
 33 |         image_path: Path to the image file
 34 |         
 35 |     Returns:
 36 |         MIME type string (e.g., 'image/jpeg', 'image/png')
 37 |     """
 38 |     extension = os.path.splitext(image_path)[1].lower()
 39 |     mime_types = {
 40 |         '.jpg': 'image/jpeg',
 41 |         '.jpeg': 'image/jpeg',
 42 |         '.png': 'image/png',
 43 |         '.gif': 'image/gif',
 44 |         '.webp': 'image/webp',
 45 |         '.bmp': 'image/bmp'
 46 |     }
 47 |     return mime_types.get(extension, 'image/jpeg')
 48 | 
 49 | def analyze_image_with_openrouter(image_path: str, prompt: str = None) -> Dict[str, Any]:
 50 |     """
 51 |     Analyze an image using OpenRouter API with GPT-4 Vision.
 52 |     
 53 |     Args:
 54 |         image_path: Path to the image file
 55 |         prompt: Optional custom prompt for analysis
 56 |         
 57 |     Returns:
 58 |         Dict containing analysis result or error information
 59 |     """
 60 |     # Reload environment variables to catch any updates
 61 |     from dotenv import load_dotenv
 62 |     load_dotenv()
 63 |     api_key = os.getenv('OPENROUTER_API_KEY')
 64 |     
 65 |     if not api_key:
 66 |         return {
 67 |             'success': False,
 68 |             'error': 'OpenRouter API key not configured. Please set OPENROUTER_API_KEY environment variable.'
 69 |         }
 70 |     
 71 |     if not os.path.exists(image_path):
 72 |         return {
 73 |             'success': False,
 74 |             'error': f'Image file not found: {image_path}'
 75 |         }
 76 |     
 77 |     try:
 78 |         # Encode image to base64
 79 |         base64_image = encode_image_to_base64(image_path)
 80 |         mime_type = get_image_mime_type(image_path)
 81 |         data_url = f"data:{mime_type};base64,{base64_image}"
 82 |         
 83 |         # Default prompt for image analysis
 84 |         if prompt is None:
 85 |             prompt = ("Describe what you see in this image in detail. "
 86 |                      "Include objects, people, text, colors, setting, and any other relevant details. "
 87 |                      "If there is text in the image, transcribe it. "
 88 |                      "Keep the description concise but comprehensive.")
 89 |         
 90 |         # Prepare the request
 91 |         url = f"{OPENROUTER_BASE_URL}/chat/completions"
 92 |         headers = {
 93 |             "Authorization": f"Bearer {api_key}",
 94 |             "Content-Type": "application/json"
 95 |         }
 96 |         
 97 |         messages = [
 98 |             {
 99 |                 "role": "user",
100 |                 "content": [
101 |                     {
102 |                         "type": "text",
103 |                         "text": prompt
104 |                     },
105 |                     {
106 |                         "type": "image_url",
107 |                         "image_url": {
108 |                             "url": data_url
109 |                         }
110 |                     }
111 |                 ]
112 |             }
113 |         ]
114 |         
115 |         payload = {
116 |             "model": OPENROUTER_MODEL,
117 |             "messages": messages,
118 |             "max_tokens": 1000
119 |         }
120 |         
121 |         logger.info(f"Analyzing image: {image_path}")
122 |         response = requests.post(url, headers=headers, json=payload, timeout=OPENROUTER_TIMEOUT)
123 |         
124 |         if response.status_code == 200:
125 |             result = response.json()
126 |             
127 |             if 'choices' in result and len(result['choices']) > 0:
128 |                 analysis = result['choices'][0]['message']['content']
129 |                 logger.info(f"Image analysis completed successfully for: {image_path}")
130 |                 
131 |                 return {
132 |                     'success': True,
133 |                     'analysis': analysis,
134 |                     'model': OPENROUTER_MODEL,
135 |                     'image_path': image_path,
136 |                     'usage': result.get('usage', {})
137 |                 }
138 |             else:
139 |                 logger.error(f"No analysis content in response: {result}")
140 |                 return {
141 |                     'success': False,
142 |                     'error': 'No analysis content received from API'
143 |                 }
144 |         else:
145 |             logger.error(f"OpenRouter API error: {response.status_code} - {response.text}")
146 |             return {
147 |                 'success': False,
148 |                 'error': f'API request failed with status {response.status_code}: {response.text}'
149 |             }
150 |             
151 |     except requests.exceptions.Timeout:
152 |         logger.error(f"Timeout while analyzing image: {image_path}")
153 |         return {
154 |             'success': False,
155 |             'error': 'Request timed out while analyzing image'
156 |         }
157 |     except requests.exceptions.RequestException as e:
158 |         logger.error(f"Network error while analyzing image: {image_path} - {str(e)}")
159 |         return {
160 |             'success': False,
161 |             'error': f'Network error: {str(e)}'
162 |         }
163 |     except Exception as e:
164 |         logger.error(f"Unexpected error analyzing image: {image_path} - {str(e)}")
165 |         return {
166 |             'success': False,
167 |             'error': f'Unexpected error: {str(e)}'
168 |         }
169 | 
def analyze_multiple_images(image_paths: list, prompt: str = None) -> Dict[str, Any]:
    """
    Analyze all images of one media group with a single OpenRouter request.

    Args:
        image_paths: List of image file paths; paths that do not exist on
            disk are dropped before the request is built
        prompt: Optional custom prompt; a built-in multi-image prompt is
            used when it is None

    Returns:
        Dict with 'success' set to True plus 'analysis', 'model',
        'image_paths', 'image_count' and 'usage', or 'success' set to False
        plus an 'error' message describing what went wrong
    """
    # Load .env again in case it was updated since the module was imported
    from dotenv import load_dotenv
    load_dotenv()
    api_key = os.getenv('OPENROUTER_API_KEY')

    if not api_key:
        return {
            'success': False,
            'error': 'OpenRouter API key not configured. Please set OPENROUTER_API_KEY environment variable.'
        }

    # Only files that are actually present can be encoded and sent
    existing_images = [candidate for candidate in image_paths if os.path.exists(candidate)]
    if not existing_images:
        return {
            'success': False,
            'error': 'No valid image files found'
        }

    image_count = len(existing_images)

    try:
        if prompt is None:
            prompt = ("Describe what you see in these images. "
                     "These images are part of the same message or media group. "
                     "Describe each image and explain how they relate to each other. "
                     "Include objects, people, text, colors, setting, and any other relevant details. "
                     "If there is text in any image, transcribe it. "
                     "Keep the description concise but comprehensive.")

        endpoint = f"{OPENROUTER_BASE_URL}/chat/completions"
        request_headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }

        # One text part followed by one inline data-URL part per image
        parts = [{"type": "text", "text": prompt}]
        for path in existing_images:
            encoded = encode_image_to_base64(path)
            mime = get_image_mime_type(path)
            parts.append({
                "type": "image_url",
                "image_url": {"url": f"data:{mime};base64,{encoded}"}
            })

        body = {
            "model": OPENROUTER_MODEL,
            "messages": [{"role": "user", "content": parts}],
            "max_tokens": 1500  # Larger budget than the single-image call
        }

        logger.info(f"Analyzing {image_count} images as media group")
        response = requests.post(endpoint, headers=request_headers, json=body, timeout=OPENROUTER_TIMEOUT)

        # Any non-200 status is reported back verbatim
        if response.status_code != 200:
            logger.error(f"OpenRouter API error: {response.status_code} - {response.text}")
            return {
                'success': False,
                'error': f'API request failed with status {response.status_code}: {response.text}'
            }

        result = response.json()

        if 'choices' not in result or len(result['choices']) == 0:
            logger.error(f"No analysis content in response: {result}")
            return {
                'success': False,
                'error': 'No analysis content received from API'
            }

        analysis = result['choices'][0]['message']['content']
        logger.info(f"Media group analysis completed successfully for {image_count} images")

        return {
            'success': True,
            'analysis': analysis,
            'model': OPENROUTER_MODEL,
            'image_paths': existing_images,
            'image_count': image_count,
            'usage': result.get('usage', {})
        }

    except requests.exceptions.Timeout:
        logger.error("Timeout while analyzing media group")
        return {
            'success': False,
            'error': 'Request timed out while analyzing images'
        }
    except requests.exceptions.RequestException as exc:
        logger.error(f"Network error while analyzing media group: {exc}")
        return {
            'success': False,
            'error': f'Network error: {exc}'
        }
    except Exception as exc:
        # Covers encoding failures, malformed JSON and unexpected response shapes
        logger.error(f"Unexpected error analyzing media group: {exc}")
        return {
            'success': False,
            'error': f'Unexpected error: {exc}'
        }


--------------------------------------------------------------------------------
/src/formatting.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Formatting preservation utilities for search and replace.
  3 | Uses Telegram's native entity format (raw_text + entities).
  4 | """
  5 | import re
  6 | import copy
  7 | from telethon.tl import types
  8 | from telethon.extensions import markdown
  9 | 
 10 | # All supported entity types for serialization/deserialization
 11 | ENTITY_TYPES = {
 12 |     'MessageEntityBold': types.MessageEntityBold,
 13 |     'MessageEntityItalic': types.MessageEntityItalic,
 14 |     'MessageEntityStrike': types.MessageEntityStrike,
 15 |     'MessageEntityUnderline': types.MessageEntityUnderline,
 16 |     'MessageEntityCode': types.MessageEntityCode,
 17 |     'MessageEntityPre': types.MessageEntityPre,
 18 |     'MessageEntityTextUrl': types.MessageEntityTextUrl,
 19 |     'MessageEntityUrl': types.MessageEntityUrl,
 20 |     'MessageEntityMention': types.MessageEntityMention,
 21 |     'MessageEntityMentionName': types.MessageEntityMentionName,
 22 |     'MessageEntityHashtag': types.MessageEntityHashtag,
 23 |     'MessageEntityCashtag': types.MessageEntityCashtag,
 24 |     'MessageEntityBotCommand': types.MessageEntityBotCommand,
 25 |     'MessageEntityEmail': types.MessageEntityEmail,
 26 |     'MessageEntityPhone': types.MessageEntityPhone,
 27 |     'MessageEntityBlockquote': types.MessageEntityBlockquote,
 28 |     'MessageEntitySpoiler': types.MessageEntitySpoiler,
 29 |     'MessageEntityCustomEmoji': types.MessageEntityCustomEmoji,
 30 | }
 31 | 
 32 | 
 33 | def entity_to_dict(entity):
 34 |     """
 35 |     Convert a MessageEntity to a JSON-serializable dict.
 36 | 
 37 |     Args:
 38 |         entity: A Telethon MessageEntity object
 39 | 
 40 |     Returns:
 41 |         dict: Serializable representation of the entity
 42 |     """
 43 |     if entity is None:
 44 |         return None
 45 | 
 46 |     d = {
 47 |         '_type': type(entity).__name__,
 48 |         'offset': entity.offset,
 49 |         'length': entity.length
 50 |     }
 51 | 
 52 |     # Type-specific attributes
 53 |     if hasattr(entity, 'url') and entity.url:
 54 |         d['url'] = entity.url
 55 |     if hasattr(entity, 'language') and entity.language:
 56 |         d['language'] = entity.language
 57 |     if hasattr(entity, 'user_id') and entity.user_id:
 58 |         d['user_id'] = entity.user_id
 59 |     if hasattr(entity, 'document_id') and entity.document_id:
 60 |         d['document_id'] = entity.document_id
 61 |     if hasattr(entity, 'collapsed') and entity.collapsed:
 62 |         d['collapsed'] = entity.collapsed
 63 | 
 64 |     return d
 65 | 
 66 | 
 67 | def dict_to_entity(d):
 68 |     """
 69 |     Convert a dict back to a MessageEntity object.
 70 | 
 71 |     Args:
 72 |         d: Dictionary with entity data
 73 | 
 74 |     Returns:
 75 |         MessageEntity object or None if type unknown
 76 |     """
 77 |     if not d or '_type' not in d:
 78 |         return None
 79 | 
 80 |     entity_type = d['_type']
 81 |     offset = d.get('offset', 0)
 82 |     length = d.get('length', 0)
 83 | 
 84 |     cls = ENTITY_TYPES.get(entity_type)
 85 |     if not cls:
 86 |         return None
 87 | 
 88 |     # Handle different constructor signatures
 89 |     try:
 90 |         if entity_type == 'MessageEntityTextUrl':
 91 |             return cls(offset, length, d.get('url', ''))
 92 |         elif entity_type == 'MessageEntityPre':
 93 |             return cls(offset, length, d.get('language', ''))
 94 |         elif entity_type == 'MessageEntityMentionName':
 95 |             return cls(offset, length, d.get('user_id', 0))
 96 |         elif entity_type == 'MessageEntityCustomEmoji':
 97 |             return cls(offset, length, d.get('document_id', 0))
 98 |         elif entity_type == 'MessageEntityBlockquote':
 99 |             return cls(offset, length, collapsed=d.get('collapsed', False))
100 |         else:
101 |             return cls(offset, length)
102 |     except Exception:
103 |         return None
104 | 
105 | 
def entities_to_dicts(entities):
    """
    Serialize a sequence of entities, skipping None entries.

    Args:
        entities: List of MessageEntity objects or None

    Returns:
        list: One serializable dict per non-None entity (empty list when
        entities is None or empty)
    """
    serialized = []
    for entity in entities or ():
        if entity is not None:
            serialized.append(entity_to_dict(entity))
    return serialized
119 | 
120 | 
def dicts_to_entities(dicts):
    """
    Deserialize a sequence of entity dicts, dropping unconvertible ones.

    Args:
        dicts: List of entity dicts or None

    Returns:
        list: MessageEntity objects for every dict that dict_to_entity could
        convert (empty list when dicts is None or empty)
    """
    if not dicts:
        return []
    return [obj for obj in map(dict_to_entity, dicts) if obj is not None]
135 | 
136 | 
def _utf16_len(text):
    """Return the length of text in UTF-16 code units, the unit of entity offsets."""
    # Characters outside the Basic Multilingual Plane (most emoji) occupy a
    # surrogate pair, i.e. two code units, while Python counts them as one.
    return len(text) + sum(1 for ch in text if ord(ch) > 0xFFFF)


def search_replace_with_entities(raw_text, entities, search, replace, case_sensitive=True):
    """
    Replace text while adjusting entity offsets and lengths.
    Works with Telegram's native format.

    Entity offsets/lengths are measured in UTF-16 code units, whereas Python
    string indices are code points, so every match position is converted to
    UTF-16 units before entities are adjusted. All entities are remapped from
    the ORIGINAL coordinates in one pass after the matches are collected, so
    several replacements in one message cannot shift an entity twice.

    Boundary rules for an entity relative to a replaced match:
      - entity entirely after the match: shifted by the length difference
      - entity containing the match (or covering exactly the same span):
        grows/shrinks by the length difference
      - match covers the entity's start: entity starts right after the
        replacement
      - match covers the entity's end: entity ends right before the
        replacement
      - entity strictly inside the match: collapses to zero length and is
        removed

    Args:
        raw_text: Plain text without formatting markers
        entities: List of MessageEntity objects or dicts (None entries are
            ignored; dicts of unknown type are dropped)
        search: String to find (matched literally, not as a regex)
        replace: String to replace with (inserted literally)
        case_sensitive: Whether search is case sensitive

    Returns:
        tuple: (new_raw_text, new_entities_as_dicts, replacement_count);
        replacement_count counts matches in the text only, not in URLs
    """
    # Local import so the file's top-level import block stays untouched
    from bisect import bisect_right

    # Work on independent entity objects so the caller's data is not mutated
    entity_objs = []
    for e in (entities or []):
        if e is None:
            continue
        if isinstance(e, dict):
            obj = dict_to_entity(e)
            if obj is not None:
                entity_objs.append(obj)
        else:
            entity_objs.append(copy.copy(e))

    if not raw_text or not search:
        # Nothing to search: return the input unchanged, entities normalized to dicts
        return raw_text, [entity_to_dict(e) for e in entity_objs], 0

    entity_objs.sort(key=lambda e: e.offset)

    flags = 0 if case_sensitive else re.IGNORECASE
    pattern = re.compile(re.escape(search), flags)

    matches = list(pattern.finditer(raw_text))
    replacement_count = len(matches)

    # A callable replacement inserts `replace` verbatim; a plain string would
    # be treated as a template in which backslashes and \1 / \g<..> are special.
    new_text = pattern.sub(lambda _m: replace, raw_text)

    # Describe every match in UTF-16 units of the ORIGINAL text, together with
    # the cumulative shift caused by all matches before it.
    repl_units = _utf16_len(replace) if matches else 0
    starts, ends, shifts = [], [], []
    cp_pos = 0      # code-point index consumed so far
    unit_pos = 0    # the same position in UTF-16 units
    shift = 0       # total length change of the matches processed so far
    for m in matches:
        unit_pos += _utf16_len(raw_text[cp_pos:m.start()])
        match_start = unit_pos
        unit_pos += _utf16_len(m.group())
        cp_pos = m.end()
        starts.append(match_start)
        ends.append(unit_pos)
        shifts.append(shift)
        shift += repl_units - (unit_pos - match_start)
    total_shift = shift

    def remap(pos, is_start):
        """Map an original UTF-16 position to its position in the new text."""
        k = bisect_right(ends, pos)  # number of matches ending at or before pos
        if k == len(ends):
            return pos + total_shift
        if starts[k] < pos:
            # pos lies strictly inside match k: snap a start boundary to the
            # end of the replacement and an end boundary to its beginning
            new_match_start = starts[k] + shifts[k]
            return new_match_start + repl_units if is_start else new_match_start
        return pos + shifts[k]

    for ent in entity_objs:
        new_start = remap(ent.offset, True)
        new_end = remap(ent.offset + ent.length, False)
        ent.offset = new_start
        ent.length = max(0, new_end - new_start)

    # Also replace in URL attributes of TextUrl entities
    for ent in entity_objs:
        if isinstance(ent, types.MessageEntityTextUrl) and ent.url:
            ent.url = pattern.sub(lambda _m: replace, ent.url)

    # Remove zero-length entities and convert back to dicts
    new_entities = [entity_to_dict(e) for e in entity_objs if e.length > 0]

    return new_text, new_entities, replacement_count
225 | 
226 | 
def entities_to_markdown(raw_text, entities):
    """
    Convert raw_text + entities to markdown for display.

    Args:
        raw_text: Plain text
        entities: List of entity dicts and/or MessageEntity objects (may be
            None). Dicts are converted with dict_to_entity; dicts that cannot
            be converted and None entries are skipped. Objects are passed
            through unchanged.

    Returns:
        str: Markdown-formatted text, '' when raw_text is empty, or raw_text
        itself when markdown generation fails
    """
    if not raw_text:
        return ''

    # Only dicts need deserializing; already-built entity objects are used
    # as-is instead of being pushed through the dict converter.
    entity_objs = []
    for ent in entities or []:
        obj = dict_to_entity(ent) if isinstance(ent, dict) else ent
        if obj is not None:
            entity_objs.append(obj)

    try:
        return markdown.unparse(raw_text, entity_objs)
    except Exception:
        # Fallback to raw text if unparse fails
        return raw_text
248 | 
249 | 
def get_entities_from_markdown(text):
    """
    Split markdown text into plain text plus entity dicts.
    Intended for legacy records that were saved without entities.

    Args:
        text: Markdown-formatted text

    Returns:
        tuple: (raw_text, entities_as_dicts); ('', []) for empty input and
        (text, []) when the markdown cannot be parsed
    """
    if text:
        try:
            plain, parsed_entities = markdown.parse(text)
            return plain, entities_to_dicts(parsed_entities)
        except Exception:
            # Unparseable markdown is kept verbatim, without formatting
            return text, []

    return '', []
270 | 
271 | 
def get_message_entities(message_dict):
    """
    Resolve the plain text and entities of a stored message.

    Lookup order:
      1. stored 'entities' (non-empty) together with 'raw_text'
      2. the markdown 'text' field, parsed into text + entities
      3. 'raw_text' alone with no entities

    Args:
        message_dict: Message dictionary from database

    Returns:
        tuple: (raw_text, entities_as_dicts)
    """
    stored_entities = message_dict.get('entities')
    if stored_entities:
        return message_dict.get('raw_text') or '', stored_entities

    # No stored entities: try to recover them from the markdown field
    markdown_text = message_dict.get('text') or ''
    if markdown_text:
        return get_entities_from_markdown(markdown_text)

    # Nothing to parse either: plain text only
    return message_dict.get('raw_text') or '', []
295 | 
296 | 
def apply_replacement_to_message(message_dict, search, replace, case_sensitive=True):
    """
    Apply search-replace to a message and return updated fields.

    Args:
        message_dict: Original message dictionary
        search: Text to find; an empty or None value never matches
        replace: Text to replace with
        case_sensitive: Whether search is case sensitive

    Returns:
        dict: Updated fields ('raw_text', 'entities', 'text',
        'replacement_count') or None if no changes. 'replacement_count'
        counts matches in the text only; it is 0 when only a link URL changed.
    """
    # An empty search term is "contained" in every string and None cannot be
    # searched for at all, so neither can produce a meaningful edit.
    if not search:
        return None

    raw_text, entities = get_message_entities(message_dict)

    # Handle None or empty raw_text
    if not raw_text:
        return None

    def fold(value):
        """Normalize a string for comparison according to case_sensitive."""
        return value if case_sensitive else value.lower()

    # Cheap pre-check: skip the message unless the term occurs in the text
    # or in the URL of at least one stored entity.
    needle = fold(search)
    if needle not in fold(raw_text):
        url_hit = any(
            isinstance(ent, dict) and ent.get('url') and needle in fold(ent['url'])
            for ent in entities
        )
        if not url_hit:
            return None

    # Apply replacement
    new_raw, new_entities, count = search_replace_with_entities(
        raw_text, entities, search, replace, case_sensitive
    )

    if count == 0:
        # No text match: only report a change if some entity URL differs
        old_urls = [e.get('url', '') for e in entities if isinstance(e, dict)]
        new_urls = [e.get('url', '') for e in new_entities if isinstance(e, dict)]
        if old_urls == new_urls:
            return None

    # Generate new markdown
    new_markdown = entities_to_markdown(new_raw, new_entities)

    return {
        'raw_text': new_raw,
        'entities': new_entities,
        'text': new_markdown,
        'replacement_count': count
    }
358 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Telegram Channel Saver
  2 | 
  3 | <div align="center">
  4 | 
  5 | ![Python](https://img.shields.io/badge/Python-3.8+-3776AB?style=for-the-badge&logo=python&logoColor=white)
  6 | ![Telegram](https://img.shields.io/badge/Telegram_API-26A5E4?style=for-the-badge&logo=telegram&logoColor=white)
  7 | ![Telethon](https://img.shields.io/badge/Telethon-MTProto-FF6B6B?style=for-the-badge)
  8 | ![License](https://img.shields.io/badge/License-MIT-green?style=for-the-badge)
  9 | ![Claude](https://img.shields.io/badge/Built_with-Claude_AI-CC785C?style=for-the-badge&logo=anthropic&logoColor=white)
 10 | 
 11 | **Build your own Telegram client with Telethon**
 12 | 
 13 | *The foundation for the first Telegram-CMS system*
 14 | 
 15 | [Features](#features) • [Installation](#installation) • [Usage](#usage) • [Documentation](#documentation) • [Contributing](#contributing)
 16 | 
 17 | </div>
 18 | 
 19 | ---
 20 | 
 21 | ## About This Project
 22 | 
 23 | This project demonstrates how to build a **personal Telegram client** using the [Telethon](https://github.com/LonamiWebs/Telethon) library to manage your Telegram messages at scale. It serves as both a practical tool and an educational resource for developers interested in:
 24 | 
 25 | - Building custom Telegram applications beyond standard bots
 26 | - Managing channel content programmatically
 27 | - Creating backup and archival systems for Telegram data
 28 | - Understanding MTProto protocol interactions
 29 | 
 30 | > **Vision**: This codebase is the foundation for a **Telegram-CMS** - a content management system for Telegram channels and groups, enabling publishers to manage, edit, and organize their content.
 31 | 
 32 | ---
 33 | 
 34 | ## Features
 35 | 
 36 | ### Channel & Group Management
 37 | 
 38 | | Feature | Description |
 39 | |---------|-------------|
 40 | | **Multi-Channel Support** | Connect to and manage multiple channels and groups from a single interface |
 41 | | **Channel Selection** | Browse all your subscribed channels/groups and select which one to work with |
 42 | | **Channel Info Display** | View detailed information about channels including member counts and activity |
 43 | | **Permission Detection** | Automatically detects admin rights for editing capabilities |
 44 | 
 45 | ### Message Operations
 46 | 
 47 | | Feature | Description |
 48 | |---------|-------------|
 49 | | **Bulk Message Download** | Download all messages or specify ranges (by count, ID range, or recent) |
 50 | | **Incremental Sync** | Download only new messages since last sync to save time and bandwidth |
 51 | | **Force Redownload** | Option to re-fetch all messages when needed |
 52 | | **Reaction Tracking** | Saves emoji reactions and reaction counts for each message |
 53 | | **Reply Threading** | Preserves reply relationships between messages |
 54 | | **Rate Limit Compliance** | Automatic rate limiting to respect Telegram's API limits (100 msgs/request) |
 55 | 
 56 | ### Search & Replace (Channel Editing)
 57 | 
 58 | | Feature | Description |
 59 | |---------|-------------|
 60 | | **Local Search** | Search through locally saved messages by text, regex, date, or ID |
 61 | | **Bulk Search & Replace** | Find and replace text across multiple messages |
 62 | | **Formatting Preservation** | Maintains all Telegram formatting (bold, italic, links, etc.) during edits |
 63 | | **Native Entity Handling** | Works with Telegram's native entity format for pixel-perfect formatting |
 64 | | **One-by-One Approval** | Review each change before applying with preview |
 65 | | **Live Channel Editing** | Edit messages directly on Telegram (requires admin rights) |
 66 | | **Undo Capability** | Instantly undo the last edit during batch operations |
 67 | | **Backup & Restore** | All edited messages are backed up; restore originals anytime |
 68 | 
 69 | ### Message Browsing
 70 | 
 71 | | Feature | Description |
 72 | |---------|-------------|
 73 | | **Paginated View** | Browse messages 10 per page with easy navigation |
 74 | | **Jump Navigation** | Jump to specific message ID or page number |
 75 | | **HTML Source View** | View the raw HTML/markdown source of any message |
 76 | | **Message Preview** | See ID, date, sender, and content snippet at a glance |
 77 | 
 78 | ### Media Handling
 79 | 
 80 | | Feature | Description |
 81 | |---------|-------------|
 82 | | **Video Downloads** | Download all videos or just video circles (round videos) |
 83 | | **Photo Support** | Download and track photos attached to messages |
 84 | | **Chunked Downloads** | Large files downloaded in chunks for reliability |
 85 | | **Progress Indication** | Real-time progress during downloads |
 86 | | **Retry Mechanism** | Exponential backoff for failed downloads |
 87 | | **Timeout Handling** | Configurable timeouts for slow connections |
 88 | 
 89 | ### Export Capabilities
 90 | 
 91 | | Feature | Description |
 92 | |---------|-------------|
 93 | | **Full Channel Export** | Export all messages to formatted text files |
 94 | | **User-Specific Export** | Export only messages from a specific user |
 95 | | **Individual Message Export** | Export single messages with full context |
 96 | | **AI Image Analysis** | Optional GPT-4 powered analysis of images via OpenRouter |
 97 | | **Statistics** | View channel stats (message count, media count, users) |
 98 | | **Structured Output** | Exports include timestamps, usernames, and reply context |
 99 | 
100 | ### User Management
101 | 
102 | | Feature | Description |
103 | |---------|-------------|
104 | | **User Tracking** | Save and track all users in a channel |
105 | | **User Statistics** | View activity stats and user information |
106 | | **User Message History** | Find all messages from a specific user |
107 | | **Multi-Session Support** | Manage multiple Telegram accounts |
108 | | **Session Cleanup** | Remove invalid or expired sessions |
109 | 
110 | ### Search Features
111 | 
112 | | Feature | Description |
113 | |---------|-------------|
114 | | **Text Search** | Find messages containing specific text |
115 | | **Date Range Filter** | Search within specific time periods |
116 | | **ID-Based Search** | Look up messages by their ID |
117 | | **Reaction Filter** | Find messages with specific reactions |
118 | | **Media Filter** | Filter messages that contain media |
119 | 
120 | ---
121 | 
122 | ## Project Structure
123 | 
124 | ```
125 | /telegram-channel-saver/
126 |   ├── main.py               # Application entry point
127 |   ├── requirements.txt      # Python dependencies
128 |   ├── LICENSE               # MIT License
129 |   ├── CLAUDE.md             # AI development guidelines
130 |   │
131 |   ├── src/                  # Source code
132 |   │   ├── app.py            # Main application class
133 |   │   ├── channels.py       # Channel management
134 |   │   ├── client.py         # Telegram client operations
135 |   │   ├── config.py         # Configuration settings
136 |   │   ├── database.py       # JSON database operations
137 |   │   ├── export.py         # Export functionality
138 |   │   ├── formatting.py     # Entity & formatting utilities
139 |   │   ├── media.py          # Media file handling
140 |   │   ├── messages.py       # Message operations
141 |   │   ├── search_replace.py # Search & replace engine
142 |   │   └── users.py          # User tracking
143 |   │
144 |   ├── docs/                 # Documentation
145 |   │   ├── setup.md          # Setup instructions
146 |   │   ├── codebase.md       # Codebase overview
147 |   │   ├── contributing.md   # Contribution guidelines
148 |   │   └── faq.md            # FAQ
149 |   │
150 |   ├── tools/                # Development utilities
151 |   │   └── venv/             # Separate venv for tools
152 |   │
153 |   └── temp/                 # Data storage
154 |       ├── channel_saver/    # Database files
155 |       ├── media/            # Downloaded media
156 |       └── videos/           # Downloaded videos
157 | ```
158 | 
159 | ---
160 | 
161 | ## Installation
162 | 
163 | ### Prerequisites
164 | 
165 | - Python 3.8 or higher
166 | - Telegram account
167 | - Telegram API credentials (api_id and api_hash)
168 | 
169 | ### Step-by-Step Setup
170 | 
171 | 1. **Clone the repository**
172 |    ```bash
173 |    git clone https://github.com/chubajs/telegram-channel-saver.git
174 |    cd telegram-channel-saver
175 |    ```
176 | 
177 | 2. **Create virtual environment**
178 |    ```bash
179 |    # macOS/Linux
180 |    python3 -m venv venv
181 |    source venv/bin/activate
182 | 
183 |    # Windows
184 |    python -m venv venv
185 |    venv\Scripts\activate
186 |    ```
187 | 
188 | 3. **Install dependencies**
189 |    ```bash
190 |    pip install -r requirements.txt
191 |    ```
192 | 
193 | 4. **Get Telegram API credentials**
194 |    - Visit [my.telegram.org/apps](https://my.telegram.org/apps)
195 |    - Create a new application
196 |    - Copy your `api_id` and `api_hash`
197 | 
198 | 5. **Configure environment**
199 | 
200 |    Create a `.env` file in the project root:
201 |    ```env
202 |    # Required: Telegram API Credentials
203 |    API_ID=your_api_id
204 |    API_HASH=your_api_hash
205 | 
206 |    # Optional: AI Image Analysis
207 |    OPENROUTER_API_KEY=your_openrouter_key
208 |    ```
209 | 
210 | ---
211 | 
212 | ## Usage
213 | 
214 | ### Starting the Application
215 | 
216 | ```bash
217 | python main.py
218 | ```
219 | 
220 | ### First-Time Setup
221 | 
222 | 1. Enter your phone number (international format: +1234567890)
223 | 2. Enter the verification code sent to your Telegram
224 | 3. If 2FA is enabled, enter your password
225 | 
226 | ### Main Menu Options
227 | 
228 | ```
229 | Options:
230 | 1.  Show account info
231 | 2.  List channels/groups
232 | 3.  Select active channel
233 | 4.  Show active channel info
234 | 5.  Save channel users
235 | 6.  Show users statistics
236 | 7.  List saved sessions
237 | 8.  Switch session
238 | 9.  Cleanup invalid sessions
239 | 10. Save channel messages
240 | 11. List saved users
241 | 12. Search messages
242 | 13. Browse message index
243 | 14. Search and replace in messages
244 | 15. Restore edited messages
245 | 16. List edited messages
246 | 17. Download videos
247 | 18. List downloaded videos
248 | 19. Export messages
249 | 20. Logout
250 | 21. Exit
251 | ```
252 | 
253 | ### Example Workflows
254 | 
255 | **Backup a Channel:**
256 | ```
257 | 2 → List channels
258 | 3 → Select your channel
259 | 10 → Save messages (option 1: new only)
260 | ```
261 | 
262 | **Find and Replace Text:**
263 | ```
264 | 3 → Select channel
265 | 14 → Search and replace
266 |    → Enter search term
267 |    → Enter replacement
268 |    → Choose: Local only / Edit on Telegram
269 |    → Approve each change or skip
270 | ```
271 | 
272 | **Export for Analysis:**
273 | ```
274 | 3 → Select channel
275 | 19 → Export messages
276 |    → Choose export type
277 | ```
278 | 
279 | ---
280 | 
281 | ## Configuration
282 | 
283 | Edit `src/config.py` to customize:
284 | 
285 | | Setting | Default | Description |
286 | |---------|---------|-------------|
287 | | `MESSAGES_BATCH_SIZE` | 100 | Messages per API request |
288 | | `BATCH_DELAY` | 2 | Seconds between batches |
289 | | `SAVE_INTERVAL` | 300 | Auto-save interval (seconds) |
290 | | `MAX_RETRIES` | 3 | Retry attempts for failed operations |
291 | | `MEDIA_DOWNLOAD_TIMEOUT` | 120 | Timeout for media downloads |
292 | | `CHUNK_SIZE` | 1MB | Chunk size for large downloads |
293 | 
294 | ---
295 | 
296 | ## Data Storage
297 | 
298 | All data is stored locally in JSON format:
299 | 
300 | | Location | Content |
301 | |----------|---------|
302 | | `temp/channel_saver/database.json` | Messages, users, settings |
303 | | `temp/media/` | Downloaded photos and files |
304 | | `temp/videos/` | Downloaded videos |
305 | | `exports/` | Exported message logs |
306 | 
307 | ---
308 | 
309 | ## Troubleshooting
310 | 
311 | | Issue | Solution |
312 | |-------|----------|
313 | | **API Credentials Error** | Verify `.env` file exists with valid credentials |
314 | | **Database Errors** | Check `temp/` directory has write permissions |
315 | | **Rate Limiting** | Increase `BATCH_DELAY` in config |
316 | | **Media Timeouts** | Increase `MEDIA_DOWNLOAD_TIMEOUT` |
317 | | **Session Expired** | Use option 9 to clean up invalid sessions, then log in again |
318 | 
319 | ---
320 | 
321 | ## Documentation
322 | 
323 | - [Setup Instructions](docs/setup.md)
324 | - [Codebase Overview](docs/codebase.md)
325 | - [Contributing Guidelines](docs/contributing.md)
326 | - [FAQ](docs/faq.md)
327 | 
328 | ---
329 | 
330 | ## Contributing
331 | 
332 | Contributions are welcome! Please see our [Contributing Guidelines](docs/contributing.md).
333 | 
334 | 1. Fork the repository
335 | 2. Create feature branch (`git checkout -b feature/AmazingFeature`)
336 | 3. Commit changes (`git commit -m 'Add AmazingFeature'`)
337 | 4. Push to branch (`git push origin feature/AmazingFeature`)
338 | 5. Open a Pull Request
339 | 
340 | ---
341 | 
342 | ## Author
343 | 
344 | <div align="center">
345 | 
346 | Created by **[Sergey Bulaev](https://t.me/sergiobulaev)**
347 | 
348 | Follow my Telegram channel for more AI & tech projects
349 | 
350 | [![Telegram](https://img.shields.io/badge/Follow-@sergiobulaev-26A5E4?style=for-the-badge&logo=telegram&logoColor=white)](https://t.me/sergiobulaev)
351 | 
352 | </div>
353 | 
354 | ---
355 | 
356 | ## License
357 | 
358 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
359 | 
360 | ---
361 | 
362 | ## Disclaimer
363 | 
364 | This tool is for **educational and personal use only**. It demonstrates how to build custom Telegram clients using the Telethon library. Please ensure compliance with:
365 | 
366 | - [Telegram's Terms of Service](https://telegram.org/tos)
367 | - [Telegram API Terms of Use](https://core.telegram.org/api/terms)
368 | - Local laws regarding data processing and privacy
369 | 
370 | ---
371 | 
372 | <div align="center">
373 | 
374 | **The Foundation for Telegram-CMS**
375 | 
376 | *Managing Telegram content at scale*
377 | 
378 | ![Stars](https://img.shields.io/github/stars/chubajs/telegram-channel-saver?style=social)
379 | ![Forks](https://img.shields.io/github/forks/chubajs/telegram-channel-saver?style=social)
380 | 
381 | </div>
382 | 


--------------------------------------------------------------------------------
/docs/IMPLEMENTATION_PLAN.md:
--------------------------------------------------------------------------------
  1 | # Search and Replace Feature - Implementation Plan
  2 | 
  3 | ## Feature Overview
  4 | 
  5 | Add a search-and-replace feature to the Telegram Channel Saver that:
  6 | 1. Searches through saved messages for specified text/URLs
  7 | 2. Shows each matching message with original and proposed replacement
  8 | 3. Allows user to approve/skip/quit for each message
  9 | 4. Preserves all message formatting (bold, italic, links, etc.)
 10 | 5. Updates the local database only (does not modify Telegram)
 11 | 
 12 | ## Key Decision: Native Entities Approach
 13 | 
 14 | Based on research (see [SEARCH_REPLACE_FORMATTING.md](./SEARCH_REPLACE_FORMATTING.md)), we will use **Telegram's native entity format** instead of markdown:
 15 | 
 16 | - **Native format**: `raw_text` (plain text) + `entities` (list of MessageEntity with offset/length)
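 17 | - **Why**: More accurate, no parsing round-trip errors; of Telegram's 21 entity types, the 18 mapped in `ENTITY_TYPES` (Step 1) are handled, and entities of any other type are dropped when converted back from dicts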
 18 | - **Trade-off**: Need to update message download code to store entities
 19 | 
 20 | ## Architecture
 21 | 
 22 | ### New Files to Create
 23 | 
 24 | ```
 25 | src/
 26 | ├── search_replace.py    # Main search-replace logic + UI
 27 | └── formatting.py        # Entity serialization + replacement algorithm
 28 | ```
 29 | 
 30 | ### Files to Modify
 31 | 
 32 | ```
 33 | src/
 34 | ├── messages.py          # Add entities storage when downloading
 35 | └── app.py               # Add menu option 14
 36 | ```
 37 | 
 38 | ### Menu Integration
 39 | 
 40 | Add new option **14. Search and Replace** to the main menu in `src/app.py`.
 41 | 
 42 | ## Implementation Steps
 43 | 
 44 | ### Step 0: Update Message Download to Store Entities
 45 | 
 46 | In `src/messages.py`, add entity storage when downloading messages:
 47 | 
 48 | ```python
 49 | # Add to message_dict creation (around line 180-210):
 50 | 
 51 | # Serialize entities if present
 52 | 'entities': [entity_to_dict(e) for e in (message.entities or [])]
 53 | ```
 54 | 
 55 | This requires the `entity_to_dict` function from `formatting.py` (see Step 1).
 56 | 
 57 | ### Step 1: Create Formatting Utilities (`src/formatting.py`)
 58 | 
 59 | ```python
 60 | """
 61 | Formatting preservation utilities for search and replace.
 62 | Uses Telegram's native entity format (raw_text + entities).
 63 | """
 64 | import re
 65 | import copy
 66 | from telethon.tl import types
 67 | from telethon.extensions import markdown
 68 | 
 69 | # All supported entity types
 70 | ENTITY_TYPES = {
 71 |     'MessageEntityBold': types.MessageEntityBold,
 72 |     'MessageEntityItalic': types.MessageEntityItalic,
 73 |     'MessageEntityStrike': types.MessageEntityStrike,
 74 |     'MessageEntityUnderline': types.MessageEntityUnderline,
 75 |     'MessageEntityCode': types.MessageEntityCode,
 76 |     'MessageEntityPre': types.MessageEntityPre,
 77 |     'MessageEntityTextUrl': types.MessageEntityTextUrl,
 78 |     'MessageEntityUrl': types.MessageEntityUrl,
 79 |     'MessageEntityMention': types.MessageEntityMention,
 80 |     'MessageEntityMentionName': types.MessageEntityMentionName,
 81 |     'MessageEntityHashtag': types.MessageEntityHashtag,
 82 |     'MessageEntityCashtag': types.MessageEntityCashtag,
 83 |     'MessageEntityBotCommand': types.MessageEntityBotCommand,
 84 |     'MessageEntityEmail': types.MessageEntityEmail,
 85 |     'MessageEntityPhone': types.MessageEntityPhone,
 86 |     'MessageEntityBlockquote': types.MessageEntityBlockquote,
 87 |     'MessageEntitySpoiler': types.MessageEntitySpoiler,
 88 |     'MessageEntityCustomEmoji': types.MessageEntityCustomEmoji,
 89 | }
 90 | 
 91 | def entity_to_dict(entity):
 92 |     """Convert a MessageEntity to a JSON-serializable dict."""
 93 |     d = {
 94 |         '_type': type(entity).__name__,
 95 |         'offset': entity.offset,
 96 |         'length': entity.length
 97 |     }
 98 |     if hasattr(entity, 'url') and entity.url:
 99 |         d['url'] = entity.url
100 |     if hasattr(entity, 'language') and entity.language:
101 |         d['language'] = entity.language
102 |     if hasattr(entity, 'user_id'):
103 |         d['user_id'] = entity.user_id
104 |     if hasattr(entity, 'document_id'):
105 |         d['document_id'] = entity.document_id
106 |     if hasattr(entity, 'collapsed'):
107 |         d['collapsed'] = entity.collapsed
108 |     return d
109 | 
def dict_to_entity(d):
    """Rebuild a MessageEntity object from its serialized dict form.

    Looks the class up in ``ENTITY_TYPES`` by the dict's ``_type`` value and
    returns ``None`` when the type is not mapped there. ``_type``, ``offset``
    and ``length`` are required keys; the type-specific extra value falls back
    to a default when the dict does not carry it.
    """
    entity_type = d['_type']
    offset = d['offset']
    length = d['length']

    cls = ENTITY_TYPES.get(entity_type)
    if not cls:
        return None

    # Entity types whose constructor takes one extra positional argument:
    # type name -> (dict key, default used when the key is missing).
    positional_extra = {
        'MessageEntityTextUrl': ('url', ''),
        'MessageEntityPre': ('language', ''),
        'MessageEntityMentionName': ('user_id', 0),
        'MessageEntityCustomEmoji': ('document_id', 0),
    }
    if entity_type in positional_extra:
        key, default = positional_extra[entity_type]
        return cls(offset, length, d.get(key, default))

    # Blockquote is the only type that receives its extra value by keyword.
    if entity_type == 'MessageEntityBlockquote':
        return cls(offset, length, collapsed=d.get('collapsed', False))

    return cls(offset, length)
132 | 
def _utf16_len(text):
    """Return the length of *text* in UTF-16 code units."""
    # 'surrogatepass' keeps lone surrogates from raising during encoding.
    return len(text.encode('utf-16-le', 'surrogatepass')) // 2


def _remap_entity_offset(position, spans, replace_units, is_start):
    """Translate one entity boundary from the original text to the new text.

    Args:
        position: Boundary in the original text, in UTF-16 code units.
        spans: Sorted, non-overlapping ``(start, end)`` match spans in the
            original text, in UTF-16 code units.
        replace_units: Length of the replacement string in UTF-16 code units.
        is_start: True for an entity's start boundary, False for its end.

    Returns:
        The boundary's position in the text after all replacements.
    """
    shift = 0
    for span_start, span_end in spans:
        if position >= span_end:
            # The whole match lies before the boundary: apply its length change.
            shift += replace_units - (span_end - span_start)
        elif position > span_start:
            # The boundary falls strictly inside replaced text. A start snaps
            # to the end of the replacement and an end snaps to its beginning,
            # so only the part of the entity outside the match survives.
            anchor = span_start + shift
            return anchor + replace_units if is_start else anchor
        else:
            break
    return position + shift


def search_replace_with_entities(raw_text, entities, search, replace):
    """
    Replace text while adjusting entity offsets and lengths.
    Works with Telegram's native format.

    Every entity boundary is mapped from the original text to the new text in
    a single pass, so earlier replacements never distort the comparison for
    later ones. Offsets are handled in UTF-16 code units, the unit Telegram
    uses for entity offsets and lengths, so characters outside the BMP
    (e.g. most emoji) do not misalign entities.

    Args:
        raw_text: Plain text without formatting (``None`` is treated as '')
        entities: List of MessageEntity objects or dicts
        search: String to find; an empty string leaves everything unchanged
        replace: String to replace with

    Returns:
        tuple: (new_raw_text, new_entities_as_dicts). Entities whose length
        drops to zero are removed, and dicts whose type ``dict_to_entity``
        cannot rebuild are dropped.
    """
    raw_text = raw_text or ''

    # Convert dicts to entities if needed; copy objects so callers' entities
    # are never mutated.
    entity_objs = []
    for e in (entities or []):
        if isinstance(e, dict):
            obj = dict_to_entity(e)
            if obj:
                entity_objs.append(obj)
        else:
            entity_objs.append(copy.copy(e))

    entity_objs.sort(key=lambda ent: ent.offset)

    if not search:
        # An empty pattern would "match" between every character.
        return raw_text, [entity_to_dict(e) for e in entity_objs if e.length > 0]

    search_units = _utf16_len(search)
    replace_units = _utf16_len(replace)

    # Collect match spans in UTF-16 code units, left to right, non-overlapping
    # (the same occurrences str.replace rewrites below).
    spans = []
    cp_cursor = 0      # position in raw_text, in code points
    unit_cursor = 0    # same position, in UTF-16 code units
    hit = raw_text.find(search)
    while hit != -1:
        unit_cursor += _utf16_len(raw_text[cp_cursor:hit])
        spans.append((unit_cursor, unit_cursor + search_units))
        unit_cursor += search_units
        cp_cursor = hit + len(search)
        hit = raw_text.find(search, cp_cursor)

    new_text = raw_text.replace(search, replace)

    if spans:
        for ent in entity_objs:
            new_start = _remap_entity_offset(ent.offset, spans, replace_units, True)
            new_end = _remap_entity_offset(
                ent.offset + ent.length, spans, replace_units, False
            )
            ent.offset = new_start
            # An entity swallowed by a match collapses to zero length.
            ent.length = max(0, new_end - new_start)

    # Replace in URL attributes
    for ent in entity_objs:
        if isinstance(ent, types.MessageEntityTextUrl) and ent.url:
            ent.url = ent.url.replace(search, replace)

    # Remove zero-length entities and convert back to dicts
    new_entities = [entity_to_dict(e) for e in entity_objs if e.length > 0]

    return new_text, new_entities
197 | 
def entities_to_markdown(raw_text, entities):
    """Render raw_text plus its entities as markdown, for display.

    Entities may be MessageEntity objects or their dict form. Dicts are
    rebuilt with ``dict_to_entity`` and skipped when that yields nothing;
    non-dict items are passed through untouched.
    """
    entity_objs = []
    for item in entities or []:
        if not isinstance(item, dict):
            entity_objs.append(item)
            continue
        restored = dict_to_entity(item)
        if restored:
            entity_objs.append(restored)
    return markdown.unparse(raw_text, entity_objs)
209 | 
def get_entities_from_markdown(text):
    """Split markdown into plain text and serialized entities (legacy data).

    Returns a ``(raw_text, entities)`` tuple where every entity produced by
    ``markdown.parse`` has been converted with ``entity_to_dict``.
    """
    plain_text, parsed_entities = markdown.parse(text)
    serialized = [entity_to_dict(entity) for entity in parsed_entities]
    return plain_text, serialized
214 | ```
215 | 
216 | ### Step 2: Create Search-Replace Module (`src/search_replace.py`)
217 | 
218 | ```python
219 | """
220 | Search and replace functionality with message-by-message approval.
221 | """
222 | import os
223 | from datetime import datetime
224 | from src.channels import get_active_channel
225 | from src.database import save_database
226 | from src.formatting import (
227 |     parse_message_text,
228 |     unparse_message_text,
229 |     replace_preserving_entities,
230 |     highlight_changes
231 | )
232 | 
async def search_replace_messages(db, db_path):
    """
    Main entry point for search and replace feature.

    Placeholder: nothing is implemented yet and the coroutine returns None.
    Planned flow:
        1. Get active channel
        2. Get search/replace terms from user
        3. Find all matching messages
        4. Show each match with preview
        5. Process approvals
        6. Save changes
    """
    return None
244 | 
def find_matching_messages(db, channel_id, search_term, case_sensitive=False):
    """
    Find all messages containing the search term.

    Placeholder: not implemented yet, currently returns None.
    Planned return value: list of (message_id, message_dict, match_count).
    """
    return None
251 | 
def preview_replacement(message, search, replace):
    """
    Generate preview of replacement without modifying message.

    Placeholder: not implemented yet, currently returns None.
    Planned return value: dict with original, new, and diff info.
    """
    return None
258 | 
def apply_replacement(message, search, replace):
    """
    Apply replacement to message, preserving formatting.

    Placeholder: not implemented yet, currently returns None.
    Planned return value: the updated message dict.
    """
    return None
265 | 
def display_message_preview(preview, message_num, total_messages):
    """
    Display formatted preview in terminal.

    Placeholder: not implemented yet, currently prints nothing and returns
    None. Planned output: original, replacement, and action options.
    """
    return None
272 | ```
273 | 
274 | ### Step 3: User Interface Flow
275 | 
276 | ```
277 | ╔══════════════════════════════════════════════════════════════════════════╗
278 | ║                         SEARCH AND REPLACE                                ║
279 | ╚══════════════════════════════════════════════════════════════════════════╝
280 | 
281 | Active channel: Sergio Bulaev AI (ID: 2234839119)
282 | Total messages: 834
283 | 
284 | Search Options:
285 | 1. Search and replace text
286 | 2. Search and replace URL/domain
287 | 3. Back to main menu
288 | 
289 | Enter choice: 1
290 | 
291 | Enter text to search: example.com
292 | Enter replacement text: newsite.org
293 | Case sensitive? (y/N): n
294 | 
295 | Searching... Found 15 messages with matches.
296 | 
297 | ═══════════════════════════════════════════════════════════════════════════
298 | MESSAGE 1 of 15 (ID: #508, Date: 2024-01-15 14:30:00)
299 | ═══════════════════════════════════════════════════════════════════════════
300 | 
301 | ORIGINAL:
302 | ─────────
303 | Visit [example.com](https://example.com) for more info.
304 | Check out **example.com** for details.
305 | 
306 | AFTER REPLACEMENT:
307 | ──────────────────
308 | Visit [newsite.org](https://newsite.org) for more info.
309 | Check out **newsite.org** for details.
310 | 
311 | Changes: 4 occurrences will be replaced
312 | 
313 | ───────────────────────────────────────────────────────────────────────────
314 | [A]pprove  [S]kip  [V]iew full message  [Q]uit (save approved)  [C]ancel all
315 | ───────────────────────────────────────────────────────────────────────────
316 | 
317 | Enter choice: a
318 | 
319 | ✓ Message #508 approved for replacement.
320 | 
321 | ═══════════════════════════════════════════════════════════════════════════
322 | MESSAGE 2 of 15 (ID: #512, Date: 2024-01-16 09:15:00)
323 | ...
324 | ```
325 | 
326 | ### Step 4: Summary and Confirmation
327 | 
328 | ```
329 | ═══════════════════════════════════════════════════════════════════════════
330 |                               SUMMARY
331 | ═══════════════════════════════════════════════════════════════════════════
332 | 
333 | Total messages found: 15
334 | Approved for replacement: 12
335 | Skipped: 3
336 | 
337 | Approved messages:
338 |   #508 - 4 replacements
339 |   #512 - 2 replacements
340 |   #523 - 1 replacement
341 |   ... (showing first 10)
342 | 
343 | Total replacements: 28
344 | 
345 | Apply all approved changes? (y/N): y
346 | 
347 | Applying changes...
348 | ✓ Message #508 updated
349 | ✓ Message #512 updated
350 | ✓ Message #523 updated
351 | ...
352 | 
353 | Database saved successfully.
354 | 
355 | 12 messages updated with 28 total replacements.
356 | ```
357 | 
358 | ### Step 5: Integration with Main App
359 | 
360 | In `src/app.py`:
361 | 
362 | ```python
363 | from src.search_replace import search_replace_messages
364 | 
365 | # In menu display (around line 100):
366 | print("14. Search and replace in messages")
367 | 
368 | # In menu handler (around line 300):
369 | elif choice == '14':
370 |     await search_replace_messages(self.db, self.db_path)
371 | ```
372 | 
373 | ## Data Model
374 | 
375 | ### Message Before (Current Format - Legacy)
376 | ```json
377 | {
378 |   "id": 508,
379 |   "text": "Visit [example.com](https://example.com) for **bold info**.",
380 |   "raw_text": "Visit example.com for bold info.",
381 |   "text_html": "...",
382 |   "last_update": "2024-01-15 14:30:00"
383 | }
384 | ```
385 | 
386 | ### Message Before (New Format - With Native Entities)
387 | ```json
388 | {
389 |   "id": 508,
390 |   "text": "Visit [example.com](https://example.com) for **bold info**.",
391 |   "raw_text": "Visit example.com for bold info.",
392 |   "entities": [
393 |     {"_type": "MessageEntityTextUrl", "offset": 6, "length": 11, "url": "https://example.com"},
394 |     {"_type": "MessageEntityBold", "offset": 22, "length": 9}
395 |   ],
396 |   "last_update": "2024-01-15 14:30:00"
397 | }
398 | ```
399 | 
400 | ### Message After Replacement
401 | ```json
402 | {
403 |   "id": 508,
404 |   "text": "Visit [newsite.org](https://newsite.org) for **bold info**.",
405 |   "raw_text": "Visit newsite.org for bold info.",
406 |   "entities": [
407 |     {"_type": "MessageEntityTextUrl", "offset": 6, "length": 11, "url": "https://newsite.org"},
408 |     {"_type": "MessageEntityBold", "offset": 22, "length": 9}
409 |   ],
410 |   "last_update": "2024-11-26 10:00:00",
411 |   "edit_history": [
412 |     {
413 |       "date": "2024-11-26 10:00:00",
414 |       "action": "search_replace",
415 |       "search": "example.com",
416 |       "replace": "newsite.org",
417 |       "original_raw_text": "Visit example.com for bold info.",
418 |       "original_entities": [...]
419 |     }
420 |   ]
421 | }
422 | ```
423 | 
424 | ### Backward Compatibility
425 | 
426 | For messages without `entities` field (legacy data), the system will:
427 | 1. Parse `text` field using `markdown.parse()` to extract entities
428 | 2. Apply replacement
429 | 3. Store both `text` (markdown) and new `entities` field
430 | 
431 | ## Error Handling
432 | 
433 | 1. **No active channel** - Prompt user to select channel first
434 | 2. **No messages saved** - Inform user to download messages first
435 | 3. **No matches found** - Display "No messages found matching 'search term'"
436 | 4. **Formatting error** - Log warning, show original, ask user to skip or force
437 | 5. **Database save error** - Rollback changes, show error, keep backup
438 | 
439 | ## Testing Plan
440 | 
441 | ### Unit Tests
442 | - `test_replace_preserving_entities()` - Various entity types
443 | - `test_url_replacement()` - URLs in text and attributes
444 | - `test_nested_formatting()` - Bold inside italic, etc.
445 | - `test_unicode_handling()` - Cyrillic, emoji, special chars
446 | - `test_edge_cases()` - Empty strings, no matches, all matches
447 | 
448 | ### Integration Tests
449 | - Full workflow with real database
450 | - Menu navigation
451 | - Approval flow
452 | - Database persistence
453 | 
454 | ### Manual Testing
455 | - Visual verification of formatting
456 | - Different terminal widths
457 | - Large messages
458 | - Many matches
459 | 
460 | ## Future Enhancements
461 | 
462 | 1. **Regex support** - Allow regex patterns for search
463 | 2. **Batch operations** - Replace in all messages without approval
464 | 3. **Undo feature** - Restore from edit_history
465 | 4. **Export changes** - Generate report of all changes
466 | 5. **Dry run mode** - Preview all changes without prompts
467 | 6. **Filter by date** - Only replace in messages from date range
468 | 7. **Filter by author** - Only replace in messages from specific user
469 | 
470 | ## Dependencies
471 | 
472 | No new dependencies required. Uses existing:
473 | - `telethon.extensions.markdown` - For formatting
474 | - `telethon.tl.types` - For entity types
475 | 
476 | ## Timeline Estimate
477 | 
478 | | Task | Complexity |
479 | |------|------------|
480 | | Step 1: Formatting utilities | Low |
481 | | Step 2: Core search-replace | Medium |
482 | | Step 3: UI flow | Medium |
483 | | Step 4: Summary/confirmation | Low |
484 | | Step 5: App integration | Low |
485 | | Testing & edge cases | Medium |
486 | 
487 | ## Risk Assessment
488 | 
489 | | Risk | Mitigation |
490 | |------|------------|
491 | | Formatting corruption | Extensive testing, user preview before apply |
492 | | Data loss | Keep edit_history, backup before changes |
493 | | Unicode issues | Use proper encoding, test with various scripts |
494 | | Performance with many messages | Batch processing, progress indicator |
495 | 


--------------------------------------------------------------------------------
/src/app.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Main application module for Telegram Channel Saver.
  3 | This is the entry point for the application.
  4 | """
  5 | import os
  6 | import asyncio
  7 | import logging
  8 | from dotenv import load_dotenv
  9 | from datetime import datetime
 10 | from telethon import TelegramClient
 11 | 
 12 | from src.config import logger
 13 | from src.database import load_database, save_database, get_db_path
 14 | from src.client import check_authorized, login, restore_session, save_session, get_session_path
 15 | from src.channels import list_channels, display_channels, select_active_channel, show_active_channel
 16 | from src.users import save_channel_users, show_channel_users_stats, list_saved_users
 17 | from src.messages import save_channel_messages, search_messages, browse_messages
 18 | from src.media import download_video_messages, list_downloaded_videos
 19 | from src.export import export_menu
 20 | from src.search_replace import search_replace_messages, restore_edited_messages, list_edited_messages
 21 | 
 22 | class ChannelSaver:
 23 |     """Main application class for Telegram Channel Saver"""
 24 |     
 25 |     def __init__(self):
 26 |         """Initialize the application"""
 27 |         # Load environment variables from .env file
 28 |         load_dotenv()
 29 |         
 30 |         # Database setup
 31 |         self.db_path = get_db_path()
 32 |         self.db = load_database(self.db_path)
 33 |         
 34 |         # Telegram client setup
 35 |         try:
 36 |             self.api_id = int(os.getenv('API_ID'))
 37 |             if not self.api_id:
 38 |                 raise ValueError("API_ID not found in environment variables")
 39 |                 
 40 |             self.api_hash = os.getenv('API_HASH')
 41 |             if not self.api_hash:
 42 |                 raise ValueError("API_HASH not found in environment variables")
 43 |                 
 44 |         except (TypeError, ValueError) as e:
 45 |             logger.error(f"Error loading API credentials: {e}")
 46 |             print("\nError: Please make sure API_ID and API_HASH are properly set in .env file")
 47 |             raise
 48 |             
 49 |         self.client = None
 50 |         self.phone = None
 51 | 
 52 |     async def cleanup_sessions(self):
 53 |         """Remove invalid sessions"""
 54 |         if not self.db['sessions']:
 55 |             print("\nNo sessions to clean up!")
 56 |             return
 57 |             
 58 |         print("\nChecking sessions validity...")
 59 |         invalid = []
 60 |         
 61 |         for phone, session in self.db['sessions'].items():
 62 |             # Skip active session
 63 |             if session['active']:
 64 |                 continue
 65 |                 
 66 |             # Try to connect with session
 67 |             client = TelegramClient(
 68 |                 get_session_path(phone),
 69 |                 self.api_id,
 70 |                 self.api_hash
 71 |             )
 72 |             
 73 |             try:
 74 |                 await client.connect()
 75 |                 if not await client.is_user_authorized():
 76 |                     invalid.append(phone)
 77 |             except Exception:
 78 |                 invalid.append(phone)
 79 |             finally:
 80 |                 await client.disconnect()
 81 |         
 82 |         if invalid:
 83 |             print(f"\nFound {len(invalid)} invalid sessions")
 84 |             if input("Remove them? (y/N): ").lower() == 'y':
 85 |                 for phone in invalid:
 86 |                     # Remove session file
 87 |                     try:
 88 |                         os.remove(get_session_path(phone))
 89 |                     except OSError:
 90 |                         pass
 91 |                     # Remove from database
 92 |                     del self.db['sessions'][phone]
 93 |                 save_database(self.db_path, self.db)
 94 |                 print("\nInvalid sessions removed!")
 95 |         else:
 96 |             print("\nAll sessions are valid!")
 97 | 
 98 |     async def list_sessions(self):
 99 |         """Display all saved sessions"""
100 |         if not self.db['sessions']:
101 |             print("\nNo saved sessions found!")
102 |             return
103 |             
104 |         print("\nSaved Sessions:")
105 |         print("--------------")
106 |         for phone, session in self.db['sessions'].items():
107 |             status = "ACTIVE" if session['active'] else "inactive"
108 |             print(f"\nPhone: {phone} [{status}]")
109 |             print(f"Username: @{session['username']}")
110 |             print(f"Created: {session['created_at']}")
111 |             print(f"Last used: {session['last_used']}")
112 | 
113 |     async def switch_session(self):
114 |         """Switch to a different saved session"""
115 |         if not self.db['sessions']:
116 |             print("\nNo saved sessions found!")
117 |             return False
118 |             
119 |         await self.list_sessions()
120 |         
121 |         while True:
122 |             phone = input("\nEnter phone number to switch to (or 0 to cancel): ")
123 |             if phone == '0':
124 |                 return False
125 |                 
126 |             if phone in self.db['sessions']:
127 |                 # Disconnect current client if exists
128 |                 if self.client:
129 |                     await self.client.disconnect()
130 |                 
131 |                 # Update active status
132 |                 for p, s in self.db['sessions'].items():
133 |                     s['active'] = (p == phone)
134 |                 
135 |                 # Create new client with selected session
136 |                 self.phone = phone
137 |                 self.client = TelegramClient(
138 |                     get_session_path(phone),
139 |                     self.api_id,
140 |                     self.api_hash
141 |                 )
142 |                 
143 |                 await self.client.connect()
144 |                 if await check_authorized(self.client):
145 |                     # Update last used
146 |                     self.db['sessions'][phone]['last_used'] = datetime.now()
147 |                     save_database(self.db_path, self.db)
148 |                     print(f"\nSwitched to session: {phone}")
149 |                     return True
150 |                 else:
151 |                     print("\nSession is no longer valid!")
152 |                     return False
153 |             else:
154 |                 print("\nInvalid phone number!")
155 | 
156 |     async def start(self):
157 |         """Main entry point"""
158 |         print("\nWelcome to Channel Saver!")
159 |         print("------------------------")
160 |         
161 |         # Ensure clean start
162 |         if self.client:
163 |             await self.client.disconnect()
164 |             self.client = None
165 |         
166 |         # Try to restore session first
167 |         self.client, self.phone = await restore_session(self.db, self.api_id, self.api_hash, self.db_path)
168 |         if self.client:
169 |             print(f"\nRestored session for {self.phone}")
170 |             relogin = False
171 |         else:
172 |             relogin = True
173 |         
174 |         try:
175 |             if relogin:
176 |                 # New login required
177 |                 self.phone = input('Please enter your phone number (international format): ')
178 |                 
179 |                 # Create new client
180 |                 self.client = TelegramClient(
181 |                     get_session_path(self.phone),
182 |                     self.api_id,
183 |                     self.api_hash
184 |                 )
185 |                 
186 |                 await self.client.connect()
187 |                 me = await login(self.client, self.phone)
188 |                 
189 |                 # Save session after successful login
190 |                 await save_session(self.db, self.phone, me)
191 |                 save_database(self.db_path, self.db)
192 |                 
193 |                 logger.info(f"Successfully logged in as {me.first_name} (@{me.username})")
194 |                 
195 |             print("\nSuccessfully connected!")
196 |             
197 |             while True:
198 |                 # Show active channel in menu if selected
199 |                 active_channel = self.db.get('active_channel')
200 |                 if active_channel:
201 |                     print(f"\nActive: {active_channel['title']} ({active_channel['type']})")
202 |                 
203 |                 print("\nOptions:")
204 |                 print("1. Show account info")
205 |                 print("2. List channels/groups")
206 |                 print("3. Select active channel")
207 |                 print("4. Show active channel info")
208 |                 print("5. Save channel users")
209 |                 print("6. Show users statistics")
210 |                 print("7. List saved sessions")
211 |                 print("8. Switch session")
212 |                 print("9. Cleanup invalid sessions")
213 |                 print("10. Save channel messages")
214 |                 print("11. List saved users")
215 |                 print("12. Search messages")
216 |                 print("13. Browse message index")
217 |                 print("14. Search and replace in messages")
218 |                 print("15. Restore edited messages")
219 |                 print("16. List edited messages")
220 |                 print("17. Download videos")
221 |                 print("18. List downloaded videos")
222 |                 print("19. Export messages")
223 |                 print("20. Logout")
224 |                 print("21. Exit")
225 | 
226 |                 choice = input("\nEnter your choice (1-21): ")
227 |                 
228 |                 if choice == '1':
229 |                     me = await self.client.get_me()
230 |                     print(f"\nAccount Information:")
231 |                     print(f"Phone: {self.phone}")
232 |                     print(f"Username: @{me.username}")
233 |                     print(f"First Name: {me.first_name}")
234 |                     print(f"Last Name: {me.last_name}")
235 |                     print(f"User ID: {me.id}")
236 |                 elif choice == '2':
237 |                     channels = await list_channels(self.client)
238 |                     display_channels(channels)
239 |                 elif choice == '3':
240 |                     await select_active_channel(self.client, self.db, self.db_path)
241 |                 elif choice == '4':
242 |                     await show_active_channel(self.client, self.db)
243 |                 elif choice == '5':
244 |                     await save_channel_users(self.client, self.db, self.db_path)
245 |                 elif choice == '6':
246 |                     await show_channel_users_stats(self.db)
247 |                 elif choice == '7':
248 |                     await self.list_sessions()
249 |                 elif choice == '8':
250 |                     await self.switch_session()
251 |                 elif choice == '9':
252 |                     await self.cleanup_sessions()
253 |                 elif choice == '10':
254 |                     print("\nMessage Download Options:")
255 |                     print("1. Download new messages only")
256 |                     print("2. Force redownload all messages")
257 |                     print("3. Download most recent messages")
258 |                     print("4. Download messages by ID range")
259 |                     print("5. Back to main menu")
260 |                     
261 |                     dl_choice = input("\nEnter choice (1-5): ")
262 |                     
263 |                     if dl_choice == '1':
264 |                         limit = input("\nEnter number of messages to save (or press Enter for all): ")
265 |                         limit = int(limit) if limit.strip() else None
266 |                         await save_channel_messages(self.client, self.db, self.db_path, limit=limit, force_redownload=False)
267 |                     elif dl_choice == '2':
268 |                         confirm = input("\nThis will redownload all messages. Continue? (y/N): ").lower()
269 |                         if confirm == 'y':
270 |                             limit = input("\nEnter number of messages to save (or press Enter for all): ")
271 |                             limit = int(limit) if limit.strip() else None
272 |                             await save_channel_messages(self.client, self.db, self.db_path, limit=limit, force_redownload=True)
273 |                     elif dl_choice == '3':
274 |                         count = input("\nEnter number of recent messages to download: ")
275 |                         try:
276 |                             count = int(count)
277 |                             if count <= 0:
278 |                                 print("\nPlease enter a positive number")
279 |                                 continue
280 |                             await save_channel_messages(self.client, self.db, self.db_path, recent_count=count)
281 |                         except ValueError:
282 |                             print("\nPlease enter a valid number")
283 |                     elif dl_choice == '4':
284 |                         try:
285 |                             min_id = input("\nEnter minimum message ID (or press Enter for first message): ")
286 |                             min_id = int(min_id) if min_id.strip() else None
287 |                             
288 |                             max_id = input("Enter maximum message ID (or press Enter for last message): ")
289 |                             max_id = int(max_id) if max_id.strip() else None
290 |                             
291 |                             limit = input("Enter maximum number of messages to download (or press Enter for all): ")
292 |                             limit = int(limit) if limit.strip() else None
293 |                             
294 |                             force = input("Force redownload existing messages? (y/N): ").lower() == 'y'
295 |                             
296 |                             await save_channel_messages(
297 |                                 self.client,
298 |                                 self.db,
299 |                                 self.db_path,
300 |                                 min_id=min_id, 
301 |                                 max_id=max_id, 
302 |                                 limit=limit,
303 |                                 force_redownload=force
304 |                             )
305 |                         except ValueError:
306 |                             print("\nPlease enter valid message IDs (numbers only)")
307 |                     elif dl_choice == '5':
308 |                         continue
309 |                 elif choice == '11':
310 |                     await list_saved_users(self.db)
311 |                 elif choice == '12':
312 |                     await search_messages(self.db)
313 |                 elif choice == '13':
314 |                     await browse_messages(self.db)
315 |                 elif choice == '14':
316 |                     await search_replace_messages(self.db, self.db_path, self.client)
317 |                 elif choice == '15':
318 |                     await restore_edited_messages(self.db, self.db_path, self.client)
319 |                 elif choice == '16':
320 |                     list_edited_messages(self.db)
321 |                 elif choice == '17':
322 |                     print("\nVideo Download Options:")
323 |                     print("1. Download all videos")
324 |                     print("2. Download video circles only (round videos)")
325 |                     print("3. Back to main menu")
326 | 
327 |                     video_choice = input("\nEnter choice (1-3): ")
328 | 
329 |                     if video_choice == '1':
330 |                         limit = input("\nEnter number of videos to download (or press Enter for all): ")
331 |                         limit = int(limit) if limit.strip() else None
332 |                         await download_video_messages(self.client, self.db, self.db_path, limit=limit, round_videos_only=False)
333 |                     elif video_choice == '2':
334 |                         limit = input("\nEnter number of video circles to download (or press Enter for all): ")
335 |                         limit = int(limit) if limit.strip() else None
336 |                         await download_video_messages(self.client, self.db, self.db_path, limit=limit, round_videos_only=True)
337 |                     elif video_choice == '3':
338 |                         continue
339 |                 elif choice == '18':
340 |                     list_downloaded_videos(self.db)
341 |                 elif choice == '19':
342 |                     await export_menu(self.db, self.client)
343 |                 elif choice == '20':
344 |                     await self.client.log_out()
345 |                     print("\nLogged out successfully!")
346 |                     if self.phone in self.db['sessions']:
347 |                         del self.db['sessions'][self.phone]
348 |                     self.db['last_login'] = None
349 |                     self.db['active_channel'] = None
350 |                     save_database(self.db_path, self.db)
351 |                     break
352 |                 elif choice == '21':
353 |                     break
354 |                 else:
355 |                     print("\nInvalid choice!")
356 | 
357 |         finally:
358 |             if self.client:
359 |                 await self.client.disconnect()
360 |                 self.client = None
361 | 
def main():
    """Build the application object and run its async entry point.

    Instantiates ``ChannelSaver`` and passes the coroutine returned by its
    ``start()`` method to ``asyncio.run``, which blocks until it completes.
    """
    asyncio.run(ChannelSaver().start())


# Allow the module to be executed directly as a script.
if __name__ == '__main__':
    main()


--------------------------------------------------------------------------------
/docs/SEARCH_REPLACE_FORMATTING.md:
--------------------------------------------------------------------------------
  1 | # Search and Replace with Formatting Preservation
  2 | 
  3 | ## Overview
  4 | 
  5 | This document describes how Telegram message formatting works and how to implement a search-and-replace feature that preserves all formatting (bold, italic, links, etc.) while modifying text content.
  6 | 
  7 | ## Two Approaches: Markdown vs Native Entities
  8 | 
  9 | ### Approach A: Markdown-Based (Current)
 10 | - Use `message.text` which contains markdown syntax
 11 | - Parse with `markdown.parse()` → get (plain_text, entities)
 12 | - Modify and reconstruct with `markdown.unparse()`
 13 | 
 14 | ### Approach B: Native Entities (Recommended)
 15 | - Use `message.raw_text` (plain text) + `message.entities` (list of MessageEntity)
 16 | - Work directly with Telegram's native format
 17 | - More accurate, handles all 21 entity types including those markdown doesn't support
 18 | - **This is how Telegram actually stores and transmits messages**
 19 | 
  20 | ## Why Native Entities Are Better
 21 | 
 22 | According to [Telethon documentation](https://docs.telethon.dev/en/stable/modules/custom.html) and [DeepWiki](https://deepwiki.com/LonamiWebs/Telethon/7.1-markdown-and-html-parsing):
 23 | 
 24 | > "Telegram does not natively support markdown or HTML. Clients such as Telethon parse the text into a list of formatting MessageEntity at different offsets."
 25 | 
 26 | > "Message.text returns the text formatted using the current parse mode of the client. By default, this is Telegram's markdown."
 27 | 
 28 | The native format is:
 29 | - **`raw_text`**: Plain text without any formatting markers
 30 | - **`entities`**: Array of MessageEntity objects with offset/length
 31 | 
 32 | This is more reliable because:
 33 | 1. No parsing/unparsing round-trip errors
 34 | 2. Handles entity types that markdown doesn't support (CustomEmoji, Spoiler, etc.)
 35 | 3. Preserves exact offsets as Telegram uses them
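  36 | 4. Uses Telegram's own offset unit (UTF-16 code units), so emoji and other surrogate pairs stay aligned — provided Python string indices are converted to UTF-16 offsets before they are compared with entity offsets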
 37 | 
 38 | ## Current Database Structure
 39 | 
 40 | Messages are stored with the following text-related fields:
 41 | 
 42 | | Field | Description | Example |
 43 | |-------|-------------|---------|
 44 | | `text` | Markdown-formatted text from Telethon | `[Link](https://example.com) and **bold**` |
 45 | | `raw_text` | Plain text without formatting | `Link and bold` |
 46 | | `text_html` | Currently same as `text` (should be HTML) | `[Link](https://example.com) and **bold**` |
 47 | | `entities` | **NOT STORED YET** - Need to add this | `[{_type: "MessageEntityBold", offset: 0, length: 4}, ...]` |
 48 | 
 49 | **Note:** The `text` field in our database contains markdown formatting because Telethon's `message.text` property automatically converts entities to markdown format.
 50 | 
 51 | ## Telegram Message Formatting Architecture
 52 | 
 53 | ### Message Entities
 54 | 
 55 | Telegram stores formatted text as:
 56 | 1. **Plain text** - The actual characters without formatting markers
 57 | 2. **Entities array** - List of formatting instructions with offset/length
 58 | 
 59 | Example:
 60 | ```
 61 | Text: "Hello bold and click here"
 62 | Entities:
 63 |   - MessageEntityBold(offset=6, length=4)      → "bold"
 64 |   - MessageEntityTextUrl(offset=15, length=10, url="https://example.com") → "click here"
 65 | ```
 66 | 
  67 | ### Available Entity Types (21 total; the input-only `InputMessageEntityMentionName` is not listed in the table)
 68 | 
 69 | | Entity Type | Description | Markdown Syntax |
 70 | |-------------|-------------|-----------------|
 71 | | `MessageEntityBold` | Bold text | `**text**` |
  72 | | `MessageEntityItalic` | Italic text | `__text__` |
  73 | | `MessageEntityStrike` | Strikethrough | `~~text~~` |
  74 | | `MessageEntityUnderline` | Underlined text | N/A |
 75 | | `MessageEntityCode` | Inline code | `` `code` `` |
 76 | | `MessageEntityPre` | Code block with language | ` ```python\ncode``` ` |
 77 | | `MessageEntityTextUrl` | Hyperlink | `[text](url)` |
 78 | | `MessageEntityMention` | @username mention | `@username` |
 79 | | `MessageEntityMentionName` | User mention by ID | N/A |
 80 | | `MessageEntityUrl` | Plain URL | `https://...` |
 81 | | `MessageEntityEmail` | Email address | `user@example.com` |
 82 | | `MessageEntityPhone` | Phone number | `+1234567890` |
 83 | | `MessageEntityHashtag` | Hashtag | `#hashtag` |
 84 | | `MessageEntityCashtag` | Cashtag | `$TICKER` |
 85 | | `MessageEntityBotCommand` | Bot command | `/command` |
 86 | | `MessageEntityBlockquote` | Block quote | `> quote` |
 87 | | `MessageEntitySpoiler` | Spoiler text | `\|\|spoiler\|\|` |
 88 | | `MessageEntityCustomEmoji` | Custom emoji | N/A |
 89 | | `MessageEntityBankCard` | Bank card number | N/A |
 90 | | `MessageEntityUnknown` | Unknown entity | N/A |
 91 | 
 92 | ### Telethon Extensions
 93 | 
 94 | Telethon provides markdown/HTML parsing utilities:
 95 | 
 96 | ```python
 97 | from telethon.extensions import markdown
 98 | 
 99 | # Parse markdown to (text, entities)
100 | text, entities = markdown.parse("**bold** and [link](https://example.com)")
101 | # Result: text="bold and link", entities=[MessageEntityBold(...), MessageEntityTextUrl(...)]
102 | 
103 | # Convert back to markdown
104 | markdown_text = markdown.unparse(text, entities)
105 | # Result: "**bold** and [link](https://example.com)"
106 | ```
107 | 
108 | ## The Challenge: Search & Replace
109 | 
110 | When replacing text, we must handle:
111 | 
112 | 1. **Offset Adjustment** - Entities after the replacement need their `offset` shifted
113 | 2. **Length Adjustment** - Entities containing the replacement need their `length` updated
114 | 3. **URL Replacement** - `MessageEntityTextUrl.url` may also need updating
115 | 4. **Nested Entities** - Multiple entities can overlap
116 | 
117 | ### Algorithm for Safe Replacement
118 | 
119 | ```python
120 | import re
121 | import copy
122 | from telethon.extensions import markdown
123 | from telethon.tl.types import MessageEntityTextUrl
124 | 
def search_replace_preserving_formatting(text, entities, search, replace):
    """
    Replace every occurrence of ``search`` in ``text`` and keep entities aligned.

    Entity ``offset``/``length`` values are measured in UTF-16 code units
    (the unit Telegram uses), whereas Python string indices count code
    points.  Match positions are therefore converted to UTF-16 units before
    being compared with entities.  Every comparison uses the ORIGINAL
    coordinates of both the match and the entity; the accumulated change is
    applied to each entity once, so several matches in one message cannot
    shift an entity twice.

    Only two relationships are adjusted by this basic version: a match that
    lies entirely before an entity (offset is shifted) and a match that lies
    entirely inside an entity (length is adjusted).  A match that only
    partially overlaps an entity leaves that entity unchanged.

    Args:
        text: Plain text (without markdown markers)
        entities: List of MessageEntity-like objects with ``offset`` and
            ``length`` attributes (and ``url`` for text links)
        search: String to find (an empty string performs no replacement)
        replace: String to replace with

    Returns:
        tuple: (new_text, new_entities) where ``new_entities`` are copies
        sorted by their original offset; the input objects are not modified
    """
    def utf16_len(value):
        # Characters outside the BMP (most emoji) occupy two UTF-16 code
        # units but a single Python string index.
        return len(value.encode('utf-16-le', 'surrogatepass')) // 2

    # Shallow copies suffice: only scalar attributes are reassigned below.
    entities = sorted((copy.copy(e) for e in entities), key=lambda e: e.offset)

    # An empty pattern would "match" between every pair of characters.
    if not search:
        return text, entities

    search_units = utf16_len(search)
    len_diff = utf16_len(replace) - search_units

    # Build the new text and record each match span in original UTF-16 units.
    pieces = []
    spans = []
    cursor = 0        # code-point index just past the previous match
    units_seen = 0    # UTF-16 units of the original text consumed so far
    for match in re.finditer(re.escape(search), text):
        gap = text[cursor:match.start()]
        pieces.append(gap)
        pieces.append(replace)
        start = units_seen + utf16_len(gap)
        spans.append((start, start + search_units))
        units_seen = start + search_units
        cursor = match.end()
    pieces.append(text[cursor:])
    new_text = ''.join(pieces)

    for ent in entities:
        ent_start = ent.offset
        ent_end = ent_start + ent.length
        shift = 0
        grow = 0
        for start, end in spans:
            if ent_start >= end:
                # Match is entirely before the entity - shift its offset
                shift += len_diff
            elif ent_start <= start and ent_end >= end:
                # Entity contains the match - adjust its length
                grow += len_diff
            # Matches after the entity: no change needed
        ent.offset = ent_start + shift
        ent.length += grow

        # Text-link entities are the ones carrying a ``url`` attribute;
        # replace inside the link target as well.
        url = getattr(ent, 'url', None)
        if url:
            ent.url = url.replace(search, replace)

    return new_text, entities
176 | ```
177 | 
178 | ## Implementation Approach
179 | 
180 | ### Option A: Work with Markdown (Current Data) - Fallback
181 | 
182 | Since our database currently stores markdown-formatted text, we can:
183 | 
184 | 1. Use `markdown.parse()` to get (plain_text, entities)
185 | 2. Apply replacements with entity adjustment
186 | 3. Use `markdown.unparse()` to get markdown back
187 | 4. Store updated markdown in database
188 | 
189 | **Pros:** Works with existing data without migration
190 | **Cons:** Some edge cases with nested markdown, doesn't support all entity types
191 | 
192 | ### Option B: Store Raw Entities (Recommended) - Native Format
193 | 
194 | Modify message saving to store:
195 | - `raw_text`: Plain text (already stored)
196 | - `entities`: Serialized entity list (JSON)
197 | 
198 | Then use entities directly for search/replace.
199 | 
200 | **Pros:** More accurate, handles ALL entity types, matches Telegram's native format
201 | **Cons:** Requires updating message download code
202 | 
203 | ## Entity Serialization
204 | 
205 | ### Converting Entities to JSON
206 | 
207 | ```python
def entity_to_dict(entity):
    """
    Serialise a MessageEntity into a plain, JSON-friendly dict.

    The dict always holds the class name under ``_type`` plus ``offset`` and
    ``length``.  ``url`` and ``language`` are copied only when present and
    truthy; ``user_id``, ``document_id`` and ``collapsed`` are copied
    whenever the attribute exists, whatever its value.
    """
    payload = dict(
        _type=type(entity).__name__,
        offset=entity.offset,
        length=entity.length,
    )

    # Attributes that are stored only when they carry a non-empty value.
    for attr in ('url', 'language'):
        value = getattr(entity, attr, None)
        if value:
            payload[attr] = value

    # Attributes that are stored as soon as the entity defines them.
    for attr in ('user_id', 'document_id', 'collapsed'):
        if hasattr(entity, attr):
            payload[attr] = getattr(entity, attr)

    return payload
227 | ```
228 | 
229 | ### Reconstructing Entities from JSON
230 | 
231 | ```python
232 | from telethon.tl import types
233 | 
# Registry of serialised ``_type`` names -> Telethon entity classes.
# Every entity type listed in the table above is registered, so a
# serialise/deserialise round trip does not silently drop bank-card or
# unknown entities.
ENTITY_TYPES = {
    'MessageEntityBold': types.MessageEntityBold,
    'MessageEntityItalic': types.MessageEntityItalic,
    'MessageEntityStrike': types.MessageEntityStrike,
    'MessageEntityUnderline': types.MessageEntityUnderline,
    'MessageEntityCode': types.MessageEntityCode,
    'MessageEntityPre': types.MessageEntityPre,
    'MessageEntityTextUrl': types.MessageEntityTextUrl,
    'MessageEntityUrl': types.MessageEntityUrl,
    'MessageEntityMention': types.MessageEntityMention,
    'MessageEntityMentionName': types.MessageEntityMentionName,
    'MessageEntityHashtag': types.MessageEntityHashtag,
    'MessageEntityCashtag': types.MessageEntityCashtag,
    'MessageEntityBotCommand': types.MessageEntityBotCommand,
    'MessageEntityEmail': types.MessageEntityEmail,
    'MessageEntityPhone': types.MessageEntityPhone,
    'MessageEntityBlockquote': types.MessageEntityBlockquote,
    'MessageEntitySpoiler': types.MessageEntitySpoiler,
    'MessageEntityCustomEmoji': types.MessageEntityCustomEmoji,
    'MessageEntityBankCard': types.MessageEntityBankCard,
    'MessageEntityUnknown': types.MessageEntityUnknown,
}

def dict_to_entity(d):
    """
    Convert a dict produced by ``entity_to_dict`` back to a MessageEntity.

    Args:
        d: Dict with ``_type``, ``offset`` and ``length`` keys plus any
            type-specific keys (``url``, ``language``, ``user_id``,
            ``document_id``, ``collapsed``)

    Returns:
        The reconstructed entity, or None when ``_type`` is not a key of
        ``ENTITY_TYPES``.  Missing type-specific keys fall back to an empty
        string, 0 or False respectively.
    """
    entity_type = d['_type']
    offset = d['offset']
    length = d['length']

    cls = ENTITY_TYPES.get(entity_type)
    if cls is None:
        return None

    # Handle different constructor signatures
    if entity_type == 'MessageEntityTextUrl':
        return cls(offset, length, d.get('url', ''))
    elif entity_type == 'MessageEntityPre':
        return cls(offset, length, d.get('language', ''))
    elif entity_type == 'MessageEntityMentionName':
        return cls(offset, length, d.get('user_id', 0))
    elif entity_type == 'MessageEntityCustomEmoji':
        return cls(offset, length, d.get('document_id', 0))
    elif entity_type == 'MessageEntityBlockquote':
        return cls(offset, length, collapsed=d.get('collapsed', False))
    else:
        # All remaining types take only (offset, length).
        return cls(offset, length)
278 | ```
279 | 
280 | ## Recommended Approach: Native Entities
281 | 
282 | ### Algorithm for Search-Replace with Raw Entities
283 | 
284 | ```python
285 | import re
286 | import copy
287 | from telethon.tl.types import MessageEntityTextUrl
288 | 
def search_replace_with_raw_entities(raw_text, entities, search, replace):
    """
    Replace text in raw_text while adjusting entity offsets/lengths.
    This works with Telegram's native format (raw_text + entities list).

    Entity offsets are UTF-16 code units while Python string indices are
    code points, so every match span is converted to UTF-16 units first.
    Each entity boundary is then mapped from the original text to the new
    text independently, always using original coordinates, which keeps the
    result correct when a message contains several matches:

    * a boundary outside every match is shifted by the size change of all
      matches that end at or before it;
    * an entity START strictly inside a match moves to the end of the
      replacement (the entity begins after the replaced text);
    * an entity END strictly inside a match moves to the start of the
      replacement (the entity is cut where the match began).

    Consequently an entity that contains a match grows or shrinks with it,
    a partially overlapped entity is trimmed, and an entity swallowed by a
    match collapses to zero length and is dropped.

    Args:
        raw_text: Plain text without formatting markers
        entities: List of MessageEntity-like objects with ``offset`` and
            ``length`` attributes (and ``url`` for text links)
        search: String to find (an empty string performs no replacement)
        replace: String to replace with

    Returns:
        tuple: (new_raw_text, new_entities) where ``new_entities`` are
        copies sorted by offset with zero-length entities removed; the
        input objects are not modified
    """
    def utf16_len(value):
        # Characters outside the BMP (most emoji) occupy two UTF-16 code
        # units but a single Python string index.
        return len(value.encode('utf-16-le', 'surrogatepass')) // 2

    # Shallow copies suffice: only scalar attributes are reassigned below.
    new_entities = sorted((copy.copy(e) for e in entities), key=lambda e: e.offset)

    spans = []
    new_text = raw_text
    len_diff = 0
    replace_units = 0

    # An empty pattern would "match" between every pair of characters.
    if search:
        search_units = utf16_len(search)
        replace_units = utf16_len(replace)
        len_diff = replace_units - search_units

        # Build the new text and record each match in original UTF-16 units.
        pieces = []
        cursor = 0        # code-point index just past the previous match
        units_seen = 0    # UTF-16 units of the original text consumed so far
        for match in re.finditer(re.escape(search), raw_text):
            gap = raw_text[cursor:match.start()]
            pieces.append(gap)
            pieces.append(replace)
            start = units_seen + utf16_len(gap)
            spans.append((start, start + search_units))
            units_seen = start + search_units
            cursor = match.end()
        pieces.append(raw_text[cursor:])
        new_text = ''.join(pieces)

    def remap(position, is_end):
        """Map an original UTF-16 position to the replaced text."""
        shift = 0
        for start, end in spans:
            if end <= position:
                # Match lies entirely before the position
                shift += len_diff
            elif start < position:
                # Position is strictly inside this match
                new_match_start = start + shift
                return new_match_start if is_end else new_match_start + replace_units
            else:
                # Spans are ordered; the rest start at or after the position
                break
        return position + shift

    for ent in new_entities:
        new_start = remap(ent.offset, False)
        new_end = remap(ent.offset + ent.length, True)
        ent.offset = new_start
        ent.length = max(0, new_end - new_start)

        # Text-link entities are the ones carrying a ``url`` attribute;
        # replace inside the link target as well.
        url = getattr(ent, 'url', None)
        if search and url:
            ent.url = url.replace(search, replace)

    # Remove zero-length entities
    new_entities = [e for e in new_entities if e.length > 0]

    return new_text, new_entities
355 | ```
356 | 
357 | ### Regenerating Markdown After Replacement
358 | 
359 | ```python
360 | from telethon.extensions import markdown
361 | 
def apply_replacement_and_get_markdown(raw_text, entities, search, replace):
    """
    Run the native-entity replacement and also render the result as markdown.

    Returns a dict with the replaced plain text under ``raw_text``, the
    adjusted entity list under ``entities`` and the output of
    ``markdown.unparse`` for that pair under ``text``.
    """
    replaced_text, replaced_entities = search_replace_with_raw_entities(
        raw_text, entities, search, replace
    )

    return dict(
        raw_text=replaced_text,
        entities=replaced_entities,
        # Markdown rendering for display; kept under 'text' for backward
        # compatibility.
        text=markdown.unparse(replaced_text, replaced_entities),
    )
378 | ```
379 | 
380 | ### Phase 2: Message-by-Message Approval UI
381 | 
382 | ```
383 | ╔══════════════════════════════════════════════════════════════════╗
384 | ║                    MESSAGE #508 - PREVIEW                         ║
385 | ╠══════════════════════════════════════════════════════════════════╣
386 | ║ ORIGINAL:                                                         ║
387 | ║ ─────────                                                         ║
388 | ║ Visit [example.com](https://example.com) for **bold info**.       ║
389 | ║                                                                   ║
390 | ║ AFTER REPLACEMENT (example.com → newsite.org):                   ║
391 | ║ ──────────────────────────────────────────────────                ║
392 | ║ Visit [newsite.org](https://newsite.org) for **bold info**.      ║
393 | ║                                                                   ║
394 | ║ Changes: 2 replacements                                           ║
395 | ╠══════════════════════════════════════════════════════════════════╣
396 | ║ [A]pprove  [S]kip  [Q]uit                                        ║
397 | ╚══════════════════════════════════════════════════════════════════╝
398 | ```
399 | 
400 | ## Edge Cases to Handle
401 | 
402 | ### 1. Replacement Inside Entity
403 | ```
404 | Original: **bold text here**
405 | Search: "text"
406 | Replace: "content"
407 | Result: **bold content here**  ✓
408 | ```
409 | 
410 | ### 2. Replacement Spanning Entity Boundary
411 | ```
412 | Original: **bold** text
413 | Search: "bold text"
414 | Replace: "new"
415 | Result: Tricky - may break formatting ⚠️
416 | Solution: Warn user, require confirmation
417 | ```
418 | 
419 | ### 3. URL in Link Text vs URL Attribute
420 | ```
421 | Original: [example.com](https://example.com/page)
422 | Search: "example.com"
423 | Replace: "newsite.org"
424 | Result: [newsite.org](https://newsite.org/page)  ✓
425 | ```
426 | 
427 | ### 4. Partial URL Match
428 | ```
429 | Original: https://example.com/path
430 | Search: "example"
431 | Replace: "test"
432 | Result: https://test.com/path  ✓
433 | ```
434 | 
435 | ### 5. Case Sensitivity
436 | - Implement case-insensitive search option
437 | - Preserve original case when possible
438 | 
439 | ## Testing Checklist
440 | 
441 | - [ ] Simple text replacement
442 | - [ ] Replacement in bold text
443 | - [ ] Replacement in italic text
444 | - [ ] Replacement in link text
445 | - [ ] Replacement in URL attribute
446 | - [ ] Multiple replacements in one message
447 | - [ ] Nested formatting (bold + italic)
448 | - [ ] Unicode/emoji preservation
449 | - [ ] Empty replacement (deletion)
450 | - [ ] Cyrillic and other non-ASCII text
451 | 
452 | ## References
453 | 
454 | - Telethon Documentation: https://docs.telethon.dev/en/stable/
455 | - Message Entities: https://docs.telethon.dev/en/stable/modules/custom.html
456 | - Markdown Extension: `telethon.extensions.markdown`
457 | - HTML Extension: `telethon.extensions.html` (alternative parser)
458 | 


--------------------------------------------------------------------------------
/src/message_export.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Message export module for individual file export functionality.
  3 | Exports messages as separate text files with media analysis.
  4 | """
  5 | import os
  6 | import logging
  7 | from datetime import datetime
  8 | from typing import Dict, Any, List, Optional
  9 | 
 10 | from src.config import EXPORT_DIR
 11 | from src.channels import get_active_channel
 12 | from src.image_analysis import analyze_image_with_openrouter, analyze_multiple_images
 13 | 
 14 | logger = logging.getLogger(__name__)
 15 | 
 16 | def sanitize_filename(filename: str) -> str:
 17 |     """
 18 |     Sanitize a filename by removing or replacing invalid characters.
 19 |     
 20 |     Args:
 21 |         filename: Original filename
 22 |         
 23 |     Returns:
 24 |         Sanitized filename safe for filesystem
 25 |     """
 26 |     # Remove or replace invalid characters
 27 |     invalid_chars = '<>:"/\\|?*'
 28 |     for char in invalid_chars:
 29 |         filename = filename.replace(char, '_')
 30 |     
 31 |     # Replace multiple underscores with single underscore
 32 |     while '__' in filename:
 33 |         filename = filename.replace('__', '_')
 34 |     
 35 |     # Remove trailing dots and spaces
 36 |     filename = filename.rstrip('. ')
 37 |     
 38 |     # Ensure filename is not empty
 39 |     if not filename:
 40 |         filename = 'untitled'
 41 |     
 42 |     # Limit filename length
 43 |     if len(filename) > 200:
 44 |         filename = filename[:200]
 45 |     
 46 |     return filename
 47 | 
 48 | def format_message_content(message: Dict[str, Any], include_media_analysis: bool = True) -> str:
 49 |     """
 50 |     Format message content for export to text file.
 51 |     
 52 |     Args:
 53 |         message: Message dictionary
 54 |         include_media_analysis: Whether to include AI analysis of media
 55 |         
 56 |     Returns:
 57 |         Formatted message content as string
 58 |     """
 59 |     content_lines = []
 60 |     
 61 |     # Message header
 62 |     content_lines.append("=" * 80)
 63 |     content_lines.append(f"Message #{message['id']}")
 64 |     content_lines.append("=" * 80)
 65 |     
 66 |     # Metadata
 67 |     content_lines.append(f"Date: {message['date']}")
 68 |     if message.get('edit_date'):
 69 |         content_lines.append(f"Edited: {message['edit_date']}")
 70 |     
 71 |     if message.get('post_author'):
 72 |         content_lines.append(f"Author: {message['post_author']}")
 73 |     elif message.get('from_id'):
 74 |         content_lines.append(f"From ID: {message['from_id']}")
 75 |     
 76 |     if message.get('views'):
 77 |         content_lines.append(f"Views: {message['views']}")
 78 |     
 79 |     if message.get('forwards'):
 80 |         content_lines.append(f"Forwards: {message['forwards']}")
 81 |     
 82 |     if message.get('reply_to'):
 83 |         content_lines.append(f"Reply to: #{message['reply_to']}")
 84 |     
 85 |     content_lines.append("")
 86 |     
 87 |     # Media analysis section
 88 |     if include_media_analysis and message.get('has_media'):
 89 |         media_analysis = analyze_message_media(message)
 90 |         if media_analysis:
 91 |             content_lines.append("MEDIA ANALYSIS:")
 92 |             content_lines.append("-" * 40)
 93 |             content_lines.append(media_analysis)
 94 |             content_lines.append("")
 95 |     
 96 |     # Message text content
 97 |     if message.get('text'):
 98 |         content_lines.append("MESSAGE CONTENT:")
 99 |         content_lines.append("-" * 40)
100 |         content_lines.append(message['text'])
101 |         content_lines.append("")
102 |     
103 |     # Media information
104 |     if message.get('has_media'):
105 |         content_lines.append("MEDIA INFORMATION:")
106 |         content_lines.append("-" * 40)
107 |         content_lines.append(f"Media Type: {message.get('media_type', 'Unknown')}")
108 |         
109 |         if message.get('media_file_path'):
110 |             content_lines.append(f"File Path: {message['media_file_path']}")
111 |         
112 |         if message.get('grouped_id'):
113 |             content_lines.append(f"Media Group ID: {message['grouped_id']}")
114 |         
115 |         content_lines.append("")
116 |     
117 |     # Reactions
118 |     if message.get('reactions') and len(message['reactions']) > 0:
119 |         content_lines.append("REACTIONS:")
120 |         content_lines.append("-" * 40)
121 |         for reaction in message['reactions']:
122 |             emoji = reaction.get('emoticon') or f"Custom({reaction.get('document_id')})"
123 |             count = reaction.get('count', 0)
124 |             chosen = " (chosen)" if reaction.get('chosen') else ""
125 |             content_lines.append(f"{emoji}: {count}{chosen}")
126 |         content_lines.append("")
127 |     
128 |     # Technical metadata
129 |     content_lines.append("TECHNICAL METADATA:")
130 |     content_lines.append("-" * 40)
131 |     content_lines.append(f"Message ID: {message['id']}")
132 |     content_lines.append(f"Post: {message.get('post', False)}")
133 |     content_lines.append(f"Silent: {message.get('silent', False)}")
134 |     content_lines.append(f"Pinned: {message.get('pinned', False)}")
135 |     content_lines.append(f"No Forwards: {message.get('noforwards', False)}")
136 |     content_lines.append(f"Last Update: {message.get('last_update', 'Unknown')}")
137 |     
138 |     return "\n".join(content_lines)
139 | 
def analyze_message_media(message: Dict[str, Any]) -> Optional[str]:
    """
    Produce a bracketed one-line description of a message's attached media.

    Image files (jpg, jpeg, png, gif, webp, bmp) are passed to
    ``analyze_image_with_openrouter``; any other file is described by its
    media type and base name.

    Args:
        message: Message dictionary

    Returns:
        Description text, or None when the message has no media or no
        stored media file path
    """
    media_path = message.get('media_file_path')
    if not (message.get('has_media') and media_path):
        return None

    # The stored path may point to a file that is no longer on disk.
    if not os.path.exists(media_path):
        return f"[Media file not found: {media_path}]"

    suffix = os.path.splitext(media_path)[1].lower()
    if suffix not in ('.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp'):
        # For non-image media, just mention the type
        kind = message.get('media_type', 'Unknown')
        return f"[{kind} was attached: {os.path.basename(media_path)}]"

    try:
        outcome = analyze_image_with_openrouter(media_path)
        if outcome['success']:
            return f"[Image was attached: {outcome['analysis']}]"
        return f"[Image analysis failed: {outcome['error']}]"
    except Exception as e:
        # Any failure is logged and reported inline instead of being raised.
        logger.error(f"Error analyzing image {media_path}: {str(e)}")
        return f"[Image analysis error: {str(e)}]"
178 | 
def get_media_group_messages(db: Dict[str, Any], channel_id: str, grouped_id: str) -> List[Dict[str, Any]]:
    """
    Collect the stored messages of one channel that share a media group ID.

    Args:
        db: Database dictionary
        channel_id: Channel ID (key into db['messages'])
        grouped_id: Media group ID to match against each message's 'grouped_id'

    Returns:
        Matching messages ordered by ascending integer message ID; an empty
        list when the channel has no stored messages or nothing matches
    """
    try:
        channel_messages = db['messages'][channel_id]
    except KeyError:
        return []

    members = [
        stored
        for stored in channel_messages.values()
        if stored.get('grouped_id') == grouped_id
    ]

    # IDs are converted with int() so numeric strings order numerically
    return sorted(members, key=lambda stored: int(stored['id']))
204 | 
def analyze_media_group(group_messages: List[Dict[str, Any]]) -> Optional[str]:
    """
    Describe the images of a media group with AI.

    Only messages flagged 'has_media' whose 'media_file_path' exists on disk
    and carries an image extension are considered.

    Args:
        group_messages: List of messages in the media group

    Returns:
        None when the group contains no usable image; otherwise a bracketed
        string holding the analysis, the reported failure, or the error text
    """
    image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp'}

    image_paths = [
        member['media_file_path']
        for member in group_messages
        if member.get('has_media')
        and member.get('media_file_path')
        and os.path.exists(member['media_file_path'])
        and os.path.splitext(member['media_file_path'])[1].lower() in image_extensions
    ]

    if not image_paths:
        return None

    if len(image_paths) > 1:
        # Several images are analysed together in one request
        try:
            outcome = analyze_multiple_images(image_paths)
            if outcome['success']:
                return f"[Media group with {len(image_paths)} images was attached: {outcome['analysis']}]"
            return f"[Media group analysis failed: {outcome['error']}]"
        except Exception as e:
            logger.error(f"Error analyzing media group: {str(e)}")
            return f"[Media group analysis error: {str(e)}]"

    # Exactly one usable image: treat it like a plain single-image message
    try:
        outcome = analyze_image_with_openrouter(image_paths[0])
        if outcome['success']:
            return f"[Image was attached: {outcome['analysis']}]"
        return f"[Image analysis failed: {outcome['error']}]"
    except Exception as e:
        logger.error(f"Error analyzing single image: {str(e)}")
        return f"[Image analysis error: {str(e)}]"
254 | 
def export_individual_messages(db: Dict[str, Any], include_media_analysis: bool = True) -> Dict[str, Any]:
    """
    Export all messages from active channel as individual text files.

    Messages that carry a 'grouped_id' are written together into a single
    file per media group; every other message gets its own file. A
    "_export_summary.txt" file is written into the same directory at the end.

    Args:
        db: Database dictionary
        include_media_analysis: Whether to include AI analysis of media

    Returns:
        Dict with export results. On failure: 'success' is False and 'error'
        holds the reason. On success: 'success' is True together with
        'exported_count', 'skipped_count', 'error_count', 'export_path' and
        'summary_path'.
    """
    active = get_active_channel(db)
    if not active:
        return {
            'success': False,
            'error': 'No active channel selected'
        }

    channel_id = str(active['id'])
    if 'messages' not in db or channel_id not in db['messages']:
        return {
            'success': False,
            'error': 'No messages found for active channel'
        }

    messages = db['messages'][channel_id]
    if not messages:
        return {
            'success': False,
            'error': 'No messages to export'
        }

    # Create export directory for this channel
    channel_name = sanitize_filename(active['title'])
    export_path = os.path.join(EXPORT_DIR, f"{channel_name}_{channel_id}")
    os.makedirs(export_path, exist_ok=True)

    # Track processed media groups to avoid duplicates
    processed_groups = set()

    exported_count = 0
    skipped_count = 0
    error_count = 0

    print(f"\nExporting {len(messages)} messages to individual files...")
    print(f"Export directory: {export_path}")
    print(f"Media analysis: {'Enabled' if include_media_analysis else 'Disabled'}")
    print("-" * 50)

    # Sort messages by ID for consistent processing
    sorted_messages = sorted(messages.items(), key=lambda x: int(x[1]['id']))

    for msg_id, message in sorted_messages:
        try:
            group_id = message.get('grouped_id')

            if group_id:
                # Every member after the first one seen is covered by the group file
                if group_id in processed_groups:
                    skipped_count += 1
                    continue

                # Mark the group up front: if anything below fails, the group
                # is reported as one error instead of being retried (and
                # re-analysed by the AI service) once per member message.
                processed_groups.add(group_id)

                group_messages = get_media_group_messages(db, channel_id, group_id)
                if not group_messages:
                    error_count += 1
                    print(f"✗ Error: Could not find group messages for group {group_id}")
                    continue

                # The group file is named after the first message of the group
                first_message = group_messages[0]
                date_str = first_message['date'][:10]  # YYYY-MM-DD
                preview = first_message.get('text', '')[:30] if first_message.get('text') else 'media_group'
                preview = sanitize_filename(preview)

                filename = f"msg_{first_message['id']}_{date_str}_{preview}.txt"
                filepath = os.path.join(export_path, filename)

                # Combine content from all messages in the group
                combined_content = [
                    "=" * 80,
                    f"MEDIA GROUP ({len(group_messages)} messages)",
                    "=" * 80,
                    "",
                ]

                # Add media group analysis if enabled
                if include_media_analysis:
                    group_analysis = analyze_media_group(group_messages)
                    if group_analysis:
                        combined_content.append("MEDIA GROUP ANALYSIS:")
                        combined_content.append("-" * 40)
                        combined_content.append(group_analysis)
                        combined_content.append("")

                # Add each message in the group
                for i, group_msg in enumerate(group_messages):
                    if i > 0:
                        combined_content.append("\n" + "=" * 40)
                        combined_content.append(f"Message #{group_msg['id']} (part of group)")
                        combined_content.append("=" * 40)

                    # Don't include individual media analysis for group messages
                    # as we already have the group analysis
                    msg_content = format_message_content(group_msg, include_media_analysis=False)
                    combined_content.append(msg_content)

                # Write combined content to file
                with open(filepath, 'w', encoding='utf-8') as f:
                    f.write('\n'.join(combined_content))

                exported_count += 1
                print(f"✓ Exported media group: {filename}")
            else:
                # Regular message (not in a group)
                date_str = message['date'][:10]  # YYYY-MM-DD
                preview = message.get('text', '')[:30] if message.get('text') else 'no_text'
                preview = sanitize_filename(preview)

                filename = f"msg_{message['id']}_{date_str}_{preview}.txt"
                filepath = os.path.join(export_path, filename)

                # Format message content
                content = format_message_content(message, include_media_analysis)

                # Write to file
                with open(filepath, 'w', encoding='utf-8') as f:
                    f.write(content)

                exported_count += 1
                print(f"✓ Exported: {filename}")

        except Exception as e:
            # One bad message must not abort the whole export
            error_count += 1
            logger.error(f"Error exporting message {msg_id}: {str(e)}")
            print(f"✗ Error exporting message {msg_id}: {str(e)}")

    # Create summary file
    summary_content = [
        "MESSAGE EXPORT SUMMARY",
        "=" * 50,
        f"Channel: {active['title']}",
        f"Channel ID: {active['id']}",
        f"Export Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        f"Total Messages in Channel: {len(messages)}",
        f"Files Exported: {exported_count}",
        f"Messages Skipped: {skipped_count}",
        f"Errors: {error_count}",
        f"Media Analysis: {'Enabled' if include_media_analysis else 'Disabled'}",
        "",
    ]
    # Only claim success when nothing failed
    if error_count:
        summary_content.append(f"Export completed with {error_count} error(s).")
    else:
        summary_content.append("Export completed successfully!")

    summary_path = os.path.join(export_path, "_export_summary.txt")
    with open(summary_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(summary_content))

    print(f"\n{'-' * 50}")
    print("Export completed!")
    print(f"Files exported: {exported_count}")
    print(f"Messages skipped: {skipped_count}")
    print(f"Errors: {error_count}")
    print(f"Export directory: {export_path}")

    return {
        'success': True,
        'exported_count': exported_count,
        'skipped_count': skipped_count,
        'error_count': error_count,
        'export_path': export_path,
        'summary_path': summary_path
    }


--------------------------------------------------------------------------------
/src/export.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import time
  3 | import logging
  4 | from datetime import datetime
  5 | 
  6 | from src.database import save_database
  7 | from src.message_export import export_individual_messages
  8 | 
  9 | logger = logging.getLogger(__name__)
 10 | 
 11 | def get_channel_statistics(db, channel_id):
 12 |     """Get statistics for a channel"""
 13 |     channel_id = str(channel_id)
 14 |     
 15 |     # Count messages
 16 |     messages_count = 0
 17 |     if 'messages' in db and channel_id in db['messages']:
 18 |         messages_count = len(db['messages'][channel_id])
 19 |     
 20 |     # Count media
 21 |     media_count = 0
 22 |     video_count = 0
 23 |     if 'messages' in db and channel_id in db['messages']:
 24 |         for msg_id, msg in db['messages'][channel_id].items():
 25 |             if msg.get('has_media', False):
 26 |                 media_count += 1
 27 |                 if msg.get('media_type') in ['MessageMediaDocument', 'MessageMediaVideo']:
 28 |                     video_count += 1
 29 |     
 30 |     # Count users
 31 |     users_count = 0
 32 |     if 'users' in db and channel_id in db['users']:
 33 |         users_count = len(db['users'][channel_id])
 34 |         
 35 |     return {
 36 |         'messages': messages_count,
 37 |         'media': media_count,
 38 |         'videos': video_count,
 39 |         'users': users_count
 40 |     }
 41 | 
 42 | async def list_users_in_channel(db, channel_id, client=None):
 43 |     """List all users who have messages in a channel"""
 44 |     channel_id = str(channel_id)
 45 |     active_users = {}
 46 |     
 47 |     # Build user dictionary with message counts
 48 |     if 'messages' in db and channel_id in db['messages']:
 49 |         for msg_id, msg in db['messages'][channel_id].items():
 50 |             user_id = msg.get('from_id')
 51 |             if user_id:
 52 |                 if user_id not in active_users:
 53 |                     active_users[user_id] = 0
 54 |                 active_users[user_id] += 1
 55 |     
 56 |     # Get user info
 57 |     users = []
 58 |     for user_id, msg_count in active_users.items():
 59 |         # Try to get user info from database first, or fetch from API if needed
 60 |         user_info = await get_user_info_for_id(client, channel_id, user_id, db) if client else None
 61 |         
 62 |         if user_info:
 63 |             users.append({
 64 |                 'id': user_id,
 65 |                 'username': user_info.get('username', 'Unknown'),
 66 |                 'first_name': user_info.get('first_name', ''),
 67 |                 'last_name': user_info.get('last_name', ''),
 68 |                 'message_count': msg_count
 69 |             })
 70 |         else:
 71 |             # If we couldn't get user info, use placeholder
 72 |             users.append({
 73 |                 'id': user_id,
 74 |                 'username': f'Unknown User ({user_id})',
 75 |                 'first_name': '',
 76 |                 'last_name': '',
 77 |                 'message_count': msg_count
 78 |             })
 79 |     
 80 |     # Sort by message count, descending
 81 |     users.sort(key=lambda x: x['message_count'], reverse=True)
 82 |     return users
 83 | 
 84 | async def format_message_for_export(msg, db, channel_id, client=None):
 85 |     """Format a message for export"""
 86 |     # Get sender information
 87 |     sender_name = "Unknown"
 88 |     user_id = msg.get('from_id')
 89 |     
 90 |     if user_id:
 91 |         # Try to get user info from database or fetch from API if needed
 92 |         user = None
 93 |         if 'users' in db and channel_id in db['users'] and str(user_id) in db['users'][channel_id]:
 94 |             user = db['users'][channel_id][str(user_id)]
 95 |         elif client:
 96 |             user = await get_user_info_for_id(client, channel_id, user_id, db)
 97 |             
 98 |         if user:
 99 |             if user.get('username'):
100 |                 sender_name = f"@{user['username']}"
101 |             else:
102 |                 first_name = user.get('first_name', '')
103 |                 last_name = user.get('last_name', '')
104 |                 sender_name = f"{first_name} {last_name}".strip()
105 |         else:
106 |             sender_name = f"User_{user_id}"
107 |     
108 |     # Format date
109 |     date_str = "Unknown date"
110 |     try:
111 |         date_obj = datetime.strptime(msg.get('date'), "%Y-%m-%d %H:%M:%S%z")
112 |         date_str = date_obj.strftime("%Y-%m-%d %H:%M:%S")
113 |     except (ValueError, TypeError):
114 |         pass
115 |     
116 |     # Check if it's a reply
117 |     reply_text = ""
118 |     if msg.get('reply_to'):
119 |         reply_msg_id = msg.get('reply_to')
120 |         if reply_msg_id and str(reply_msg_id) in db['messages'][channel_id]:
121 |             reply_msg = db['messages'][channel_id][str(reply_msg_id)]
122 |             reply_sender_id = reply_msg.get('from_id')
123 |             reply_sender_name = "Unknown"
124 |             
125 |             if reply_sender_id:
126 |                 # Try to get reply user info
127 |                 reply_user = None
128 |                 if 'users' in db and channel_id in db['users'] and str(reply_sender_id) in db['users'][channel_id]:
129 |                     reply_user = db['users'][channel_id][str(reply_sender_id)]
130 |                 elif client:
131 |                     reply_user = await get_user_info_for_id(client, channel_id, reply_sender_id, db)
132 |                     
133 |                 if reply_user:
134 |                     if reply_user.get('username'):
135 |                         reply_sender_name = f"@{reply_user['username']}"
136 |                     else:
137 |                         first = reply_user.get('first_name', '')
138 |                         last = reply_user.get('last_name', '')
139 |                         reply_sender_name = f"{first} {last}".strip()
140 |                 else:
141 |                     reply_sender_name = f"User_{reply_sender_id}"
142 |             
143 |             reply_content = reply_msg.get('text', '')
144 |             if len(reply_content) > 50:
145 |                 reply_content = reply_content[:47] + "..."
146 |             
147 |             reply_text = f"[Replying to {reply_sender_name}: \"{reply_content}\"]\n"
148 |     
149 |     # Build message text
150 |     media_text = ""
151 |     if msg.get('has_media'):
152 |         media_type = msg.get('media_type', 'Unknown media')
153 |         media_text = f"[{media_type}]\n"
154 |     
155 |     views = msg.get('views', 0)
156 |     forwards = msg.get('forwards', 0)
157 |     
158 |     # Format reactions if any
159 |     reactions_text = ""
160 |     if msg.get('reactions') and len(msg.get('reactions')) > 0:
161 |         reactions = []
162 |         for reaction in msg.get('reactions'):
163 |             emoji = reaction.get('reaction', '👍')
164 |             count = reaction.get('count', 1)
165 |             reactions.append(f"{emoji} {count}")
166 |         
167 |         reactions_text = f" [Reactions: {', '.join(reactions)}]"
168 |     
169 |     stats_text = ""
170 |     if views or forwards:
171 |         stats_items = []
172 |         if views:
173 |             stats_items.append(f"{views} views")
174 |         if forwards:
175 |             stats_items.append(f"{forwards} forwards")
176 |         stats_text = f" [{', '.join(stats_items)}]"
177 |     
178 |     formatted_msg = (
179 |         f"[{date_str}] {sender_name}:{stats_text}{reactions_text}\n"
180 |         f"{reply_text}{media_text}{msg.get('text', '')}\n\n"
181 |     )
182 |     
183 |     return formatted_msg
184 | 
async def export_channel_messages(db, channel_id, channel_title, export_dir="exports", client=None):
    """
    Export all messages from a channel to a text file.

    Args:
        db: Database dictionary
        channel_id: Channel ID (converted to str for lookups)
        channel_title: Channel title; used in the header and, sanitized, in
            the file name
        export_dir: Directory that receives the export file
        client: Optional Telegram client passed on to the message formatter

    Returns:
        Path of the written file, or None when the channel has no stored
        messages (in which case nothing is created on disk)
    """
    channel_id = str(channel_id)

    # Check if we have messages for this channel before touching the disk
    channel_messages = db.get('messages', {}).get(channel_id)
    if not channel_messages:
        print(f"No messages found for channel {channel_title}")
        return None

    # Create export directory if it doesn't exist
    os.makedirs(export_dir, exist_ok=True)

    # Sanitize channel title for filename
    safe_title = ''.join(c if c.isalnum() or c in [' ', '-', '_'] else '_' for c in channel_title)
    safe_title = safe_title.strip().replace(' ', '_')

    # Create filename with channel ID and sanitized title
    timestamp = time.strftime("%Y%m%d_%H%M%S")
    filename = f"{export_dir}/{channel_id}_{safe_title}_{timestamp}.txt"

    # Sort messages by date. Parsed dates are timezone-aware, so they must not
    # be compared with a naive sentinel such as datetime.max; messages whose
    # date is missing or unparseable are kept separately and appended at the
    # end in their stored order.
    dated = []
    undated = []
    for msg in channel_messages.values():
        try:
            parsed = datetime.strptime(msg.get('date'), "%Y-%m-%d %H:%M:%S%z")
        except (ValueError, TypeError):
            undated.append(msg)
        else:
            dated.append((parsed, msg))

    dated.sort(key=lambda pair: pair[0])
    messages = [msg for _, msg in dated] + undated

    # Write messages to file
    message_count = 0
    with open(filename, 'w', encoding='utf-8') as f:
        # Write header
        f.write(f"Export of channel: {channel_title} (ID: {channel_id})\n")
        f.write(f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
        f.write(f"Total messages: {len(messages)}\n")
        f.write("-" * 80 + "\n\n")

        # Write messages
        for msg in messages:
            formatted_msg = await format_message_for_export(msg, db, channel_id, client)
            f.write(formatted_msg)
            message_count += 1

            # Print progress every 100 messages
            if message_count % 100 == 0:
                print(f"Exported {message_count}/{len(messages)} messages...")

    print(f"\nExport complete: {message_count} messages exported to {filename}")
    return filename
241 | 
async def export_user_messages(db, channel_id, channel_title, user_id, export_dir="exports", client=None):
    """
    Export messages from a specific user in a channel.

    Args:
        db: Database dictionary
        channel_id: Channel ID (converted to str for lookups)
        channel_title: Channel title; used in the header and, sanitized, in
            the file name
        user_id: ID of the user whose messages are exported (compared as str
            against each message's 'from_id')
        export_dir: Directory that receives the export file
        client: Optional Telegram client used to fetch the user when missing
            from the database, and passed on to the message formatter

    Returns:
        Path of the written file, or None when the channel has no stored
        messages or the user has none (in which case nothing is created on
        disk)
    """
    channel_id = str(channel_id)
    user_id = str(user_id)

    # Check if we have messages for this channel before doing any other work
    channel_messages = db.get('messages', {}).get(channel_id)
    if not channel_messages:
        print(f"No messages found for channel {channel_title}")
        return None

    # Get user info: database first (works without a client), then the API
    user_info = db.get('users', {}).get(channel_id, {}).get(user_id)
    if user_info is None and client:
        user_info = await get_user_info_for_id(client, channel_id, user_id, db)

    # Name fields may be stored as None; treat them as empty
    handle = (user_info.get('username') or '') if user_info else ''
    first = (user_info.get('first_name') or '') if user_info else ''
    last = (user_info.get('last_name') or '') if user_info else ''

    username = f"user_{user_id}"
    if handle:
        username = handle
    elif first or last:
        username = f"{first}_{last}".strip('_')

    # Filter messages by the specific user. Parsed dates are timezone-aware,
    # so they must not be compared with a naive sentinel such as
    # datetime.max; messages whose date is missing or unparseable are kept
    # separately and appended at the end in their stored order.
    dated = []
    undated = []
    for msg in channel_messages.values():
        if str(msg.get('from_id')) != user_id:
            continue
        try:
            parsed = datetime.strptime(msg.get('date'), "%Y-%m-%d %H:%M:%S%z")
        except (ValueError, TypeError):
            undated.append(msg)
        else:
            dated.append((parsed, msg))

    if not dated and not undated:
        print(f"No messages found for user {username} in channel {channel_title}")
        return None

    dated.sort(key=lambda pair: pair[0])
    user_messages = [msg for _, msg in dated] + undated

    # Create export directory if it doesn't exist
    os.makedirs(export_dir, exist_ok=True)

    # Sanitize channel title and username for filename
    safe_title = ''.join(c if c.isalnum() or c in [' ', '-', '_'] else '_' for c in channel_title)
    safe_title = safe_title.strip().replace(' ', '_')

    safe_username = ''.join(c if c.isalnum() or c in ['-', '_'] else '_' for c in username)

    # Create filename
    timestamp = time.strftime("%Y%m%d_%H%M%S")
    filename = f"{export_dir}/{channel_id}_{safe_title}_{safe_username}_{timestamp}.txt"

    # Human-readable author label for the header
    full_name = f"{first} {last}".strip()
    if handle:
        author = f"@{handle}"
    elif full_name:
        author = full_name
    else:
        author = f"User ID: {user_id}"

    # Write messages to file
    message_count = 0
    with open(filename, 'w', encoding='utf-8') as f:
        # Write header
        f.write(f"Export of messages by {author}")
        f.write(f" in channel: {channel_title} (ID: {channel_id})\n")
        f.write(f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
        f.write(f"Total messages: {len(user_messages)}\n")
        f.write("-" * 80 + "\n\n")

        # Write messages
        for msg in user_messages:
            formatted_msg = await format_message_for_export(msg, db, channel_id, client)
            f.write(formatted_msg)
            message_count += 1

            # Print progress every 100 messages
            if message_count % 100 == 0:
                print(f"Exported {message_count}/{len(user_messages)} messages...")

    print(f"\nExport complete: {message_count} messages exported to {filename}")
    return filename
328 | 
async def get_user_info_for_id(client, channel_id, user_id, db):
    """
    Resolve the stored record for a user ID within a channel.

    The database is consulted first. On a miss, and only when a client is
    supplied, the entity is fetched with client.get_entity, converted to a
    record, stored under db['users'][channel_id] and returned. Returns None
    when the user is unknown and no client is given, or when fetching fails
    (the failure is logged).
    """
    uid = str(user_id)

    # Fast path: already known for this channel
    if 'users' in db and channel_id in db['users'] and uid in db['users'][channel_id]:
        return db['users'][channel_id][uid]

    if not client:
        return None

    try:
        # Make sure the per-channel user table exists before fetching
        channel_users = db.setdefault('users', {}).setdefault(channel_id, {})

        entity = await client.get_entity(int(uid))

        record = {
            'id': entity.id,
            'username': entity.username,
            'first_name': entity.first_name,
            'last_name': entity.last_name,
            'phone': getattr(entity, 'phone', None),
        }
        # Boolean flags default to False when the entity lacks the attribute
        for flag in ('bot', 'scam', 'fake', 'premium', 'verified', 'restricted'):
            record[flag] = getattr(entity, flag, False)
        record['first_seen'] = str(datetime.now())
        record['last_seen'] = str(datetime.now())

        channel_users[uid] = record
        logger.info(f"Added new user {entity.id} to database")

        return record
    except Exception as e:
        # Lookup is best-effort; callers fall back to a placeholder name
        logger.error(f"Error fetching user info for {uid}: {str(e)}")
        return None
379 | 
380 | async def export_menu(db, client=None):
381 |     """Display export options menu"""
382 |     if not db.get('active_channel'):
383 |         print("\nNo active channel selected! Please select a channel first.")
384 |         return
385 |     
386 |     active = db['active_channel']
387 |     channel_id = str(active['id'])
388 |     channel_title = active['title']
389 |     
390 |     # Get statistics for the channel
391 |     stats = get_channel_statistics(db, channel_id)
392 |     
393 |     print(f"\nExport options for channel: {channel_title}")
394 |     print("-" * 50)
395 |     print(f"Messages: {stats['messages']}")
396 |     print(f"Media files: {stats['media']}")
397 |     print(f"Videos: {stats['videos']}")
398 |     print(f"Users: {stats['users']}")
399 |     print("-" * 50)
400 |     
401 |     # Check if we have user data
402 |     if stats['users'] == 0 and stats['messages'] > 0:
403 |         print("\nWarning: No user data found for this channel.")
404 |         print("User information may be incomplete in exports.")
405 |         print("Consider saving channel users first (option 5 in main menu).")
406 |     
407 |     print("\nExport Options:")
408 |     print("1. Export all messages")
409 |     print("2. Export messages from a specific user")
410 |     print("3. Export individual message files with AI media analysis")
411 |     print("0. Cancel")
412 |     
413 |     choice = input("\nEnter your choice (0-3): ")
414 |     
415 |     if choice == '1':
416 |         # Export all messages
417 |         print(f"\nExporting all messages from {channel_title}...")
418 |         await export_channel_messages(db, channel_id, channel_title, client=client)
419 |     elif choice == '2':
420 |         # List users and export messages from a specific user
421 |         users = await list_users_in_channel(db, channel_id, client=client)
422 |         
423 |         if not users:
424 |             print("\nNo users found in this channel!")
425 |             return
426 |         
427 |         print("\nUsers in this channel:")
428 |         print("-" * 50)
429 |         for i, user in enumerate(users[:30], 1):  # Show top 30 users by message count
430 |             username = user['username'] or f"{user['first_name']} {user['last_name']}"
431 |             print(f"{i}. {username} - {user['message_count']} messages")
432 |         
433 |         if len(users) > 30:
434 |             print(f"... and {len(users) - 30} more users")
435 |         
436 |         print("0. Cancel")
437 |         
438 |         user_choice = input("\nEnter user number to export their messages (or 0 to cancel): ")
439 |         
440 |         if user_choice == '0':
441 |             return
442 |         
443 |         try:
444 |             user_index = int(user_choice)
445 |             if 1 <= user_index <= len(users):
446 |                 selected_user = users[user_index - 1]
447 |                 print(f"\nExporting messages from {selected_user['username']}...")
448 |                 await export_user_messages(db, channel_id, channel_title, selected_user['id'], client=client)
449 |             else:
450 |                 print("\nInvalid user number!")
451 |         except ValueError:
452 |             print("\nPlease enter a valid number!")
453 |     elif choice == '3':
454 |         # Export individual message files with AI analysis
455 |         print(f"\nExporting individual message files from {channel_title}...")
456 |         print("This will create a separate text file for each message/media group.")
457 |         
458 |         # Check if OpenRouter API key is configured
459 |         # Load environment variables fresh to catch any updates
460 |         from dotenv import load_dotenv
461 |         load_dotenv()
462 |         import os
463 |         api_key = os.getenv('OPENROUTER_API_KEY')
464 |         if api_key:
465 |             print("✓ OpenRouter API key found - AI image analysis will be included")
466 |             include_analysis = True
467 |         else:
468 |             print("⚠ OpenRouter API key not found - AI image analysis will be disabled")
469 |             print("To enable AI analysis, set OPENROUTER_API_KEY environment variable")
470 |             include_analysis = False
471 |         
472 |         confirm = input(f"\nProceed with individual file export? (y/N): ").lower()
473 |         if confirm == 'y':
474 |             result = export_individual_messages(db, include_media_analysis=include_analysis)
475 |             if result['success']:
476 |                 print(f"\n✓ Export completed successfully!")
477 |                 print(f"Files exported: {result['exported_count']}")
478 |                 print(f"Export location: {result['export_path']}")
479 |             else:
480 |                 print(f"\n✗ Export failed: {result['error']}")
481 |         else:
482 |             print("\nExport cancelled.")
483 |     elif choice == '0':
484 |         return
485 |     else:
486 |         print("\nInvalid option!")


--------------------------------------------------------------------------------
/src/media.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Media management module.
  3 | Handles downloading and managing media content from messages.
  4 | """
  5 | import os
  6 | import logging
  7 | import asyncio
  8 | import random
  9 | from datetime import datetime
 10 | from telethon.errors import FloodWaitError, ServerError, TimedOutError
 11 | 
 12 | from src.config import (
 13 |     VIDEO_TEMP_DIR, MEDIA_DOWNLOAD_TIMEOUT, MEDIA_DOWNLOAD_RETRY,
 14 |     MEDIA_RETRY_DELAY_BASE, MEDIA_DOWNLOAD_DELAY,
 15 |     CHUNK_SIZE
 16 | )
 17 | 
 18 | logger = logging.getLogger(__name__)
 19 | 
 20 | async def download_media_safely(client, message, filename, file_size=None):
 21 |     """
 22 |     Enhanced media download method with safety features:
 23 |     - Timeout handling
 24 |     - Chunked downloads for large files
 25 |     - Retry mechanism with exponential backoff
 26 |     - Detailed error reporting
 27 |     
 28 |     Args:
 29 |         client: Telegram client
 30 |         message: Message containing media
 31 |         filename: Base filename to save as
 32 |         file_size: Size of the file in bytes if known
 33 |         
 34 |     Returns:
 35 |         dict: Result with 'success', 'file_path', and optional 'error' fields
 36 |     """
 37 |     result = {
 38 |         'success': False,
 39 |         'file_path': None,
 40 |         'error': None
 41 |     }
 42 |     
 43 |     # Get target file path
 44 |     target_path = os.path.join(VIDEO_TEMP_DIR, filename)
 45 |     
 46 |     # Determine if file is large (over 10MB)
 47 |     is_large_file = False
 48 |     if file_size and file_size > 10 * 1024 * 1024:  # > 10MB
 49 |         is_large_file = True
 50 |         print(f"Large file detected ({file_size/(1024*1024):.2f} MB). Using chunked download.")
 51 |     
 52 |     # For retry mechanism
 53 |     retry_count = 0
 54 |     max_retries = MEDIA_DOWNLOAD_RETRY
 55 |     
 56 |     # Initialize start_time variable for both large and small files
 57 |     start_time = datetime.now()
 58 |     
 59 |     # Try to download with retries
 60 |     while retry_count <= max_retries:
 61 |         try:
 62 |             if retry_count > 0:
 63 |                 # Calculate backoff delay with jitter to avoid thundering herd
 64 |                 backoff_time = MEDIA_RETRY_DELAY_BASE * (2 ** (retry_count - 1))
 65 |                 # Add jitter (±25%)
 66 |                 jitter = random.uniform(0.75, 1.25)
 67 |                 delay = backoff_time * jitter
 68 |                 print(f"Retry {retry_count}/{max_retries} after {delay:.1f} seconds...")
 69 |                 await asyncio.sleep(delay)
 70 |             
 71 |             # Reset start_time for each attempt
 72 |             start_time = datetime.now()
 73 |             
 74 |             if is_large_file:
 75 |                 # Use chunked download for large files
 76 |                 print(f"Starting chunked download with {CHUNK_SIZE/1024:.0f}KB chunks...")
 77 |                 
 78 |                 # Use a custom progress callback
 79 |                 last_update_time = start_time
 80 |                 bytes_downloaded = 0
 81 |                 
 82 |                 # Progress callback function
 83 |                 def progress_callback(downloaded_bytes, total_bytes):
 84 |                     nonlocal bytes_downloaded, last_update_time
 85 |                     bytes_downloaded = downloaded_bytes
 86 |                     
 87 |                     # Only update display every second
 88 |                     current_time = datetime.now()
 89 |                     if (current_time - last_update_time).total_seconds() >= 1:
 90 |                         # Calculate speed
 91 |                         elapsed = (current_time - start_time).total_seconds()
 92 |                         speed = downloaded_bytes / elapsed if elapsed > 0 else 0
 93 |                         
 94 |                         # Format sizes
 95 |                         downloaded_mb = downloaded_bytes / (1024 * 1024)
 96 |                         total_mb = total_bytes / (1024 * 1024) if total_bytes else 0
 97 |                         speed_kbps = speed / 1024
 98 |                         
 99 |                         # Calculate percentage
100 |                         percent = (downloaded_bytes / total_bytes * 100) if total_bytes else 0
101 |                         
102 |                         # Calculate ETA
103 |                         if speed > 0 and total_bytes:
104 |                             remaining_bytes = total_bytes - downloaded_bytes
105 |                             eta_seconds = remaining_bytes / speed
106 |                             eta_str = str(datetime.fromtimestamp(eta_seconds) - datetime.fromtimestamp(0))
107 |                             eta_str = eta_str.split('.')[0]  # Remove microseconds
108 |                         else:
109 |                             eta_str = "unknown"
110 |                         
111 |                         print(f"\rProgress: {downloaded_mb:.2f}/{total_mb:.2f} MB " + 
112 |                               f"({percent:.1f}%) at {speed_kbps:.1f} KB/s - ETA: {eta_str}", 
113 |                               end='')
114 |                         
115 |                         last_update_time = current_time
116 |                     
117 |                     return True  # Continue download
118 |                 
119 |                 # Attempt the chunked download with timeout
120 |                 file_path = await asyncio.wait_for(
121 |                     client.download_media(
122 |                         message.media,
123 |                         file=target_path,
124 |                         progress_callback=progress_callback
125 |                     ),
126 |                     timeout=MEDIA_DOWNLOAD_TIMEOUT
127 |                 )
128 |                 
129 |                 print()  # New line after progress output
130 |                 
131 |             else:
132 |                 # Regular download for smaller files
133 |                 file_path = await asyncio.wait_for(
134 |                     client.download_media(
135 |                         message.media,
136 |                         file=target_path
137 |                     ),
138 |                     timeout=MEDIA_DOWNLOAD_TIMEOUT
139 |                 )
140 |             
141 |             # Success!
142 |             if file_path:
143 |                 # Verify downloaded file
144 |                 if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
145 |                     downloaded_size = os.path.getsize(file_path)
146 |                     if file_size and abs(downloaded_size - file_size) > 1024:  # Allow 1KB difference
147 |                         # File size mismatch, file may be incomplete
148 |                         print(f"Warning: File size mismatch. Expected: {file_size}, Got: {downloaded_size}")
149 |                         logger.warning(f"File size mismatch for {file_path}. Expected: {file_size}, Got: {downloaded_size}")
150 |                     
151 |                     # Calculate download speed
152 |                     elapsed = (datetime.now() - start_time).total_seconds()
153 |                     speed_mbps = (downloaded_size / (1024 * 1024)) / elapsed if elapsed > 0 else 0
154 |                     print(f"Download completed: {downloaded_size/(1024*1024):.2f} MB in {elapsed:.1f} seconds ({speed_mbps:.2f} MB/s)")
155 |                     
156 |                     result['success'] = True
157 |                     result['file_path'] = file_path
158 |                     return result
159 |                 else:
160 |                     # File doesn't exist or is empty
161 |                     result['error'] = "Download failed - file is empty or missing"
162 |                     if os.path.exists(file_path):
163 |                         os.remove(file_path)  # Clean up empty file
164 |                     # Continue to retry
165 |             
166 |             else:
167 |                 result['error'] = "Download returned None"
168 |                 # Continue to retry
169 |         
170 |         except asyncio.TimeoutError:
171 |             result['error'] = f"Download timed out after {MEDIA_DOWNLOAD_TIMEOUT} seconds"
172 |             print(f"Timeout error: {result['error']}")
173 |             logger.warning(f"Download timeout for {filename}")
174 |             # Continue to next retry
175 |         
176 |         except FloodWaitError as e:
177 |             wait_time = e.seconds
178 |             result['error'] = f"Rate limit exceeded! Required to wait {wait_time} seconds"
179 |             print(f"Flood wait error: Need to wait {wait_time} seconds")
180 |             logger.warning(f"FloodWaitError: {wait_time} seconds wait required")
181 |             
182 |             # This is a special case - wait the required time then retry
183 |             await asyncio.sleep(wait_time)
184 |             continue  # Skip retry increment
185 |         
186 |         except (ServerError, TimedOutError) as e:
187 |             result['error'] = f"Telegram server error: {str(e)}"
188 |             print(f"Server error: {result['error']}")
189 |             logger.warning(f"Server error during download: {str(e)}")
190 |             # Continue to retry
191 |         
192 |         except ConnectionError as e:
193 |             result['error'] = f"Connection error: {str(e)}"
194 |             print(f"Connection error: {result['error']}")
195 |             logger.warning(f"Connection error during download: {str(e)}")
196 |             # Continue to retry
197 |         
198 |         except OSError as e:
199 |             result['error'] = f"OS error: {str(e)}"
200 |             print(f"OS error: {result['error']}")
201 |             logger.warning(f"OS error during download: {str(e)}")
202 |             # Continue to retry
203 |         
204 |         except Exception as e:
205 |             result['error'] = f"Unexpected error: {str(e)}"
206 |             print(f"Unexpected error: {result['error']}")
207 |             logger.error(f"Unexpected error during download: {str(e)}", exc_info=True)
208 |             # Continue to retry
209 |         
210 |         # Increment retry counter
211 |         retry_count += 1
212 |     
213 |     # If we get here, all retries failed
214 |     print(f"All {max_retries} download attempts failed")
215 |     return result
216 | 
async def download_video_messages(client, db, db_path, limit=None, round_videos_only=False, video_dir=None):
    """
    Download video messages (video circles or regular videos) from active channel

    Videos already recorded in db['videos'][channel_id] whose file still
    exists on disk are skipped. The database is saved every 5 minutes during
    the run and once more when the scan ends, including when it ends with an
    error.

    Args:
        client: Telegram client
        db: Database
        db_path: Path to database file
        limit: Maximum number of video messages to scan (None for all)
        round_videos_only: If True, download only round videos (video circles/video messages),
                           otherwise download all videos
        video_dir: Custom directory to save videos (default: VIDEO_TEMP_DIR)

    Returns:
        bool: True if the scan completed (individual downloads may still have
              failed), False if there is no active channel, no videos were
              found, or an unexpected error aborted the run
    """
    from telethon.tl.types import InputMessagesFilterRoundVideo, InputMessagesFilterVideo
    from src.channels import get_active_channel
    from src.database import save_database

    active = get_active_channel(db)
    if not active:
        print("\nNo active channel selected!")
        return False

    try:
        # Set up download directory
        custom_dir = None
        if video_dir:
            custom_dir = os.path.abspath(video_dir)
            os.makedirs(custom_dir, exist_ok=True)

        videos_label = 'round' if round_videos_only else 'all'
        print("\n" + "="*50)
        print(f"Downloading {videos_label} videos from channel: {active['title']}")
        print("="*50)

        # Initialize video messages list in the database
        channel_id = str(active['id'])
        if 'videos' not in db:
            db['videos'] = {}
        if channel_id not in db['videos']:
            db['videos'][channel_id] = {}
        channel_videos = db['videos'][channel_id]

        # Counters for tracking progress
        downloaded = 0
        skipped = 0
        errors = 0
        # NOTE: never incremented here; download_media_safely waits out flood
        # limits itself. Kept so the printed statistics keep their shape.
        retry_count = 0

        # Progress tracking
        start_time = datetime.now()
        last_save_time = start_time

        # Define the video filter
        video_filter = InputMessagesFilterRoundVideo() if round_videos_only else InputMessagesFilterVideo()

        # Probe for at least one matching message before starting the full scan
        has_videos = False
        try:
            async for _ in client.iter_messages(active['id'], filter=video_filter, limit=1):
                has_videos = True
            print(f"Scanning for {videos_label} videos in the channel...")
        except Exception as e:
            print(f"Error checking for videos: {e}")
            return False

        if not has_videos:
            print("\nNo videos found in this channel!")
            return False

        try:
            # Get videos from newest to oldest
            async for message in client.iter_messages(active['id'], filter=video_filter, limit=limit):
                try:
                    msg_id = str(message.id)
                    media = message.media
                    document = getattr(media, 'document', None)
                    file_size = getattr(document, 'size', None)

                    # The duration is carried by one of the document's
                    # attributes, not by the document object itself.
                    duration = None
                    for attribute in getattr(document, 'attributes', None) or []:
                        value = getattr(attribute, 'duration', None)
                        if value is not None:
                            # Stored as int so it stays printable as mm:ss
                            duration = int(value)
                            break

                    video_info = {
                        'id': message.id,
                        'date': str(message.date),
                        # from_id may be a non-user peer without a user_id
                        'from_id': getattr(message.from_id, 'user_id', None),
                        'media_type': type(media).__name__ if media else None,
                        'file_path': None,
                        'download_date': None,
                        'file_size': None,
                        'duration': duration,
                        'mime_type': getattr(document, 'mime_type', None),
                        'size': file_size,
                    }

                    # Check if video is already downloaded
                    existing_path = channel_videos.get(msg_id, {}).get('file_path')
                    if existing_path and os.path.exists(existing_path):
                        print(f"Video from message #{message.id} already downloaded, skipping...")
                        skipped += 1
                        continue

                    # Create a filename based on message ID and date
                    filename = f"video_{message.id}_{message.date.strftime('%Y%m%d_%H%M%S')}"
                    if custom_dir:
                        # download_media_safely joins the name onto
                        # VIDEO_TEMP_DIR; os.path.join drops earlier components
                        # when a later one is absolute, so an absolute name
                        # redirects the download into the requested directory.
                        filename = os.path.join(custom_dir, filename)

                    if file_size:
                        print(f"Video size: {file_size / (1024 * 1024):.2f} MB")

                    # Download the video using our enhanced method
                    print(f"Downloading video from message #{message.id}...")
                    download_result = await download_media_safely(
                        client=client,
                        message=message,
                        filename=filename,
                        file_size=file_size
                    )

                    if download_result['success']:
                        file_path = download_result['file_path']
                        print(f"Video saved to: {file_path}")

                        # Update video info with download details
                        video_info['file_path'] = file_path
                        video_info['download_date'] = str(datetime.now())
                        video_info['file_size'] = os.path.getsize(file_path) if os.path.exists(file_path) else None

                        channel_videos[msg_id] = video_info
                        downloaded += 1

                        # Save database periodically
                        current_time = datetime.now()
                        if (current_time - last_save_time).total_seconds() > 300:  # 5 minutes
                            save_database(db_path, db)
                            last_save_time = current_time
                    else:
                        # Handle download failure
                        print(f"Failed to download video: {download_result['error']}")
                        logger.warning(f"Video download failed for message {message.id}: {download_result['error']}")
                        errors += 1

                    # Display progress
                    elapsed = datetime.now() - start_time
                    print(f"Progress: Downloaded: {downloaded}, Skipped: {skipped}, Errors: {errors}")
                    print(f"Elapsed time: {str(elapsed).split('.')[0]}")
                    print(f"Retries due to rate limits: {retry_count}")
                    print("-"*50)

                    # Add delay to avoid rate limits
                    await asyncio.sleep(MEDIA_DOWNLOAD_DELAY)

                except Exception as e:
                    # One broken message must not abort the whole run
                    logger.error(f"Error downloading video from message {message.id}: {str(e)}")
                    print(f"Error downloading video from message #{message.id}: {str(e)}")
                    errors += 1
                    continue
        finally:
            # Final save; also runs when the scan aborts, so downloads recorded
            # since the last periodic save are not lost.
            save_database(db_path, db)

        # Final statistics
        elapsed = datetime.now() - start_time

        print("\n" + "="*50)
        print("Download Completed!")
        print("="*50)
        print(f"\nFinal Statistics:")
        print(f"Total videos downloaded: {downloaded}")
        print(f"Videos skipped (already downloaded): {skipped}")
        print(f"Errors: {errors}")
        print(f"Retries due to rate limits: {retry_count}")
        print(f"\nTime Elapsed: {str(elapsed).split('.')[0]}")
        print("="*50)

        return True

    except Exception as e:
        logger.error(f"Error downloading videos: {e}")
        print(f"\nError downloading videos: {str(e)}")
        return False
399 | 
def _format_video_size(size_bytes):
    """Format a byte count as B/KB/MB, or "N/A" when the size is unknown."""
    if not size_bytes:
        return "N/A"
    if size_bytes < 1024:
        return f"{size_bytes} B"
    if size_bytes < 1024 * 1024:
        return f"{size_bytes / 1024:.1f} KB"
    return f"{size_bytes / (1024 * 1024):.1f} MB"


def _format_video_duration(duration):
    """Format a duration in seconds as mm:ss, or "N/A" when unknown or not numeric."""
    if not duration:
        return "N/A"
    try:
        # int() first: a float duration cannot be rendered with the :02d format
        minutes, seconds = divmod(int(duration), 60)
    except (TypeError, ValueError):
        return "N/A"
    return f"{minutes:02d}:{seconds:02d}"


def list_downloaded_videos(db):
    """
    List all downloaded videos for active channel

    Prints a table (newest message ID first) of the videos recorded in
    db['videos'] for the active channel whose file still exists on disk.
    Entries whose file is missing are neither listed nor counted.

    Args:
        db: Database
    """
    from src.channels import get_active_channel

    active = get_active_channel(db)
    if not active:
        print("\nNo active channel selected!")
        return

    channel_id = str(active['id'])
    if 'videos' not in db or channel_id not in db['videos'] or not db['videos'][channel_id]:
        print("\nNo downloaded videos for this channel!")
        return

    videos = db['videos'][channel_id]
    listed = 0

    print("\nDownloaded Videos:")
    print("-" * 80)
    print(f"{'ID':<10} | {'Date':<20} | {'Type':<15} | {'Size':<10} | {'Duration':<10} | {'Path':<30}")
    print("-" * 80)

    for video_id, video in sorted(videos.items(), key=lambda item: int(item[0]), reverse=True):
        path = video.get('file_path')
        if not path or not os.path.exists(path):
            continue

        size = _format_video_size(video.get('file_size'))
        duration = _format_video_duration(video.get('duration'))

        # Format date; fields stored as None fall back to a printable
        # placeholder because None cannot be used with a width format spec.
        date = video.get('date') or 'Unknown'
        if date != 'Unknown':
            try:
                # Drop the UTC offset suffix before parsing
                date_obj = datetime.fromisoformat(str(date).split('+')[0])
                date = date_obj.strftime('%Y-%m-%d %H:%M:%S')
            except (ValueError, TypeError):
                date = str(date)

        # Keep only the tail of long paths so the column stays aligned
        if len(path) > 30:
            path = "..." + path[-27:]

        # Get media type (video type)
        media_type = video.get('media_type') or 'Unknown'
        if media_type == 'MessageMediaDocument':
            mime_type = video.get('mime_type') or ''
            if 'video' in mime_type:
                # NOTE(review): relies on the MIME type containing 'round';
                # confirm round videos are actually recorded that way.
                media_type = 'Video Circle' if 'round' in mime_type else 'Video'

        # Fall back to the database key when the record carries no id
        display_id = video.get('id') or video_id
        print(f"{display_id:<10} | {date:<20} | {media_type:<15} | {size:<10} | {duration:<10} | {path:<30}")
        listed += 1

    print("-" * 80)
    print(f"Total Videos: {listed}")


--------------------------------------------------------------------------------