├── tests ├── core │ ├── __init__.py │ └── ocr │ │ ├── __init__.py │ │ ├── test_dummy_ocr_engine.py │ │ ├── unitest_echo_ocr_engine.py │ │ └── test_ocr_engine_factory.py ├── exporter │ └── __init__.py ├── finder │ ├── __init__.py │ └── test_all_messages_finder.py ├── modules │ ├── __init__.py │ ├── telegram_maintenance │ │ └── __init__.py │ ├── telegram_report_generator │ │ └── __init__.py │ ├── test_state_file_handler.py │ ├── common.py │ ├── test_input_args_handler.py │ └── test_telegram_groups_list.py ├── notifier │ ├── __init__.py │ └── test_notifier_engine.py ├── resources │ ├── 1645024499642.txt │ ├── mat.pdf │ ├── demo.apk │ ├── unknow.mp4 │ ├── sticker.webp │ ├── AnimatedSticker.tgs │ ├── 122761750_387013276008970_8208112669996447119_n.jpg │ └── expected_generated_file_content │ │ ├── test_pandas_rolling_exporter_json_expected_15558987453_202311221005.data │ │ └── test_pandas_rolling_exporter_csv_expected_15558987453_202311221007.data ├── pytest.ini ├── test_TEx.py ├── __init__.py ├── logging.conf ├── config.ini ├── report_templates │ ├── default_index.html │ └── default_report.html └── unittest_configfile.config ├── TEx ├── __init__.py ├── py.typed ├── core │ ├── ocr │ │ ├── __init__.py │ │ ├── ocr_engine_base.py │ │ ├── dummy_ocr_engine.py │ │ ├── ocr_engine_factory.py │ │ └── tesseract_ocr_engine.py │ ├── __init__.py │ ├── mapper │ │ ├── __init__.py │ │ ├── telethon_user_mapper.py │ │ ├── keep_alive_entity_mapper.py │ │ ├── telethon_message_mapper.py │ │ └── telethon_channel_mapper.py │ ├── media_download_handling │ │ ├── __init__.py │ │ ├── do_nothing_media_downloader.py │ │ ├── photo_media_downloader.py │ │ └── std_media_downloader.py │ ├── media_metadata_handling │ │ ├── __init__.py │ │ ├── do_nothing_media_handler.py │ │ ├── geo_handler.py │ │ ├── photo_handler.py │ │ ├── pdf_handler.py │ │ ├── text_handler.py │ │ ├── generic_binary_handler.py │ │ ├── sticker_handler.py │ │ ├── mp4_handler.py │ │ └── webimage_handler.py │ ├── dir_manager.py │ ├── 
base_module.py │ ├── state_file.py │ └── temp_file.py ├── models │ ├── __init__.py │ ├── facade │ │ ├── __init__.py │ │ ├── signal_entity_model.py │ │ ├── signal_notification_model.py │ │ ├── finder_notification_facade_entity.py │ │ ├── media_handler_facade_entity.py │ │ ├── telegram_message_report_facade_entity.py │ │ └── telegram_group_report_facade_entity.py │ └── database │ │ ├── __init__.py │ │ └── temp_db_models.py ├── modules │ ├── __init__.py │ ├── telegram_maintenance │ │ ├── __init__.py │ │ └── telegram_purge_old_data.py │ ├── telegram_report_generator │ │ ├── __init__.py │ │ └── telegram_report_sent_telegram.py │ ├── temp_file_manager.py │ ├── execution_configuration_handler.py │ ├── database_handler.py │ ├── data_structure_handler.py │ ├── state_file_handler.py │ └── telegram_groups_list.py ├── exporter │ ├── __init__.py │ ├── exporter_base.py │ └── exporter_engine.py ├── finder │ ├── __init__.py │ ├── base_finder.py │ ├── all_messages_finder.py │ └── regex_finder.py ├── notifier │ ├── __init__.py │ ├── notifier_base.py │ ├── notifier_engine.py │ ├── signals_engine.py │ └── elastic_search_notifier.py ├── database │ ├── __init__.py │ ├── db_initializer.py │ ├── db_manager.py │ └── db_migration.py ├── __main__.py ├── logging.conf ├── config.ini └── report_templates │ ├── default_index.html │ └── default_report.html ├── requirements.txt ├── docs ├── requirements.txt ├── media │ ├── auth_required.png │ ├── code_provided.png │ ├── report_stats.png │ ├── how_text_works.png │ ├── export_files_list.png │ ├── html_report_files.png │ ├── html_report_index.png │ ├── text_report_files.png │ ├── html_report_content.png │ ├── text_report_content.png │ └── ocr_tensorflow_tessdata_folder.png ├── secret_chats.md ├── authentication.md ├── contact.md ├── configuration │ ├── proxy.md │ ├── media_download_configuration.md │ ├── basic.md │ ├── media_download_examples.md │ ├── ocr.md │ ├── scenario_based_examples.md │ └── complete_configuration_file_example.md ├── report │ 
├── report_status.md │ ├── report_html.md │ ├── report_text.md │ └── report_export_files.md ├── finder │ ├── finder_catchall.md │ ├── finder_regex.md │ └── configuration.md ├── maintenance │ └── purge_old_data.md ├── how_use │ ├── how_to_use_basic.md │ ├── usage_connection.md │ ├── usage_list_groups.md │ ├── usage_load_groups.md │ ├── usage_download_messages.md │ └── usage_message_listener.md ├── notification │ ├── notification_elasticsearch_signals_template.md │ ├── notification_elasticsearch.md │ ├── notification_elasticsearch_index_template.md │ ├── signals.md │ └── notification_discord.md ├── exporting │ └── pandas_rolling.md ├── changelog │ └── v030.md └── index.md ├── coverage.rc ├── .readthedocs.yaml ├── mypy.ini ├── .github └── workflows │ ├── cy.yml │ └── cy_deploy.yml ├── .gitignore ├── tox.ini ├── README.md └── mkdocs.yml /tests/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/core/ocr/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/exporter/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/finder/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/notifier/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /TEx/__init__.py: -------------------------------------------------------------------------------- 1 | """TEx Root.""" 2 | -------------------------------------------------------------------------------- /TEx/py.typed: -------------------------------------------------------------------------------- 1 | # Marker file for PEP 561 -------------------------------------------------------------------------------- /TEx/core/ocr/__init__.py: -------------------------------------------------------------------------------- 1 | """OCR Modules.""" 2 | -------------------------------------------------------------------------------- /TEx/models/__init__.py: -------------------------------------------------------------------------------- 1 | """Models Package.""" 2 | -------------------------------------------------------------------------------- /TEx/modules/__init__.py: -------------------------------------------------------------------------------- 1 | """OSIx Modules.""" 2 | -------------------------------------------------------------------------------- /tests/modules/telegram_maintenance/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /TEx/exporter/__init__.py: -------------------------------------------------------------------------------- 1 | """Exporter Modules.""" 2 | -------------------------------------------------------------------------------- /TEx/finder/__init__.py: -------------------------------------------------------------------------------- 1 | """TEx Finder Modules.""" 2 | -------------------------------------------------------------------------------- /TEx/notifier/__init__.py: -------------------------------------------------------------------------------- 1 | """Notifier Modules.""" 2 | 
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # QA 2 | tox==4.7.0 3 | poetry==1.5.1 -------------------------------------------------------------------------------- /tests/modules/telegram_report_generator/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/resources/1645024499642.txt: -------------------------------------------------------------------------------- 1 | FILE CONTENT HERE -------------------------------------------------------------------------------- /TEx/models/facade/__init__.py: -------------------------------------------------------------------------------- 1 | """Facade Objects.""" 2 | -------------------------------------------------------------------------------- /TEx/core/__init__.py: -------------------------------------------------------------------------------- 1 | """OSIx Core Modules and Classes.""" 2 | -------------------------------------------------------------------------------- /tests/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | testpaths = 3 | tests 4 | -------------------------------------------------------------------------------- /TEx/core/mapper/__init__.py: -------------------------------------------------------------------------------- 1 | """Centralized Entity Mappers.""" 2 | -------------------------------------------------------------------------------- /TEx/models/database/__init__.py: -------------------------------------------------------------------------------- 1 | """Database Models Module.""" 2 | -------------------------------------------------------------------------------- /TEx/core/media_download_handling/__init__.py: 
-------------------------------------------------------------------------------- 1 | """Media Download Module.""" 2 | -------------------------------------------------------------------------------- /TEx/modules/telegram_maintenance/__init__.py: -------------------------------------------------------------------------------- 1 | """Maintenance Modules.""" 2 | -------------------------------------------------------------------------------- /TEx/core/media_metadata_handling/__init__.py: -------------------------------------------------------------------------------- 1 | """Telegram Media Handling Files.""" 2 | -------------------------------------------------------------------------------- /TEx/modules/telegram_report_generator/__init__.py: -------------------------------------------------------------------------------- 1 | """Telegram Report Modules.""" 2 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | # QA 2 | tox==4.7.0 3 | poetry==1.5.1 4 | mkdocs-material==9.4.2 -------------------------------------------------------------------------------- /tests/resources/mat.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guibacellar/TEx/HEAD/tests/resources/mat.pdf -------------------------------------------------------------------------------- /tests/resources/demo.apk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guibacellar/TEx/HEAD/tests/resources/demo.apk -------------------------------------------------------------------------------- /tests/resources/unknow.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guibacellar/TEx/HEAD/tests/resources/unknow.mp4 
-------------------------------------------------------------------------------- /docs/media/auth_required.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guibacellar/TEx/HEAD/docs/media/auth_required.png -------------------------------------------------------------------------------- /docs/media/code_provided.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guibacellar/TEx/HEAD/docs/media/code_provided.png -------------------------------------------------------------------------------- /docs/media/report_stats.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guibacellar/TEx/HEAD/docs/media/report_stats.png -------------------------------------------------------------------------------- /tests/resources/sticker.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guibacellar/TEx/HEAD/tests/resources/sticker.webp -------------------------------------------------------------------------------- /docs/media/how_text_works.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guibacellar/TEx/HEAD/docs/media/how_text_works.png -------------------------------------------------------------------------------- /docs/media/export_files_list.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guibacellar/TEx/HEAD/docs/media/export_files_list.png -------------------------------------------------------------------------------- /docs/media/html_report_files.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guibacellar/TEx/HEAD/docs/media/html_report_files.png 
-------------------------------------------------------------------------------- /docs/media/html_report_index.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guibacellar/TEx/HEAD/docs/media/html_report_index.png -------------------------------------------------------------------------------- /docs/media/text_report_files.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guibacellar/TEx/HEAD/docs/media/text_report_files.png -------------------------------------------------------------------------------- /docs/media/html_report_content.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guibacellar/TEx/HEAD/docs/media/html_report_content.png -------------------------------------------------------------------------------- /docs/media/text_report_content.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guibacellar/TEx/HEAD/docs/media/text_report_content.png -------------------------------------------------------------------------------- /tests/resources/AnimatedSticker.tgs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guibacellar/TEx/HEAD/tests/resources/AnimatedSticker.tgs -------------------------------------------------------------------------------- /docs/secret_chats.md: -------------------------------------------------------------------------------- 1 | # A Note About Secret Chats 2 | 3 | Currently, Telegram Explorer does not offer support for Secret Chats.
-------------------------------------------------------------------------------- /docs/media/ocr_tensorflow_tessdata_folder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guibacellar/TEx/HEAD/docs/media/ocr_tensorflow_tessdata_folder.png -------------------------------------------------------------------------------- /tests/resources/122761750_387013276008970_8208112669996447119_n.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guibacellar/TEx/HEAD/tests/resources/122761750_387013276008970_8208112669996447119_n.jpg -------------------------------------------------------------------------------- /tests/test_TEx.py: -------------------------------------------------------------------------------- 1 | """TEx Main Module Tests.""" 2 | 3 | import unittest 4 | 5 | 6 | class TexTest(unittest.TestCase): 7 | 8 | def test_foo(self): 9 | assert True 10 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | data_dir: str = os.path.join(os.getcwd(), "_data") 4 | db_file: str = os.path.join(data_dir, 'local.db') 5 | 6 | if not os.path.exists(data_dir): 7 | os.mkdir(data_dir) 8 | 9 | if os.path.exists(db_file): 10 | os.remove(db_file) 11 | -------------------------------------------------------------------------------- /TEx/finder/base_finder.py: -------------------------------------------------------------------------------- 1 | """Base Class for All Finders.""" 2 | from __future__ import annotations 3 | 4 | import abc 5 | 6 | 7 | class BaseFinder: 8 | """Base Finder Class.""" 9 | 10 | @abc.abstractmethod 11 | async def find(self, raw_text: str) -> bool: 12 | """Apply Find Logic.""" 13 | -------------------------------------------------------------------------------- /coverage.rc: 
-------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | 4 | [report] 5 | omit = 6 | *venv*, 7 | *.tox*, 8 | *runner.py, 9 | *__init__.py, 10 | *__main__.py, 11 | *core/chrome_driver_manager.py, 12 | *core/constants.py, 13 | *core/decorator.py, 14 | *core/http_manager.py, 15 | *models/facade/*.py, 16 | -------------------------------------------------------------------------------- /TEx/models/facade/signal_entity_model.py: -------------------------------------------------------------------------------- 1 | """Signal Entity.""" 2 | from __future__ import annotations 3 | 4 | from typing import Dict 5 | 6 | from pydantic import BaseModel, ConfigDict 7 | 8 | 9 | class SignalEntity(BaseModel): 10 | """Signal Entity.""" 11 | 12 | model_config = ConfigDict(extra='forbid') 13 | 14 | enabled: bool 15 | keep_alive_interval: int 16 | notifiers: Dict 17 | -------------------------------------------------------------------------------- /docs/authentication.md: -------------------------------------------------------------------------------- 1 | # Authentication 2 | 3 | If you are asked to provide additional authentication during the first connection, the Telegram Explorer Runner will ask on the prompt/tty and wait until you provide the authentication challenge response.
4 | 5 | **Code Request** 6 | ![auth_required.png](media/auth_required.png) 7 | 8 | **Authentication Code sent to Telegram Account** 9 | ![code_provided.png](media/code_provided.png) 10 | -------------------------------------------------------------------------------- /TEx/core/media_metadata_handling/do_nothing_media_handler.py: -------------------------------------------------------------------------------- 1 | """Do Nothing Media Handler.""" 2 | from __future__ import annotations 3 | 4 | from typing import Dict, Optional 5 | 6 | from telethon.tl.patched import Message 7 | 8 | 9 | class DoNothingHandler: 10 | """Do Nothing Media Handler.""" 11 | 12 | @staticmethod 13 | def handle_metadata(message: Message) -> Optional[Dict]: 14 | """Handle Media Metadata.""" 15 | return None 16 | -------------------------------------------------------------------------------- /TEx/models/facade/signal_notification_model.py: -------------------------------------------------------------------------------- 1 | """Facade Entities for Signal based Notifications.""" 2 | from __future__ import annotations 3 | 4 | from datetime import datetime 5 | 6 | from pydantic import BaseModel, ConfigDict 7 | 8 | 9 | class SignalNotificationEntityModel(BaseModel): 10 | """Facade Entities for Signal based Notifications.""" 11 | 12 | model_config = ConfigDict(extra='forbid') 13 | 14 | signal: str 15 | date_time: datetime 16 | content: str 17 | -------------------------------------------------------------------------------- /TEx/finder/all_messages_finder.py: -------------------------------------------------------------------------------- 1 | """All Messages Finder.""" 2 | from configparser import SectionProxy 3 | 4 | from TEx.finder.base_finder import BaseFinder 5 | 6 | 7 | class AllMessagesFinder(BaseFinder): 8 | """All Messages Based Finder.""" 9 | 10 | def __init__(self, config: SectionProxy) -> None: 11 | """Initialize All Messages Finder.""" 12 | 13 | async def find(self, raw_text: str) -> bool: 
14 | """Find Message. Always Return True.""" 15 | return True 16 | -------------------------------------------------------------------------------- /tests/core/ocr/test_dummy_ocr_engine.py: -------------------------------------------------------------------------------- 1 | """Test the Dummy OCR Engine.""" 2 | 3 | import unittest 4 | 5 | from TEx.core.ocr.dummy_ocr_engine import DummyOcrEngine 6 | from TEx.core.ocr.ocr_engine_base import OcrEngineBase 7 | 8 | 9 | class DummyOcrEngineTest(unittest.TestCase): 10 | 11 | def test_all(self): 12 | """Test Dummy Engine.""" 13 | 14 | target: OcrEngineBase = DummyOcrEngine() 15 | target.configure(config=None) 16 | self.assertIsNone(target.run(file_path='/folder/path')) 17 | -------------------------------------------------------------------------------- /TEx/core/dir_manager.py: -------------------------------------------------------------------------------- 1 | """Directory Manager.""" 2 | 3 | import os 4 | 5 | 6 | class DirectoryManagerUtils: 7 | """Directory Manager.""" 8 | 9 | @staticmethod 10 | def ensure_dir_struct(path: str) -> None: 11 | """Ensure That Directory Exists. 
12 | 13 | :param path: 14 | :return: 15 | """ 16 | target_path: str = os.path.abspath(os.path.join(os.getcwd(), path)) 17 | 18 | if not os.path.exists(target_path): 19 | os.makedirs(target_path, exist_ok=True) 20 | -------------------------------------------------------------------------------- /tests/logging.conf: -------------------------------------------------------------------------------- 1 | [loggers] 2 | keys=root,sqlalchemy 3 | 4 | [handlers] 5 | keys=consoleHandler 6 | 7 | [formatters] 8 | keys=simpleFormatter 9 | 10 | [logger_root] 11 | level=DEBUG 12 | handlers=consoleHandler 13 | 14 | [logger_sqlalchemy] 15 | level=ERROR 16 | handlers=consoleHandler 17 | qualname='' 18 | 19 | [handler_consoleHandler] 20 | class=StreamHandler 21 | level=DEBUG 22 | formatter=simpleFormatter 23 | args=(sys.stdout,) 24 | 25 | [formatter_simpleFormatter] 26 | format=%(asctime)s - %(levelname)s - %(message)s 27 | -------------------------------------------------------------------------------- /TEx/core/media_download_handling/do_nothing_media_downloader.py: -------------------------------------------------------------------------------- 1 | """Do Nothing Media Downloader.""" 2 | from __future__ import annotations 3 | 4 | from typing import Dict 5 | 6 | from telethon.tl.patched import Message 7 | 8 | 9 | class DoNothingMediaDownloader: 10 | """Do Nothing Media Downloader.""" 11 | 12 | @staticmethod 13 | async def download(message: Message, media_metadata: Dict, data_path: str) -> None: 14 | """Download the Media, Update MetadaInfo and Return the ID from DB Record. 
15 | 16 | :param message: 17 | :param media_metadata: 18 | :return: 19 | """ 20 | return 21 | -------------------------------------------------------------------------------- /TEx/core/ocr/ocr_engine_base.py: -------------------------------------------------------------------------------- 1 | """Base Class for OCR Engine.""" 2 | from __future__ import annotations 3 | 4 | import abc 5 | from configparser import SectionProxy 6 | from typing import Optional 7 | 8 | 9 | class OcrEngineBase: 10 | """Base Class for OCR Engine.""" 11 | 12 | def __init__(self) -> None: 13 | """Initialize Base Class.""" 14 | 15 | @abc.abstractmethod 16 | def configure(self, config: Optional[SectionProxy]) -> None: 17 | """Configure Abstract Method.""" 18 | 19 | @abc.abstractmethod 20 | def run(self, file_path: str) -> Optional[str]: 21 | """Extract Text from Image.""" 22 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the OS, Python version and other tools you might need 9 | build: 10 | os: ubuntu-22.04 11 | tools: 12 | python: "3.8" 13 | # You can also specify other tool versions: 14 | # nodejs: "19" 15 | # rust: "1.64" 16 | # golang: "1.19" 17 | 18 | mkdocs: 19 | configuration: mkdocs.yml 20 | 21 | # Optionally declare the Python requirements required to build your docs 22 | python: 23 | install: 24 | - requirements: docs/requirements.txt -------------------------------------------------------------------------------- /TEx/database/__init__.py: -------------------------------------------------------------------------------- 1 | """Database Module.""" 2 | from cachetools import Cache, TTLCache 3 | 4 | 5 | class NoneSupportedTTLCache(TTLCache): 6 | """Cache 
Customization to not Save None Values in Memory.""" 7 | 8 | def __setitem__(self, key, value, cache_setitem=Cache.__setitem__) -> None: # type: ignore 9 | """Customize __setitem__ to do not save nullable values.""" 10 | if value: 11 | super().__setitem__(key, value, cache_setitem) # type: ignore 12 | 13 | 14 | GROUPS_CACHE: NoneSupportedTTLCache = NoneSupportedTTLCache(maxsize=256, ttl=300) 15 | USERS_CACHE: NoneSupportedTTLCache = NoneSupportedTTLCache(maxsize=2048, ttl=300) 16 | -------------------------------------------------------------------------------- /TEx/core/ocr/dummy_ocr_engine.py: -------------------------------------------------------------------------------- 1 | """Dummy OCR Engine.""" 2 | from __future__ import annotations 3 | 4 | from configparser import SectionProxy 5 | from typing import Optional 6 | 7 | from TEx.core.ocr.ocr_engine_base import OcrEngineBase 8 | 9 | 10 | class DummyOcrEngine(OcrEngineBase): 11 | """Dummy OCR Engine.""" 12 | 13 | def __init__(self) -> None: 14 | """Initialize Dummy Engine.""" 15 | super().__init__() 16 | 17 | def configure(self, config: Optional[SectionProxy]) -> None: 18 | """Configure Dummy Engine.""" 19 | 20 | def run(self, file_path: str) -> Optional[str]: 21 | """Do Nothing.""" 22 | return None 23 | -------------------------------------------------------------------------------- /docs/contact.md: -------------------------------------------------------------------------------- 1 | # Contact 2 | 3 | **Th3 0bservator** [https://www.theobservator.net/](https://www.theobservator.net/) 4 | 5 | [![Foo](https://img.shields.io/badge/RSS-FFA500?style=for-the-badge&logo=rss&logoColor=white)](https://www.theobservator.net/) 6 | [![Foo](https://img.shields.io/badge/Twitter-1DA1F2?style=for-the-badge&logo=twitter&logoColor=white)](https://twitter.com/th3_0bservator) 7 | [![Foo](https://img.shields.io/badge/GitHub-100000?style=for-the-badge&logo=github&logoColor=white)](https://github.com/guibacellar/) 8 | 
[![Foo](https://img.shields.io/badge/LinkedIn-0077B5?style=for-the-badge&logo=linkedin&logoColor=white)](https://www.linkedin.com/in/guilherme-bacellar/) 9 | -------------------------------------------------------------------------------- /TEx/core/media_download_handling/photo_media_downloader.py: -------------------------------------------------------------------------------- 1 | """Photo Media Downloader.""" 2 | from __future__ import annotations 3 | 4 | import os 5 | from typing import Dict 6 | 7 | from telethon.tl.patched import Message 8 | 9 | 10 | class PhotoMediaDownloader: 11 | """Photo Media Downloader.""" 12 | 13 | @staticmethod 14 | async def download(message: Message, media_metadata: Dict, data_path: str) -> None: 15 | """Download the Media and Update MetadaInfo. 16 | 17 | :param message: 18 | :param media_metadata: 19 | :return: 20 | """ 21 | # Download Media 22 | await message.download_media(os.path.join(data_path, media_metadata['file_name'])) 23 | -------------------------------------------------------------------------------- /TEx/core/base_module.py: -------------------------------------------------------------------------------- 1 | """OSIx Base Module.""" 2 | from __future__ import annotations 3 | 4 | import abc 5 | from configparser import ConfigParser 6 | from typing import Dict 7 | 8 | 9 | class BaseModule: 10 | """Base Module Declaration.""" 11 | 12 | @abc.abstractmethod 13 | async def run(self, config: ConfigParser, args: Dict, data: Dict) -> None: 14 | """ 15 | Abstract Base Run Description. 16 | 17 | :return: None 18 | """ 19 | 20 | @abc.abstractmethod 21 | async def can_activate(self, config: ConfigParser, args: Dict, data: Dict) -> bool: 22 | """ 23 | Abstract Method for Module Activation Function. 
24 | 25 | :return: 26 | """ 27 | -------------------------------------------------------------------------------- /docs/configuration/proxy.md: -------------------------------------------------------------------------------- 1 | # Proxy 2 | If you need to use a proxy server, you can configure this behavior within the configuration file. If not, just omit this section from your file. 3 | 4 | ```ini 5 | [PROXY] 6 | type=HTTP 7 | address=127.0.0.1 8 | port=3128 9 | username=proxy username 10 | password=proxy password 11 | rdns=true 12 | ``` 13 | 14 | * **type** > Required - Protocol to use (HTTP, SOCKS5 or SOCKS4) 15 | * **address** > Required - Proxy Address 16 | * **port** > Required - Proxy IP Port 17 | * **username** > Optional - Username if the proxy requires auth 18 | * **password** > Optional - Password if the proxy requires auth 19 | * **rdns** > Optional - Whether to use remote or local resolve, default remote 20 | -------------------------------------------------------------------------------- /docs/report/report_status.md: -------------------------------------------------------------------------------- 1 | # Internal Status Report 2 | 3 | Telegram Explorer allows you to generate an HTML report containing messages and assets (images, videos, binaries, etc.) from groups. You may also specify groups, a period and message filters to generate a more customized report.
4 | 5 | **Full Command:** 6 | 7 | ```bash 8 | python3 -m TEx stats --config CONFIGURATION_FILE_PATH --report_folder REPORT_FOLDER_PATH --limit_days 3 9 | ``` 10 | 11 | **Parameters** 12 | 13 | * **config** > Required - Created Configuration File Path 14 | * **report_folder** > Required - Defines the Report Files Folder 15 | * **limit_days** > Optional - Number of Days of past to filter the Report 16 | 17 | *Output Example:* 18 | ![report_stats.png](../media/report_stats.png) -------------------------------------------------------------------------------- /docs/finder/finder_catchall.md: -------------------------------------------------------------------------------- 1 | # Message Finder System - Catch All Messages 2 | 3 | **Compatibility:** Message Listener Command 4 | 5 | Telegram Explorer allows you to catch all messages and redirect them to one or more notification connectors. 6 | 7 | **Configuration Spec:** 8 | 9 | For each rule to be used, you must set a configuration using the default name schema *FINDER.RULE.* 10 | 11 | **Parameters:** 12 | 13 | * **type** > Required - Fixed Value 'all' 14 | * **notifier** > Required - Name of notifiers to be used to notify the triggered message (comma separated).
15 | 16 | **Changes on Configuration File** 17 | ```ini 18 | [FINDER] 19 | enabled=true 20 | 21 | [FINDER.RULE.CatchAll] 22 | type=all 23 | notifier=NOTIFIER.ELASTIC_SEARCH.GENERAL 24 | ``` -------------------------------------------------------------------------------- /tests/core/ocr/unitest_echo_ocr_engine.py: -------------------------------------------------------------------------------- 1 | """Echo OCR Engine for Unittest only.""" 2 | from configparser import SectionProxy 3 | from typing import Optional 4 | 5 | from TEx.core.ocr.ocr_engine_base import OcrEngineBase 6 | 7 | 8 | class UnitTestEchoOcrEngine(OcrEngineBase): 9 | """Dummy OCR Engine.""" 10 | 11 | def __init__(self, echo_message: Optional[str]) -> None: 12 | """Echo OCR Engine for Unittest only.""" 13 | super().__init__() 14 | self.echo: Optional[str] = echo_message 15 | 16 | def configure(self, config: Optional[SectionProxy]) -> None: 17 | """Configure Dummy Engine.""" 18 | pass 19 | 20 | def run(self, file_path: str) -> Optional[str]: 21 | """Do Nothing.""" 22 | return self.echo 23 | -------------------------------------------------------------------------------- /TEx/__main__.py: -------------------------------------------------------------------------------- 1 | """Main Executor for python -m TEx.""" 2 | 3 | import sys 4 | import os 5 | 6 | # If we are running from a wheel, add the wheel to sys.path 7 | if __package__ == "TEx": 8 | 9 | # __file__ is OSIx/__main__.py 10 | # first dirname call strips of '/__main__.py' 11 | # Resulting path is the name of the wheel itself 12 | # Add that to sys.path so we can import pip 13 | path = os.path.dirname(__file__) 14 | sys.path.insert(0, path) 15 | os.chdir(os.path.dirname(__file__)) 16 | 17 | if __name__ == "__main__": 18 | # Work around the error reported in #9540, pending a proper fix. 19 | # Note: It is essential the warning filter is set *before* importing 20 | # pip, as the deprecation happens at import time, not runtime. 
class AllMessagesFinderTest(unittest.TestCase):
    """Unit tests for AllMessagesFinder."""

    def setUp(self) -> None:
        """Create the parser handed to the finder under test."""
        self.config = ConfigParser()
        # NOTE(review): the path is relative to the current working directory and
        # ConfigParser.read() silently skips files it cannot find - confirm the
        # runner's working directory if the finder ever depends on config values.
        self.config.read('../../config.ini')

    def test_find_true(self):
        """Test the always true return, for both text and None input."""
        target: AllMessagesFinder = AllMessagesFinder(config=self.config)

        async def run_finds():
            # gather() is awaited inside a coroutine so it always has a running loop.
            return await asyncio.gather(
                target.find(raw_text='foo'),
                target.find(raw_text=None),
            )

        # A private loop avoids the deprecated implicit asyncio.get_event_loop()
        # and leaves the process-wide current loop untouched for other tests.
        loop = asyncio.new_event_loop()
        try:
            h_result_content, h_result_none = loop.run_until_complete(run_finds())
        finally:
            loop.close()

        self.assertTrue(h_result_content)
        self.assertTrue(h_result_none)
message_id: Optional[int] 25 | is_reply: Optional[bool] 26 | downloaded_media_info: Optional[MediaHandlingEntity] 27 | found_on: str 28 | -------------------------------------------------------------------------------- /docs/maintenance/purge_old_data.md: -------------------------------------------------------------------------------- 1 | # Maintenance - Purge Old Data 2 | 3 | Like any system or application that uses a database to store information, Telegram Explorer eventually needs database maintenance to keep working properly and to remove old data. 4 | 5 | Our maintenance command purges all old messages and media from the database and filesystem. 6 | 7 | 8 | > NOTE: While other commands can be executed side-by-side, or simultaneously, the 'purge_old_data' command needs to be executed alone. Stop all TeX instances that use the same configuration file, especially the 'listen' command, before performing the maintenance. 9 | 10 | **Full Command:** 11 | 12 | ```bash 13 | python3 -m TEx purge_old_data --config CONFIGURATION_FILE_PATH --limit_days 30 14 | ``` 15 | **Parameters** 16 | 17 | * **config** > Required - Created Configuration File Path 18 | * **limit_days** > Optional - Number of Days of past to remove the messages and files. 
class DbInitializer:
    """Single entry point that prepares every database used by TEx."""

    @staticmethod
    def init(data_path: str) -> None:
        """Open the databases, create missing tables and apply migrations.

        :param data_path: Folder passed on to ``DbManager.init_db``.
        """
        # Open the engines first so the binds below are available.
        DbManager.init_db(data_path=data_path)

        # Create any missing table on each bind; checkfirst leaves existing tables alone.
        schema_targets = (
            (TempDataBaseDeclarativeBase, 'temp'),
            (TelegramDataBaseDeclarativeBase, 'data'),
        )
        for declarative_base, bind_key in schema_targets:
            declarative_base.metadata.create_all(DbManager.SQLALCHEMY_BINDS[bind_key], checkfirst=True)

        # Run the migrations once the base structure exists.
        DatabaseMigrator.apply_migrations()
20 | 21 | :return: 22 | """ 23 | return True 24 | 25 | async def run(self, config: ConfigParser, args: Dict, data: Dict) -> None: 26 | """Execute Module.""" 27 | if args['purge_temp_files']: 28 | TempFileHandler.purge() 29 | 30 | else: 31 | TempFileHandler.remove_expired_entries() 32 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | plugins = sqlalchemy.ext.mypy.plugin, pydantic.mypy 3 | 4 | ignore_missing_imports = True 5 | 6 | check_untyped_defs = True 7 | 8 | disallow_any_explicit = True 9 | disallow_any_unimported = False 10 | disallow_any_expr = False 11 | disallow_any_decorated = False 12 | disallow_any_generics = False 13 | disallow_subclassing_any = True 14 | disallow_untyped_calls = True 15 | disallow_untyped_defs = True 16 | disallow_incomplete_defs = True 17 | disallow_untyped_decorators = False 18 | 19 | warn_redundant_casts = True 20 | warn_unused_ignores = True 21 | warn_no_return = True 22 | warn_return_any = True 23 | warn_unreachable = True 24 | 25 | namespace_packages = True 26 | 27 | follow_imports = normal 28 | 29 | files = TEx/**/*.py 30 | 31 | [pydantic-mypy] 32 | init_forbid_extra = True 33 | init_typed = True 34 | warn_required_dynamic_aliases = True 35 | -------------------------------------------------------------------------------- /TEx/core/media_metadata_handling/geo_handler.py: -------------------------------------------------------------------------------- 1 | """Geo Media Handler.""" 2 | from __future__ import annotations 3 | 4 | from typing import Dict, Optional 5 | 6 | from telethon.tl.patched import Message 7 | from telethon.tl.types import MessageMediaGeo 8 | 9 | 10 | class GeoMediaHandler: 11 | """Geo Media Handler.""" 12 | 13 | @staticmethod 14 | def handle_metadata(message: Message) -> Optional[Dict]: 15 | """Handle Media Metadata.""" 16 | # Get Media 17 | geo: MessageMediaGeo = 
class PhotoMediaHandler:
    """Build the metadata dictionary for photo messages."""

    @staticmethod
    def handle_metadata(message: Message) -> Optional[Dict]:
        """Collect photo metadata from the message.

        :param message: Message whose ``media.photo`` and ``file`` are read.
        :return: Dict with file name, Telegram id, extension, dimensions,
                 date, MIME type and size; ``title`` and ``name`` are None.
        """
        media: MessageMediaPhoto = message.media
        file_info = message.file

        metadata: Dict = dict(
            file_name=f'photo{file_info.ext}',
            telegram_id=media.photo.id,
            extension=file_info.ext,
            height=file_info.height,
            width=file_info.width,
            date_time=media.photo.date,
            mime_type=file_info.mime_type,
            size_bytes=file_info.size,
            title=None,
            name=None,
        )
        return metadata
class RegexFinder(BaseFinder):
    """Finder that matches message text against one or more regular expressions."""

    def __init__(self, config: SectionProxy) -> None:
        """Compile the patterns listed in the rule's ``regex`` option.

        A multi-line value yields one pattern per non-empty line; a value
        without line breaks is used as a single pattern. Every pattern is
        compiled with ``re.IGNORECASE | re.MULTILINE``.

        :param config: Rule section that must contain the ``regex`` option.
        """
        raw_regex_content: str = config['regex']

        regex_conf_list: List[str]
        if '\n' in raw_regex_content:
            regex_conf_list = [item for item in raw_regex_content.split('\n') if item]
        else:
            regex_conf_list = [raw_regex_content]

        self.regex_patterns: List[re.Pattern] = [
            re.compile(regex_conf, flags=re.IGNORECASE | re.MULTILINE) for regex_conf in regex_conf_list
        ]

    async def find(self, raw_text: str) -> bool:
        """Report whether any configured pattern occurs in the text.

        :param raw_text: Text to inspect; None or an empty string never matches.
        :return: True when at least one pattern matches somewhere in the text.
        """
        if not raw_text:
            return False

        # search() stops at the first hit; findall() would collect every match
        # only to check that the list is not empty.
        return any(pattern.search(raw_text) is not None for pattern in self.regex_patterns)
[logger_sqlalchemy]
level=ERROR
handlers=consoleHandler
# The value is read verbatim (quotes are not stripped), so it must be the bare
# logger name for this section to configure the "sqlalchemy" logger.
qualname=sqlalchemy
class TextPlainHandler:
    """Plain Text Media Handler - text/plain."""

    @staticmethod
    def handle_metadata(message: Message) -> Optional[Dict]:
        """Collect metadata for a plain-text document message.

        :param message: Message whose ``media.document`` is read.
        :return: Dict with file name, Telegram id, date, MIME type and size;
                 the remaining keys are None.
        """
        media: MessageMediaDocument = message.media
        fn_attr = [item for item in media.document.attributes if isinstance(item, DocumentAttributeFilename)]

        return {
            # Documents may come without a filename attribute; fall back to a
            # placeholder name instead of failing with IndexError.
            'file_name': fn_attr[0].file_name if fn_attr else 'unknow.txt',
            'telegram_id': media.document.id,
            'extension': None,
            'height': None,
            'width': None,
            'date_time': media.document.date,
            'mime_type': media.document.mime_type,
            'size_bytes': media.document.size,
            'title': None,
            'name': None,
        }
class GenericBinaryMediaHandler:
    """Build the metadata dictionary for documents without a dedicated handler."""

    @staticmethod
    def handle_metadata(message: Message) -> Optional[Dict]:
        """Collect metadata for a generic binary document.

        :param message: Message whose ``media.document`` is read.
        :return: Dict with file name ('unknow.bin' when the document has no
                 filename attribute), Telegram id, date, MIME type and size;
                 the remaining keys are None.
        """
        media: MessageMediaDocument = message.media

        # First filename attribute, if the document carries one.
        name_attribute = next(
            (entry for entry in media.document.attributes if isinstance(entry, DocumentAttributeFilename)),
            None,
        )
        if name_attribute is not None:
            resolved_name = name_attribute.file_name
        else:
            resolved_name = 'unknow.bin'

        return {
            'file_name': resolved_name,
            'telegram_id': media.document.id,
            'extension': None,
            'height': None,
            'width': None,
            'date_time': media.document.date,
            'mime_type': media.document.mime_type,
            'size_bytes': media.document.size,
            'title': None,
            'name': None,
        }
class DatabaseHandler(BaseModule):
    """Module That Handle the Internal DB."""

    async def can_activate(self, config: ConfigParser, args: Dict, data: Dict) -> bool:
        """Report that this module always runs.

        :return: Always True.
        """
        return True

    async def run(self, config: ConfigParser, args: Dict, data: Dict) -> None:
        """Ensure the data folder exists, initialize the DB and expire temp entries.

        :param config: Parsed configuration; ``[CONFIGURATION] data_path`` is read.
        :param args: Unused here.
        :param data: Unused here.
        """
        data_path: str = config['CONFIGURATION']['data_path']

        # makedirs creates missing parent folders too, and exist_ok avoids the
        # race between checking for the folder and creating it.
        os.makedirs(data_path, exist_ok=True)

        # Initialize DB
        DbInitializer.init(data_path)

        # Expire Temp Files
        TempFileHandler.remove_expired_entries()
class TempDataBaseDeclarativeBase(DeclarativeBase):
    """Global Temporary Declarative Base."""


class TempDataOrmEntity(TempDataBaseDeclarativeBase):
    """Temporary Data ORM Model (table ``temporary_data`` on the ``temp`` bind)."""

    __bind_key__ = 'temp'
    __tablename__ = 'temporary_data'

    # Mapped[...] carries the Python-side type; the SQL type is given to mapped_column.
    path: Mapped[str] = mapped_column(String(255), primary_key=True)
    module: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
    data: Mapped[str] = mapped_column(String)
    created_at: Mapped[int] = mapped_column(Integer)
    valid_at: Mapped[int] = mapped_column(Integer)


class StateFileOrmEntity(TempDataBaseDeclarativeBase):
    """State File ORM Model (table ``state_file`` on the ``temp`` bind)."""

    __bind_key__ = 'temp'
    __tablename__ = 'state_file'

    path: Mapped[str] = mapped_column(String(255), primary_key=True)
    data: Mapped[str] = mapped_column(String)
    created_at: Mapped[int] = mapped_column(Integer)
class MediaHandlingEntity(BaseModel):
    """Facade entity describing one downloaded media file."""

    media_id: int
    file_name: str
    content_type: str
    size_bytes: int
    disk_file_path: str
    is_ocr_supported: bool

    def is_image(self) -> bool:
        """Tell whether the content type is one of the known image types."""
        image_types = ('image/gif', 'image/jpeg', 'image/png', 'image/webp', 'application/gif')
        return self.content_type in image_types

    def is_video(self) -> bool:
        """Tell whether the content type is one of the known video types."""
        video_types = ('application/ogg', 'video/mp4', 'video/quicktime', 'video/webm')
        return self.content_type in video_types

    def allow_search_in_text_file(self) -> bool:
        """Tell whether the content type is a text format that may be searched."""
        searchable_types = (
            'application/atom+xml',
            'application/bittorrent',
            'application/csv',
            'application/html',
            'application/json',
            'application/ld+json',
            'text/csv',
            'text/html',
            'text/plain',
            'text/xml',
        )
        return self.content_type in searchable_types
class MediaStickerHandler:
    """Sticker Media Handler - application/x-tgsticker."""

    @staticmethod
    def handle_metadata(message: Message) -> Optional[Dict]:
        """Collect metadata for a sticker document message.

        :param message: Message whose ``media.document`` is read.
        :return: Dict with file name, Telegram id, image size (when the
                 document carries one), date, MIME type and size.
        """
        media: MessageMediaDocument = message.media
        fn_attr: List = [item for item in media.document.attributes if isinstance(item, DocumentAttributeFilename)]
        fn_attr_img: List = [item for item in media.document.attributes if isinstance(item, DocumentAttributeImageSize)]

        return {
            # A missing filename attribute falls back to a placeholder name
            # instead of failing with IndexError.
            'file_name': fn_attr[0].file_name if fn_attr else 'unknow.tgs',
            'telegram_id': media.document.id,
            'extension': None,
            'height': fn_attr_img[0].h if fn_attr_img else None,
            'width': fn_attr_img[0].w if fn_attr_img else None,
            'date_time': media.document.date,
            'mime_type': media.document.mime_type,
            'size_bytes': media.document.size,
            'title': None,
            'name': None,
        }
-------------------------------------------------------------------------------- /docs/report/report_text.md: -------------------------------------------------------------------------------- 1 | # Generate Report - Text 2 | 3 | Telegram Explorer allows you to export Text content based on Regular Expression Extractors. 4 | 5 | This way, you can generate simple outputs containing any type of information present in messages. 6 | 7 | **Full Command:** 8 | 9 | ```bash 10 | python3 -m TEx export_text --config CONFIGURATION_FILE_PATH --order_desc --limit_days 3 --regex REGEX_CAPTURE_GROUP --report_folder REPORT_FOLDER_PATH --group_id 12547,1256698 11 | ``` 12 | 13 | **Basic Command:** 14 | 15 | ```bash 16 | python3 -m TEx export_text --config CONFIGURATION_FILE_PATH --limit_days 3 --regex REGEX_CAPTURE_GROUP --report_folder REPORT_FOLDER_PATH 17 | ``` 18 | **Parameters** 19 | 20 | * **config** > Required - Created Configuration File Path 21 | * **report_folder** > Required - Defines the Report Files Folder 22 | * **group_id** > Optional - If present, Download the Messages only from Specified Groups ID's 23 | * **limit_days** > Optional - Number of Days of past to filter the Messages 24 | * **regex** > Required - Regex Capture Group to find the messages. 
25 | * Ex: Export Links from Messages (http[s]?:\/\/[^\"\',]*) 26 | 27 | *Output Example Using "(http[s]?:\/\/[^\"\',]*)" Regular Expression:* 28 | 29 | *Report Folder* 30 | ![text_report_files.png](../media/text_report_files.png) 31 | 32 | *File Content* 33 | ![text_report_content.png](../media/text_report_content.png) -------------------------------------------------------------------------------- /.github/workflows/cy.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | 6 | paths-ignore: 7 | - '**/*.md' 8 | - '**/docs/*.*' 9 | - '**/.github/workflows/deploy.yml' 10 | - '**/.github/workflows/publish.yml' 11 | 12 | pull_request: 13 | branches: 14 | - V*-dev 15 | - main 16 | 17 | paths-ignore: 18 | - '**/*.md' 19 | - '**/docs/*.md' 20 | 21 | jobs: 22 | CodeQuality: 23 | runs-on: ubuntu-latest 24 | 25 | steps: 26 | - uses: actions/checkout@v2 27 | - name: Set up Python 3.8 28 | uses: actions/setup-python@v2 29 | with: 30 | python-version: 3.8 31 | 32 | - name: Install dependencies 33 | run: | 34 | python -m pip install --upgrade pip 35 | python -m pip install -r requirements.txt 36 | 37 | - name: Run Code Quality 38 | run: | 39 | tox -e quality 40 | 41 | TestsAndCodeCoverage: 42 | runs-on: ubuntu-latest 43 | needs: CodeQuality 44 | 45 | steps: 46 | - uses: actions/checkout@v2 47 | - name: Set up Python 3.8 48 | uses: actions/setup-python@v2 49 | with: 50 | python-version: 3.8 51 | 52 | - name: Install dependencies 53 | run: | 54 | python -m pip install --upgrade pip 55 | python -m pip install -r requirements.txt 56 | 57 | - name: Run Unittests and Code Coverage 58 | run: | 59 | tox -e coverage 60 | -------------------------------------------------------------------------------- /TEx/modules/data_structure_handler.py: -------------------------------------------------------------------------------- 1 | """Database Handler.""" 2 | from __future__ import annotations 3 | 4 | import logging 5 | import 
class StandardMediaDownloader:
    """Standard Media Downloader."""

    @staticmethod
    async def download(message: Message, media_metadata: Dict, data_path: str) -> None:
        """Download the media and record the stored file's extension.

        :param message: Message whose ``download_media`` coroutine is awaited.
        :param media_metadata: Metadata dict; ``file_name`` is read and
            ``extension`` is updated in place. Nothing happens when it is empty.
        :param data_path: Folder where the file is written.
        :return: None
        """
        if not media_metadata:
            return

        # Download Media
        target_path: str = os.path.join(data_path, StandardMediaDownloader.__sanitize_media_filename(media_metadata['file_name']))
        generated_path = await message.download_media(target_path)

        # download_media yields None when nothing was written; keep the
        # metadata untouched instead of failing inside os.path.splitext.
        if generated_path is None:
            return

        media_metadata['extension'] = os.path.splitext(generated_path)[1]

    @staticmethod
    def __sanitize_media_filename(filename: str) -> str:
        """Replace every character that is not alphanumeric, space, dot or dash with '_'.

        :param filename: File name as reported for the media.
        :return: Name that is safe to join to the data folder.
        """
        h_result: str = ''.join(char if char.isalnum() or char in ' .-' else '_' for char in filename)

        # A bare '.' or '..' would make os.path.join point at the data folder
        # itself or at its parent, so those names are neutralized as well.
        if h_result in ('.', '..'):
            h_result = '_' * len(h_result)

        return h_result
class WebImageStickerHandler:
    """Web Image Media Handler - image/webp."""

    @staticmethod
    def handle_metadata(message: Message) -> Optional[Dict]:
        """Handle Media Metadata.

        :param message: Message whose ``media.document`` describes the web image.
        :return: Metadata dict, or None when the document carries no file name attribute.
            ``height`` and ``width`` are None when the document has no image size attribute.
        """
        media: MessageMediaDocument = message.media
        attributes: List = media.document.attributes

        fn_attr: List = [item for item in attributes if isinstance(item, DocumentAttributeFilename)]
        if not fn_attr:
            return None

        # Look the size attribute up once; a document without it must not
        # abort the whole metadata extraction with an IndexError
        size_attr: List = [item for item in attributes if isinstance(item, DocumentAttributeImageSize)]
        image_size = size_attr[0] if size_attr else None

        return {
            'file_name': fn_attr[0].file_name,
            'telegram_id': media.document.id,
            'extension': None,
            'height': image_size.h if image_size is not None else None,
            'width': image_size.w if image_size is not None else None,
            'date_time': media.document.date,
            'mime_type': media.document.mime_type,
            'size_bytes': media.document.size,
            'title': None,
            'name': None,
        }
class TelegramMessageReportFacadeEntity:
    """Facade Entity for Report Generation."""

    # Identifiers
    id: int
    group_id: int
    media_id: Optional[int]

    # Message content
    date_time: datetime.datetime
    message: str
    raw: str

    # Sender / recipient references
    from_id: Optional[int]
    from_type: Optional[str]
    to_id: Optional[int]

    # Declared only; the mapper in this module does not assign them
    meta_next: bool
    meta_previous: bool


class TelegramMessageReportFacadeEntityMapper:
    """Mapper for TelegramMessageReportFacadeEntity."""

    @staticmethod
    def create_from_dbentity(source: TelegramMessageOrmEntity) -> TelegramMessageReportFacadeEntity:
        """Build a report facade entity from a message ORM entity.

        :param source: ORM entity whose values are copied attribute by attribute.
        :return: New facade entity; ``meta_next`` and ``meta_previous`` are left unset.
        """
        facade: TelegramMessageReportFacadeEntity = TelegramMessageReportFacadeEntity()

        copied_fields = (
            'id', 'group_id', 'media_id',
            'date_time', 'message', 'raw',
            'from_id', 'from_type', 'to_id',
        )
        for field_name in copied_fields:
            setattr(facade, field_name, getattr(source, field_name))

        return facade
class StateFileHandlerTest(unittest.TestCase):
    """Save/Load round trip of the state file modules."""

    def setUp(self) -> None:
        """Read the pipeline configuration and run the common test setup."""
        parser = ConfigParser()
        parser.read('config.ini')
        self.config = parser

        TestsCommon.basic_test_setup()

    def test_run(self):
        """A state written by SaveStateFileHandler is read back unchanged by LoadStateFileHandler."""
        loader: LoadStateFileHandler = LoadStateFileHandler()
        saver: SaveStateFileHandler = SaveStateFileHandler()
        args: Dict = {'config': 'unittest_configfile.config'}
        saved_state: Dict = {'demo': 1, 'internals': {'panic': False}}

        TestsCommon.execute_basic_pipeline_steps_for_initialization(config=self.config, args=args, data=saved_state)

        event_loop = asyncio.get_event_loop()

        # Persist the state first ...
        event_loop.run_until_complete(
            saver.run(config=self.config, args=args, data=saved_state),
        )

        # ... then load it into an empty dict
        loaded_state: Dict = {}
        event_loop.run_until_complete(
            loader.run(config=self.config, args=args, data=loaded_state),
        )

        self.assertEqual(loaded_state, saved_state)
class OcrEngineFactory:
    """Factory Class for OCR Engines."""

    @staticmethod
    def get_instance(config: ConfigParser) -> OcrEngineBase:
        """Build the OCR engine selected by the configuration.

        A DummyOcrEngine is returned when the [OCR] section is absent or its
        'enabled' value is anything other than 'true'.

        :param config: Parsed configuration file.
        :return: Configured OCR engine.
        :raises AttributeError: OCR is enabled but 'type' is not 'tesseract'.
        :raises KeyError: 'tesseract' is selected but the [OCR.TESSERACT] section is absent.
        """
        if not config.has_section('OCR'):
            return DummyOcrEngine()

        ocr_section: SectionProxy = config['OCR']

        # Anything but the exact string 'true' keeps OCR switched off
        if ocr_section.get('enabled', fallback='false') != 'true':
            return DummyOcrEngine()

        ocr_type: str = ocr_section.get('type', fallback='none')

        # Tesseract is the only engine type known to this factory
        if ocr_type != 'tesseract':
            error_msg: str = f'Invalid OCR Type "{ocr_type}"'
            raise AttributeError(error_msg)

        engine: OcrEngineBase = TesseractOcrEngine()
        engine_section: Optional[SectionProxy] = config['OCR.TESSERACT']

        # Configure Engine
        engine.configure(config=engine_section)

        return engine
30 | 31 | **EXAMPLE (myconfig.config)** 32 | ```ini 33 | [CONFIGURATION] 34 | api_id=12555896 35 | api_hash=dead1f29db5d1fa56cc42757acbabeef 36 | phone_number=15552809753 37 | data_path=/usr/home/tex_data/ 38 | device_model=AMD64 39 | timeout=15 40 | ``` 41 | -------------------------------------------------------------------------------- /docs/how_use/how_to_use_basic.md: -------------------------------------------------------------------------------- 1 | # Basic Usage 2 | 3 | ## The Basics 4 | Considering a *my_TEx_config.config* file created at */usr/my_TEx_config.config* with the following content: 5 | 6 | ```ini 7 | [CONFIGURATION] 8 | api_id=12555896 9 | api_hash=dead1f29db5d1fa56cc42757acbabeef 10 | phone_number=15552809753 11 | data_path=/usr/home/tex_data/ 12 | ``` 13 | 14 | Execute the first 2 commands to configure and sync TEx and the last one to activate the listener module. 15 | 16 | ```bash 17 | python3 -m TEx connect --config /usr/my_TEx_config.config 18 | python3 -m TEx load_groups --config /usr/my_TEx_config.config 19 | python3 -m TEx listen --config /usr/my_TEx_config.config 20 | ``` 21 | 22 | 23 | ## Command Line 24 | 25 | ### Connect to Telegram Servers 26 | ```bash 27 | python3 -m TEx connect --config CONFIGURATION_FILE_PATH 28 | ``` 29 | * **config** > Required - Created Configuration File Path 30 | 31 | ### Update Groups List (Optional, but Recommended) 32 | ```bash 33 | python3 -m TEx load_groups --config CONFIGURATION_FILE_PATH --refresh_profile_photos 34 | ``` 35 | 36 | * **config** > Required - Created Configuration File Path 37 | * **refresh_profile_photos** > Optional - If present, forces the Download and Update of all Channels Members Profile Photos 38 | 39 | ### Listen Messages (Start the Message Listener) 40 | ```bash 41 | python3 -m TEx listen --config CONFIGURATION_FILE_PATH --group_id 1234,5678 42 | ``` 43 | 44 | * **config** > Required - Created Configuration File Path 45 | * **ignore_media** > Optional - If present, don't Download any Media 46 | * 
**group_id** > Optional - If present, Download the Messages only from Specified Groups ID's 47 | -------------------------------------------------------------------------------- /.github/workflows/cy_deploy.yml: -------------------------------------------------------------------------------- 1 | name: CI-Deploy 2 | 3 | on: 4 | push: 5 | 6 | tags: 7 | - V* 8 | 9 | jobs: 10 | CodeQuality: 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v2 15 | - name: Set up Python 3.8 16 | uses: actions/setup-python@v2 17 | with: 18 | python-version: 3.8 19 | 20 | - name: Install dependencies 21 | run: | 22 | python -m pip install --upgrade pip 23 | python -m pip install -r requirements.txt 24 | 25 | - name: Run Code Quality 26 | run: | 27 | tox -e quality 28 | 29 | TestsAndCodeCoverage: 30 | runs-on: ubuntu-latest 31 | needs: CodeQuality 32 | 33 | steps: 34 | - uses: actions/checkout@v2 35 | - name: Set up Python 3.8 36 | uses: actions/setup-python@v2 37 | with: 38 | python-version: 3.8 39 | 40 | - name: Install dependencies 41 | run: | 42 | python -m pip install --upgrade pip 43 | python -m pip install -r requirements.txt 44 | 45 | - name: Run Unittests and Code Coverage 46 | run: | 47 | tox -e coverage 48 | 49 | PublishPypi: 50 | runs-on: ubuntu-latest 51 | needs: TestsAndCodeCoverage 52 | 53 | steps: 54 | - uses: actions/checkout@v2 55 | - name: Set up Python 3.8 56 | uses: actions/setup-python@v2 57 | with: 58 | python-version: 3.8 59 | 60 | - name: Install dependencies 61 | run: | 62 | python -m pip install --upgrade pip 63 | python -m pip install -r requirements.txt 64 | 65 | - name: Deployment 66 | env: 67 | PYPI_DEPLOY_TOKEN: ${{ secrets.PYPI_DEPLOY_TOKEN }} 68 | run: | 69 | tox -e deploy -------------------------------------------------------------------------------- /tests/modules/common.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from sqlalchemy import delete 4 | 5 | from 
class TestsCommon:
    """Shared helpers for the module tests."""

    @staticmethod
    def basic_test_setup():
        """Execute Basic Tasks for Tests."""
        # Working folders used by the tests
        for folder in ('_data', '_data/resources', '_data/media'):
            DirectoryManagerUtils.ensure_dir_struct(folder)

        DbInitializer.init(data_path='_data/')

        # Reset SQLlite Groups
        data_session = DbManager.SESSIONS['data']
        for orm_entity in (
            TelegramMessageOrmEntity,
            TelegramGroupOrmEntity,
            TelegramMediaOrmEntity,
            TelegramUserOrmEntity,
        ):
            data_session.execute(delete(orm_entity))
        data_session.commit()

    @staticmethod
    def execute_basic_pipeline_steps_for_initialization(config, args, data):
        """Run the ExecutionConfigurationHandler module once on the current event loop."""
        configuration_handler: ExecutionConfigurationHandler = ExecutionConfigurationHandler()

        event_loop = asyncio.get_event_loop()
        event_loop.run_until_complete(
            configuration_handler.run(config=config, args=args, data=data),
        )
4 | 5 | This will help you to get the best of all signals provided. 6 | 7 | **Index Mapping JSON** 8 | ```json 9 | { 10 | "numeric_detection": false, 11 | "dynamic_date_formats": [ 12 | "strict_date_optional_time", 13 | "yyyy/MM/dd HH:mm:ss Z||yyyy/MM/dd Z" 14 | ], 15 | "dynamic": "true", 16 | "dynamic_templates": [], 17 | "date_detection": true, 18 | "properties": { 19 | "source": { 20 | "fielddata_frequency_filter": { 21 | "min": 0.01, 22 | "max": 1, 23 | "min_segment_size": 50 24 | }, 25 | "fielddata": true, 26 | "type": "text" 27 | }, 28 | "time": { 29 | "type": "date" 30 | }, 31 | "signal": { 32 | "eager_global_ordinals": false, 33 | "index_phrases": false, 34 | "fielddata_frequency_filter": { 35 | "min": 0.01, 36 | "max": 1, 37 | "min_segment_size": 50 38 | }, 39 | "fielddata": true, 40 | "norms": true, 41 | "index": true, 42 | "store": false, 43 | "type": "text", 44 | "index_options": "positions" 45 | }, 46 | "content": { 47 | "eager_global_ordinals": false, 48 | "index_phrases": false, 49 | "fielddata_frequency_filter": { 50 | "min": 0.01, 51 | "max": 1, 52 | "min_segment_size": 50 53 | }, 54 | "fielddata": true, 55 | "norms": true, 56 | "index": true, 57 | "store": false, 58 | "type": "text", 59 | "index_options": "positions" 60 | } 61 | } 62 | } 63 | ``` 64 | -------------------------------------------------------------------------------- /docs/how_use/usage_connection.md: -------------------------------------------------------------------------------- 1 | # Connection to Telegram Servers 2 | 3 | First step for every phone number to be used is to connect to Telegram servers. After that, the runner will create a session file under *'data_path'* folder specified in the configuration file. 
4 | 5 | **Full Command:** 6 | 7 | ```bash 8 | python3 -m TEx connect --config CONFIGURATION_FILE_PATH 9 | ``` 10 | 11 | **Parameters** 12 | 13 | * **config** > Required - Created Configuration File Path 14 | 15 | *Output Example:* 16 | ```bash 17 | TEx - Telegram Explorer 18 | Version 0.2.12 19 | By: Th3 0bservator 20 | 21 | 2023-10-01 20:07:06,501 - INFO - [*] Loading Configurations: 22 | 2023-10-01 20:07:06,502 - INFO - [*] Installed Modules: 23 | 2023-10-01 20:07:06,502 - INFO - data_structure_handler.py 24 | 2023-10-01 20:07:06,502 - INFO - database_handler.py 25 | 2023-10-01 20:07:06,502 - INFO - execution_configuration_handler.py 26 | 2023-10-01 20:07:06,502 - INFO - telegram_connection_manager.py 27 | 2023-10-01 20:07:06,502 - INFO - telegram_groups_list.py 28 | 2023-10-01 20:07:06,502 - INFO - telegram_groups_scrapper.py 29 | 2023-10-01 20:07:06,502 - INFO - telegram_maintenance 30 | 2023-10-01 20:07:06,502 - INFO - telegram_messages_listener.py 31 | 2023-10-01 20:07:06,502 - INFO - telegram_messages_scrapper.py 32 | 2023-10-01 20:07:06,502 - INFO - telegram_report_generator 33 | 2023-10-01 20:07:06,502 - INFO - telegram_stats_generator.py 34 | 2023-10-01 20:07:06,987 - INFO - [*] Executing Pipeline: 35 | 2023-10-01 20:07:06,987 - INFO - [+] telegram_connection_manager.TelegramConnector 36 | 2023-10-01 20:07:07,392 - INFO - Authorizing on Telegram... 
37 | 2023-10-01 20:07:13,590 - INFO - User Authorized on Telegram: True 38 | 2023-10-01 20:07:13,851 - INFO - [*] Executing Termination: 39 | 2023-10-01 20:07:13,851 - INFO - [+] state_file_handler.SaveStateFileHandler 40 | ``` -------------------------------------------------------------------------------- /TEx/config.ini: -------------------------------------------------------------------------------- 1 | [PIPELINE] 2 | pre_pipeline_sequence = input_args_handler.InputArgsHandler 3 | execution_configuration_handler.ExecutionConfigurationHandler 4 | data_structure_handler.DataStructureHandler 5 | database_handler.DatabaseHandler 6 | temp_file_manager.TempFileManager 7 | state_file_handler.LoadStateFileHandler 8 | 9 | pipeline_sequence = telegram_connection_manager.TelegramConnector 10 | 11 | telegram_groups_scrapper.TelegramGroupScrapper 12 | telegram_groups_list.TelegramGroupList 13 | telegram_messages_scrapper.TelegramGroupMessageScrapper 14 | telegram_messages_listener.TelegramGroupMessageListener 15 | telegram_report_generator.telegram_report_sent_telegram.TelegramReportSentViaTelegram 16 | 17 | telegram_connection_manager.TelegramDisconnector 18 | 19 | telegram_report_generator.telegram_html_report_generator.TelegramReportGenerator 20 | telegram_report_generator.telegram_export_text_generator.TelegramExportTextGenerator 21 | telegram_report_generator.telegram_export_file_generator.TelegramExportFileGenerator 22 | 23 | telegram_stats_generator.TelegramStatsGenerator 24 | 25 | telegram_maintenance.telegram_purge_old_data.TelegramMaintenancePurgeOldData 26 | 27 | 28 | post_pipeline_sequence = state_file_handler.SaveStateFileHandler 29 | 30 | ########## Modules Config ########## 31 | 32 | [MODULE_LoadStateFileHandler] 33 | file_name = state/{0}.json 34 | 35 | [MODULE_SaveStateFileHandler] 36 | file_name = state/{0}.json 37 | -------------------------------------------------------------------------------- /TEx/core/state_file.py: 
class StateFileHandler:
    """State File Handler."""

    @staticmethod
    def file_exist(path: str) -> bool:
        """
        Tell whether a state entry is stored under the given path.

        :param path: File Path
        :return: True when at least one stored entry has this path
        """
        stored_rows = DbManager.SESSIONS['temp'].query(StateFileOrmEntity).filter_by(path=path).count()
        return bool(stored_rows > 0)

    @staticmethod
    def read_file_text(path: str) -> str:
        """Read the whole content stored under the given path.

        The entry is expected to exist; see ``file_exist``.

        :param path: File Path
        :return: File Content
        """
        row = DbManager.SESSIONS['temp'].query(StateFileOrmEntity).filter_by(path=path).first()
        entity: StateFileOrmEntity = cast(StateFileOrmEntity, row)
        return str(entity.data)

    @staticmethod
    def write_file_text(path: str, content: str) -> None:
        """Store text content under the given path, replacing any previous entry.

        :param path: File Path
        :param content: File Content
        :return: None
        """
        session = DbManager.SESSIONS['temp']

        # Delete if Exists
        session.execute(
            StateFileOrmEntity.__table__.delete().where(StateFileOrmEntity.path == path),  # type: ignore
        )

        # New entry stamped with the current UTC time (epoch seconds)
        created_at: int = int(datetime.now(tz=pytz.UTC).timestamp())
        session.add(
            StateFileOrmEntity(
                path=path,
                data=content,
                created_at=created_at,
            ),
        )

        # Execute
        session.flush()
        session.commit()
enabled=DISALLOW 53 | 54 | [MEDIA.DOWNLOAD.application/zip] 55 | enabled=DISALLOW 56 | ``` 57 | 58 | ### Download All Medias, but Compressed Ones only from two groups (id=1234 and id=5678) 59 | ```ini 60 | [MEDIA.DOWNLOAD] 61 | default=ALLOW 62 | max_download_size_bytes=256000000 63 | 64 | [MEDIA.DOWNLOAD.application/rar] 65 | enabled=DISALLOW 66 | groups=1234,5678 67 | 68 | [MEDIA.DOWNLOAD.application/vnd.rar] 69 | enabled=DISALLOW 70 | groups=1234,5678 71 | 72 | [MEDIA.DOWNLOAD.application/x-7z-compressed] 73 | enabled=DISALLOW 74 | groups=1234,5678 75 | 76 | [MEDIA.DOWNLOAD.application/x-compressed-tar] 77 | enabled=DISALLOW 78 | groups=1234,5678 79 | 80 | [MEDIA.DOWNLOAD.application/zip] 81 | enabled=DISALLOW 82 | groups=1234,5678 83 | ``` -------------------------------------------------------------------------------- /docs/finder/finder_regex.md: -------------------------------------------------------------------------------- 1 | # Message Finder System - RegEx 2 | 3 | **Compatibility:** Message Listener Command 4 | 5 | Telegram Explorer allows you to specify many message finders using Regular Expressions. 6 | 7 | Each time one Finder rule matches, the system automatically uses the Notification System to report that message. 8 | 9 | Every Finder is defined in the configuration files. 10 | 11 | **Configuration Spec:** 12 | 13 | For each rule to be used, you must set a configuration using the default name schema *FINDER.RULE.* 14 | 15 | **Parameters:** 16 | 17 | * **type** > Required - Fixed Value 'regex' 18 | * **regex** > Required - The regular expression. You can also use one regex per Line 19 | * **notifier** > Required - Name of notifiers to be used to notify the triggered message (comma separated). 
class DbManager:
    """Main Database Manager."""

    SQLALCHEMY_BINDS = {}  # type:ignore
    SESSIONS = {}  # type:ignore

    @staticmethod
    def init_db(data_path: str) -> None:
        """Create the 'temp' and 'data' SQLite engines and one session for each.

        :param data_path: Folder that holds the SQLite database files.
        :return: None
        """
        # Bind name -> database file inside data_path
        database_files = {
            'temp': 'temp_local.db',
            'data': 'data_local.db',
        }

        DbManager.SQLALCHEMY_BINDS = {
            bind_name: create_engine(
                f'sqlite:///{os.path.join(data_path, file_name)}?nolock=1&check_same_thread=false',
                connect_args={'check_same_thread': False, 'timeout': 120},
                echo=False, logging_name='sqlalchemy',
            )
            for bind_name, file_name in database_files.items()
        }

        DbManager.SESSIONS = {
            bind_name: sessionmaker(autocommit=False, autoflush=True, bind=engine)()
            for bind_name, engine in DbManager.SQLALCHEMY_BINDS.items()
        }

        # Only the 'data' engine gets the connect hook
        listen(DbManager.SQLALCHEMY_BINDS['data'], 'connect', DbManager.do_connect)

    @staticmethod
    def do_connect(dbapi_connection: Connection, connection_record: _ConnectionRecord) -> None:
        """Disable SQLLite Transaction auto Start.

        :param dbapi_connection: Newly opened sqlite3 connection.
        :param connection_record: Pool record passed by the 'connect' event; not used here.
        :return: None
        """
        # disable pysqlite's emitting of the BEGIN statement entirely.
        # also stops it from emitting COMMIT before any DDL.
        dbapi_connection.isolation_level = None
12 | 13 | **Parameters:** 14 | 15 | * **enabled** > Required - Enable(true)/Disable(false) the finder engine. 16 | * **find_in_text_files_enabled** > Optional - Enable(true)/Disable(false) the behavior that runs the finder engine inside the downloaded files. 17 | * Default: false 18 | * **find_in_text_files_max_size_bytes** > Optional - Set the maximum file size in bytes that allows the engine to load the file in memory and perform the searches. 19 | * Default: 10000000 20 | * **notifier** > Optional - The list of all (comma separated) notifiers that run when the finder triggers. 21 | * **exporter** > Optional - The list of all (comma separated) file exporters that run when the finder triggers. 22 | 23 | 24 | **Changes on Configuration File** 25 | ```ini 26 | [FINDER] 27 | enabled=true 28 | find_in_text_files_enabled=true 29 | find_in_text_files_max_size_bytes=20000000 30 | notifier=NOTIFIER.DISCORD.MY_HOOK_1,NOTIFIER.DISCORD.MY_HOOK_2 31 | exporter=EXPORTER.ROLLING_PANDAS.MY_EXPORTER_1,EXPORTER.ROLLING_PANDAS.MY_EXPORTER_2 32 | ``` 33 | 34 | **Files Supported for the Engine:** 35 | 36 | * application/atom+xml 37 | * application/bittorrent 38 | * application/csv 39 | * application/html 40 | * application/json 41 | * application/ld+json 42 | * text/csv 43 | * text/html 44 | * text/plain 45 | * text/xml -------------------------------------------------------------------------------- /docs/notification/notification_elasticsearch.md: -------------------------------------------------------------------------------- 1 | # Notification System - Elastic Search Connector 2 | 3 | Telegram Explorer allows you to send notifications to Elastic Search through the ingestion API. 4 | 5 | Every Notification is defined in the configuration files. 6 | 7 | !!! info "Elastic Search Compatibility" 8 | 9 | Tested on Elastic Search 8+ 10 | 11 | !!! warning "Index Template" 12 | 13 | We recommend creating a new Index Template before creating your indexes. 
Please, check on "Notification System" > "Elastic Search Connector" > "Index Template" and "Signals Template" for more information. 14 | 15 | **Configuration Spec:** 16 | 17 | For each connector you must set a configuration using the default name schema *NOTIFIER.ELASTIC_SEARCH.* 18 | 19 | **Parameters:** 20 | 21 | * **address** > Optional - Elastic Search Address. Multiple values comma separated. 22 | * **api_key** > Required - Elastic Search API Key. 23 | * **cloud_id** > Optional - Elastic Search Cloud ID. 24 | * **verify_ssl_cert** > Optional - Configure if the connector checks the SSL cert. Default=True 25 | * **index_name** > Required - Elastic Search Index Name. 26 | * **pipeline_name** > Required - Elastic Search Ingestion Pipeline Name. 27 | 28 | 29 | **Changes on Configuration File (with Address)** 30 | ```ini 31 | [NOTIFIER.ELASTIC_SEARCH.ELASTIC_INDEX_01] 32 | address=https://elastic_search_url_1:9200,https://elastic_search_url_2:9200 33 | api_key=bHJtVEg0c0JnNkwwTnYtYTFdeadbeefrXzd6NVFSUmEtQ21mQldiUjEwUQ== 34 | verify_ssl_cert=False 35 | index_name=search-telegram_explorer 36 | pipeline_name=ent-search-generic-ingestion 37 | ``` 38 | 39 | **Changes on Configuration File (with Cloud ID)** 40 | ```ini 41 | [NOTIFIER.ELASTIC_SEARCH.ELASTIC_INDEX_02] 42 | cloud_id=deployment-name:dXMtZWFzdDQuZ2Nw 43 | api_key=bHJtVEg0c0JnNkwwTnYtYTFdeadbeefrXzd6NVFSUmEtQ21mQldiUjEwUQ== 44 | verify_ssl_cert=True 45 | index_name=search-telegram_explorer 46 | pipeline_name=ent-search-generic-ingestion 47 | ``` 48 | -------------------------------------------------------------------------------- /TEx/modules/state_file_handler.py: -------------------------------------------------------------------------------- 1 | """State File Handler.""" 2 | from __future__ import annotations 3 | 4 | import json 5 | import logging 6 | from configparser import ConfigParser 7 | from typing import Dict 8 | 9 | from TEx.core.base_module import BaseModule 10 | from TEx.core.state_file import 
StateFileHandler 11 | 12 | logger = logging.getLogger('TelegramExplorer') 13 | 14 | 15 | class LoadStateFileHandler(BaseModule): 16 | """Module that Loads Previous Created State File.""" 17 | 18 | async def can_activate(self, config: ConfigParser, args: Dict, data: Dict) -> bool: 19 | """ 20 | Abstract Method for Module Activation Function. 21 | 22 | :return: 23 | """ 24 | return True 25 | 26 | async def run(self, config: ConfigParser, args: Dict, data: Dict) -> None: 27 | """Execute Module.""" 28 | state_file_name: str = config['MODULE_LoadStateFileHandler']['file_name'].replace('{0}', config['CONFIGURATION']['phone_number']) 29 | 30 | if StateFileHandler.file_exist(state_file_name): 31 | data.update( 32 | json.loads(StateFileHandler.read_file_text(state_file_name)), 33 | ) 34 | logger.debug('\t\tState File Loaded.') 35 | 36 | 37 | class SaveStateFileHandler(BaseModule): 38 | """Module that Save a New State File.""" 39 | 40 | async def can_activate(self, config: ConfigParser, args: Dict, data: Dict) -> bool: 41 | """ 42 | Abstract Method for Module Activation Function. 
43 | 44 | :return: 45 | """ 46 | return True 47 | 48 | async def run(self, config: ConfigParser, args: Dict, data: Dict) -> None: 49 | """Execute Module.""" 50 | state_file_name: str = config['MODULE_SaveStateFileHandler']['file_name'].replace('{0}', config['CONFIGURATION']['phone_number']) 51 | 52 | # Remove Internal Controls 53 | del data['internals'] 54 | 55 | StateFileHandler.write_file_text( 56 | state_file_name, 57 | json.dumps(data), 58 | ) 59 | -------------------------------------------------------------------------------- /TEx/models/facade/telegram_group_report_facade_entity.py: -------------------------------------------------------------------------------- 1 | """Facade Entity for Report Generation.""" 2 | from __future__ import annotations 3 | 4 | from typing import Optional 5 | 6 | from TEx.models.database.telegram_db_model import TelegramGroupOrmEntity 7 | 8 | 9 | class TelegramGroupReportFacadeEntity: 10 | """Facade Entity for Report Generation.""" 11 | 12 | id: int 13 | constructor_id: str 14 | access_hash: str 15 | group_username: str 16 | title: str 17 | 18 | fake: bool 19 | gigagroup: bool 20 | has_geo: bool 21 | restricted: bool 22 | scam: bool 23 | verified: bool 24 | 25 | participants_count: Optional[int] 26 | 27 | photo_id: Optional[int] 28 | photo_base64: Optional[str] 29 | photo_name: Optional[str] 30 | 31 | source: str 32 | 33 | meta_message_count: int 34 | 35 | 36 | class TelegramGroupReportFacadeEntityMapper: 37 | """Mapper for TelegramGroupReportFacadeEntity.""" 38 | 39 | @staticmethod 40 | def create_from_dbentity(source: TelegramGroupOrmEntity) -> TelegramGroupReportFacadeEntity: 41 | """Map TelegramGroupOrmEntity to TelegramGroupReportFacadeEntity.""" 42 | h_result: TelegramGroupReportFacadeEntity = TelegramGroupReportFacadeEntity() 43 | 44 | h_result.id = source.id 45 | h_result.constructor_id = source.constructor_id 46 | h_result.access_hash = source.access_hash 47 | h_result.group_username = source.group_username 48 | 
h_result.title = source.title 49 | 50 | h_result.fake = source.fake 51 | h_result.gigagroup = source.gigagroup 52 | h_result.has_geo = source.has_geo 53 | h_result.restricted = source.restricted 54 | h_result.scam = source.scam 55 | h_result.verified = source.verified 56 | 57 | h_result.participants_count = source.participants_count 58 | 59 | h_result.photo_id = source.photo_id 60 | h_result.photo_base64 = source.photo_base64 61 | h_result.photo_name = source.photo_name 62 | 63 | h_result.source = source.source 64 | 65 | return h_result 66 | -------------------------------------------------------------------------------- /docs/exporting/pandas_rolling.md: -------------------------------------------------------------------------------- 1 | # Message Exporting System - Pandas Rolling Exporter 2 | 3 | Telegram Explorer allows to export messages as CSV, XML, JSON or Pickle Serialized Pandas DataFrame almost at real time. 4 | 5 | This way you can configure many exporters you want, one for each need or category you like. 6 | 7 | !!! warning "NOTE ABOUT THE EXPORTING PROCESS" 8 | 9 | This specific exporter only writes the output file when the rolling period terminates, and/or, when Telegram Explorer process stops. 10 | 11 | Every Exporter is defined in the configuration files. 12 | 13 | **Configuration Spec:** 14 | 15 | For each Pandas Rolling Exporter you must set a configuration using the default name schema *EXPORTER.ROLLING_PANDAS.* 16 | 17 | **Parameters:** 18 | 19 | * **file_root_path** > Required - Root path for the exported files. 20 | * **rolling_every_minutes** > Optional - Time (in minutes) that the system will roll a new file. 21 | * Default: 30 22 | * **fields** > Optional - The list (comma separated) with the fields you want to be exported. 23 | * Default: date_time,raw_text,group_name,group_id,from_id,to_id,reply_to_msg_id,message_id,is_reply,found_on 24 | * **use_header** > Optional - Enable/Disable the file header on exported file. 
25 | * Default: true 26 | * **output_format** > Optional - Specify the output file format (json, csv, xml and pickle). 27 | * Default: csv 28 | * **keep_last_files** > Optional - Specify how many files the engine keeps in the folder before it starts deleting the oldest ones. 29 | * Default: 20 30 | 31 | **Changes on Configuration File** 32 | ```ini 33 | [EXPORTER.ROLLING_PANDAS.MY_EXPORTER_1] 34 | file_root_path=/path/to/export/folder/ 35 | rolling_every_minutes=5 36 | fields=date_time,raw_text,group_name,group_id,from_id,to_id,reply_to_msg_id,message_id,is_reply,found_on 37 | use_header=true 38 | output_format=json 39 | keep_last_files=20 40 | 41 | [EXPORTER.ROLLING_PANDAS.MY_EXPORTER_2] 42 | file_root_path=/path/to/export/folder/ 43 | rolling_every_minutes=10 44 | fields=date_time,group_id,group_name,raw_text,from_id,to_id,message_id 45 | ``` 46 | -------------------------------------------------------------------------------- /docs/changelog/v030.md: -------------------------------------------------------------------------------- 1 | # Changelog - V0.3.0 2 | 3 | !!! warning "Python Version" 4 | 5 | This is the last version of Telegram Explorer that supports Python 3.8 and 3.9. 6 | 7 | Please consider upgrading to Python 3.10+ as soon as possible.
8 | 9 | **🚀 Features** 10 | 11 | - Proxy (HTTP, SOCKS4, SOCKS5) support ([#26](https://github.com/guibacellar/TEx/issues/26)) 12 | - Discord Notifications now have a source information with account/phone number 13 | - It is now possible to set the connection timeout for the Telegram servers connectors 14 | - Discord Notifications now allow to send downloaded files as attachments ([#41](https://github.com/guibacellar/TEx/issues/41)) 15 | - New Message Finder Rule to Catch All Messages 16 | - New Notification connector for ElasticSearch ([#12](https://github.com/guibacellar/TEx/issues/12)) 17 | - Fine Control on Media Download Settings ([#37](https://github.com/guibacellar/TEx/issues/37)) 18 | - OCR Support with Tesseract for all Downloaded Images ([#39](https://github.com/guibacellar/TEx/issues/39)) 19 | - RegEx Finder now Supports Many RegEx at same Finder Configuration, One per Line ([#49](https://github.com/guibacellar/TEx/issues/49)) 20 | - Added The Ability to Configure Signals to be Received ([#48](https://github.com/guibacellar/TEx/issues/48)) 21 | - Export Messages as CSV, JSON, XML or Pandas Serialized Dataframe ([#53](https://github.com/guibacellar/TEx/issues/53)) 22 | 23 | **🐛 Bug Fixes** 24 | 25 | - Fix "export_text" command Regex Handling that causes crash on using invalid regex ([#31](https://github.com/guibacellar/TEx/issues/31)) 26 | 27 | **⚙️ Internal Improvements** 28 | 29 | - Replace Pylint, PyDocStyle and Flake8 code quality tools for Ruff ([#22](https://github.com/guibacellar/TEx/issues/22)) 30 | - Fix Invalid TypeHint for Message Object from Telethon 31 | - Changes in message finder and notification system to use a facade objects with Pydantic to reduce cognitive complexity and allow the construction of new connectors more easily 32 | - Improvements on Database Handling + Removed Isolation Level from SQL Handling ([#45](https://github.com/guibacellar/TEx/issues/45)) 33 | 
-------------------------------------------------------------------------------- /TEx/core/ocr/tesseract_ocr_engine.py: -------------------------------------------------------------------------------- 1 | """Tesseract OCR Engine.""" 2 | from __future__ import annotations 3 | 4 | import logging 5 | import os 6 | from configparser import SectionProxy 7 | from typing import Optional, cast 8 | 9 | from pytesseract import pytesseract as tesseract 10 | 11 | from TEx.core.ocr.ocr_engine_base import OcrEngineBase 12 | 13 | logger = logging.getLogger('TelegramExplorer') 14 | 15 | 16 | class TesseractOcrEngine(OcrEngineBase): 17 | """Tesseract OCR Engine.""" 18 | 19 | def __init__(self) -> None: 20 | """Initialize Discord Notifier.""" 21 | super().__init__() 22 | self.cmd: str = '' 23 | self.language: str = '' 24 | 25 | def configure(self, config: Optional[SectionProxy]) -> None: 26 | """Configure the Notifier.""" 27 | if not config: 28 | error_msg_config: str = 'No [OCR.TESSERACT] config found, but OCR type is "tesseract"' 29 | raise AttributeError(error_msg_config) 30 | 31 | self.cmd = config.get('tesseract_cmd', fallback='') 32 | self.language = config.get('language', fallback='eng') 33 | 34 | # Check if Tesseract CMD property are set 35 | if self.cmd == '': 36 | error_msg_cmd: str = '"tesseract_cmd" setting are no properly set, but OCR type is "tesseract"' 37 | raise AttributeError(error_msg_cmd) 38 | 39 | # Check if Tesseract CMD can be Found 40 | if not os.path.exists(self.cmd): 41 | error_msg_path: str = f'Tesseract command cannot be found at "{self.cmd}"' 42 | raise AttributeError(error_msg_path) 43 | 44 | # Configure Tesseract Engine 45 | tesseract.tesseract_cmd = self.cmd 46 | 47 | def run(self, file_path: str) -> Optional[str]: 48 | """Run Tesseract Engine and Return Detected Text.""" 49 | try: 50 | 51 | if not os.path.exists(file_path): 52 | return '' 53 | 54 | return cast(str, tesseract.image_to_string(file_path, lang=self.language)) 55 | 56 | except Exception 
as ex: 57 | logger.exception(msg='OCR Fail', exc_info=ex) 58 | 59 | return '' 60 | -------------------------------------------------------------------------------- /TEx/notifier/notifier_base.py: -------------------------------------------------------------------------------- 1 | """Base Class for All Notifiers.""" 2 | from __future__ import annotations 3 | 4 | import abc 5 | import hashlib 6 | from configparser import SectionProxy 7 | from typing import Optional, Tuple, Union 8 | 9 | from cachetools import TTLCache 10 | 11 | from TEx.models.facade.finder_notification_facade_entity import FinderNotificationMessageEntity 12 | from TEx.models.facade.signal_notification_model import SignalNotificationEntityModel 13 | 14 | 15 | class BaseNotifier: 16 | """Base Notifier.""" 17 | 18 | def __init__(self) -> None: 19 | """Initialize the Base Notifier.""" 20 | self.cache: Optional[TTLCache] = None 21 | self.timeout_seconds: int 22 | self.media_attachments_enabled: bool 23 | self.media_attachments_max_size_bytes: int 24 | 25 | def configure_base(self, config: SectionProxy) -> None: 26 | """Configure Base Notifier.""" 27 | self.cache = TTLCache(maxsize=4096, ttl=int(config.get('prevent_duplication_for_minutes', fallback='240')) * 60) 28 | self.timeout_seconds = int(config.get('timeout_seconds', fallback='30')) 29 | self.media_attachments_enabled = config.get('media_attachments_enabled', fallback='false') == 'true' 30 | self.media_attachments_max_size_bytes = int(config.get('media_attachments_max_size_bytes', fallback='10000000')) 31 | 32 | def check_is_duplicated(self, message: str) -> Tuple[bool, str]: 33 | """Check if Message is Duplicated on Notifier.""" 34 | if not message or self.cache is None: 35 | return False, '' 36 | 37 | # Compute Deduplication Tag 38 | tag: str = hashlib.md5(message.encode('UTF-8')).hexdigest() 39 | 40 | # If Found, Return True 41 | if self.cache.get(tag): 42 | return True, tag 43 | 44 | # Otherwise, Just Insert and Return False 45 | 
self.cache[tag] = True 46 | return False, tag 47 | 48 | @abc.abstractmethod 49 | async def run(self, entity: Union[FinderNotificationMessageEntity, SignalNotificationEntityModel], rule_id: str, source: str) -> None: 50 | """Run the Notification Process.""" 51 | -------------------------------------------------------------------------------- /docs/how_use/usage_list_groups.md: -------------------------------------------------------------------------------- 1 | # List Groups 2 | 3 | You can list groups directly in the console/tty output for a quick view of all groups already present in the database. 4 | 5 | **Full Command:** 6 | 7 | ```bash 8 | python3 -m TEx list_groups --config CONFIGURATION_FILE_PATH 9 | ``` 10 | 11 | **Parameters** 12 | 13 | * **config** > Required - Created Configuration File Path 14 | 15 | *Output Example:* 16 | ```bash 17 | TEx - Telegram Explorer 18 | Version 0.2.12 19 | By: Th3 0bservator 20 | 21 | 2023-10-01 20:41:15,142 - INFO - [*] Loading Configurations: 22 | 2023-10-01 20:41:15,142 - INFO - [*] Installed Modules: 23 | 2023-10-01 20:41:15,143 - INFO - data_structure_handler.py 24 | 2023-10-01 20:41:15,143 - INFO - database_handler.py 25 | 2023-10-01 20:41:15,143 - INFO - execution_configuration_handler.py 26 | 2023-10-01 20:41:15,143 - INFO - telegram_connection_manager.py 27 | 2023-10-01 20:41:15,143 - INFO - telegram_groups_list.py 28 | 2023-10-01 20:41:15,143 - INFO - telegram_groups_scrapper.py 29 | 2023-10-01 20:41:15,143 - INFO - telegram_maintenance 30 | 2023-10-01 20:41:15,143 - INFO - telegram_messages_listener.py 31 | 2023-10-01 20:41:15,143 - INFO - telegram_messages_scrapper.py 32 | 2023-10-01 20:41:15,143 - INFO - telegram_report_generator 33 | 2023-10-01 20:41:15,143 - INFO - telegram_stats_generator.py 34 | 2023-10-01 20:41:15,484 - INFO - [*] Executing Pipeline: 35 | 2023-10-01 20:41:15,823 - INFO - [+] telegram_groups_list.TelegramGroupList 36 | 2023-10-01 20:41:16,535 - INFO - Found 2 Groups 37 | 2023-10-01 20:41:16,536 
- INFO - ID Username Title 38 | 2023-10-01 20:41:16,536 - INFO - 1769587896 mygroup1 My Group 1 39 | 2023-10-01 20:41:16,536 - INFO - 1259876541 texbetagroup TeX Beta Group 40 | 2023-10-01 20:41:16,703 - INFO - [*] Executing Termination: 41 | 2023-10-01 20:41:16,703 - INFO - [+] state_file_handler.SaveStateFileHandler 42 | ``` -------------------------------------------------------------------------------- /TEx/exporter/exporter_base.py: -------------------------------------------------------------------------------- 1 | """Base Class for All Exporters.""" 2 | from __future__ import annotations 3 | 4 | import abc 5 | import logging 6 | import os 7 | from configparser import SectionProxy 8 | from pathlib import Path 9 | from typing import List 10 | 11 | from TEx.models.facade.finder_notification_facade_entity import FinderNotificationMessageEntity 12 | 13 | logger = logging.getLogger('TelegramExplorer') 14 | 15 | 16 | class BaseExporter: 17 | """Base Notifier.""" 18 | 19 | def __init__(self) -> None: 20 | """Initialize the Base Exporter.""" 21 | self.file_root_path: str = '' 22 | 23 | def configure_base(self, config: SectionProxy) -> None: 24 | """Configure Base Exporter.""" 25 | self.file_root_path = config.get('file_root_path') 26 | 27 | @abc.abstractmethod 28 | async def run(self, entity: FinderNotificationMessageEntity, rule_id: str) -> None: 29 | """Run the Exporting Process.""" 30 | 31 | @abc.abstractmethod 32 | def shutdown(self) -> None: 33 | """Shutdown and Flush all Data into Disk.""" 34 | 35 | def _keep_last_files_only(self, directory_path: str, file_count: int) -> None: 36 | """Ensure the Directory Contains Only the 'file_count' newest files. 
Note: CHAT GPT-4 Assisted Code.""" 37 | if not os.path.exists(directory_path): 38 | return 39 | 40 | # List All Files 41 | files: List = [ 42 | os.path.join(directory_path, file) for file in os.listdir(directory_path) if 43 | Path(os.path.join(directory_path, file)).is_file() 44 | ] 45 | 46 | # Check File Limit 47 | if len(files) <= file_count: 48 | return 49 | 50 | # Sort Files by Date/Time 51 | files.sort(key=lambda x: Path(x).stat().st_mtime) 52 | 53 | # Compute File Remove Counter 54 | files_to_delete: int = len(files) - file_count 55 | 56 | # Remove Old Files 57 | for i in range(files_to_delete): 58 | try: 59 | os.remove(files[i]) 60 | except Exception as ex: 61 | logger.exception(msg=f'Unable to Remove {files[i]}', exc_info=ex) 62 | -------------------------------------------------------------------------------- /docs/notification/notification_elasticsearch_index_template.md: -------------------------------------------------------------------------------- 1 | # Notification System - Elastic Search Connector - Index Template 2 | 3 | If you want, create a new Index Template before creating any Telegram Explorer indexes. 4 | 5 | This helps you get the most out of the data provided and allows you to extract much more value and information from it.
6 | 7 | **Index Mapping JSON** 8 | ```json 9 | { 10 | "numeric_detection": false, 11 | "dynamic_date_formats": [ 12 | "strict_date_optional_time", 13 | "yyyy/MM/dd HH:mm:ss Z||yyyy/MM/dd Z" 14 | ], 15 | "dynamic": "true", 16 | "dynamic_templates": [], 17 | "date_detection": true, 18 | "properties": { 19 | "from_id": { 20 | "type": "long" 21 | }, 22 | "media_size": { 23 | "type": "long" 24 | }, 25 | "group_name": { 26 | "fielddata_frequency_filter": { 27 | "min": 0.01, 28 | "max": 1, 29 | "min_segment_size": 50 30 | }, 31 | "fielddata": true, 32 | "type": "text" 33 | }, 34 | "reply_to_msg_id": { 35 | "type": "long" 36 | }, 37 | "has_media": { 38 | "type": "boolean" 39 | }, 40 | "raw": { 41 | "fielddata_frequency_filter": { 42 | "min": 0.01, 43 | "max": 1, 44 | "min_segment_size": 50 45 | }, 46 | "fielddata": true, 47 | "type": "text" 48 | }, 49 | "rule": { 50 | "fielddata_frequency_filter": { 51 | "min": 0.01, 52 | "max": 1, 53 | "min_segment_size": 50 54 | }, 55 | "fielddata": true, 56 | "type": "text" 57 | }, 58 | "to_id": { 59 | "type": "long" 60 | }, 61 | "message_id": { 62 | "type": "text" 63 | }, 64 | "source": { 65 | "fielddata_frequency_filter": { 66 | "min": 0.01, 67 | "max": 1, 68 | "min_segment_size": 50 69 | }, 70 | "fielddata": true, 71 | "type": "text" 72 | }, 73 | "is_reply": { 74 | "type": "boolean" 75 | }, 76 | "found_on": { 77 | "type": "text" 78 | }, 79 | "group_id": { 80 | "type": "long" 81 | }, 82 | "media_mime_type": { 83 | "fielddata_frequency_filter": { 84 | "min": 0.01, 85 | "max": 1, 86 | "min_segment_size": 50 87 | }, 88 | "fielddata": true, 89 | "type": "text" 90 | }, 91 | "time": { 92 | "type": "date" 93 | } 94 | } 95 | } 96 | ``` 97 | -------------------------------------------------------------------------------- /tests/resources/expected_generated_file_content/test_pandas_rolling_exporter_json_expected_15558987453_202311221005.data: -------------------------------------------------------------------------------- 1 | 
[{"date_time":"2023-11-22T10:22:00.000Z","raw_text":"Mocked Raw Text","group_name":"Channel 1972142108","group_id":1972142108,"from_id":1234,"to_id":9876,"reply_to_msg_id":5544,"message_id":5975883,"is_reply":false,"found_on":"UT FOUND 6"},{"date_time":"2023-11-22T10:22:00.000Z","raw_text":"Mocked Raw Text","group_name":"Channel 1972142108","group_id":1972142108,"from_id":1234,"to_id":9876,"reply_to_msg_id":5544,"message_id":5975883,"is_reply":false,"found_on":"UT FOUND 6"},{"date_time":"2023-11-22T10:22:00.000Z","raw_text":"Mocked Raw Text","group_name":"Channel 1972142108","group_id":1972142108,"from_id":1234,"to_id":9876,"reply_to_msg_id":5544,"message_id":5975883,"is_reply":false,"found_on":"UT FOUND 6"},{"date_time":"2023-11-22T10:22:00.000Z","raw_text":"Mocked Raw Text","group_name":"Channel 1972142108","group_id":1972142108,"from_id":1234,"to_id":9876,"reply_to_msg_id":5544,"message_id":5975883,"is_reply":false,"found_on":"UT FOUND 6"},{"date_time":"2023-11-22T10:22:00.000Z","raw_text":"Mocked Raw Text","group_name":"Channel 1972142108","group_id":1972142108,"from_id":1234,"to_id":9876,"reply_to_msg_id":5544,"message_id":5975883,"is_reply":false,"found_on":"UT FOUND 6"},{"date_time":"2023-11-22T10:22:00.000Z","raw_text":"Mocked Raw Text","group_name":"Channel 1972142108","group_id":1972142108,"from_id":1234,"to_id":9876,"reply_to_msg_id":5544,"message_id":5975883,"is_reply":false,"found_on":"UT FOUND 6"},{"date_time":"2023-11-22T10:22:00.000Z","raw_text":"Mocked Raw Text","group_name":"Channel 1972142108","group_id":1972142108,"from_id":1234,"to_id":9876,"reply_to_msg_id":5544,"message_id":5975883,"is_reply":false,"found_on":"UT FOUND 6"},{"date_time":"2023-11-22T10:22:00.000Z","raw_text":"Mocked Raw Text","group_name":"Channel 1972142108","group_id":1972142108,"from_id":1234,"to_id":9876,"reply_to_msg_id":5544,"message_id":5975883,"is_reply":false,"found_on":"UT FOUND 6"},{"date_time":"2023-11-22T10:22:00.000Z","raw_text":"Mocked Raw 
Text","group_name":"Channel 1972142108","group_id":1972142108,"from_id":1234,"to_id":9876,"reply_to_msg_id":5544,"message_id":5975883,"is_reply":false,"found_on":"UT FOUND 6"},{"date_time":"2023-11-22T10:22:00.000Z","raw_text":"Mocked Raw Text","group_name":"Channel 1972142108","group_id":1972142108,"from_id":1234,"to_id":9876,"reply_to_msg_id":5544,"message_id":5975883,"is_reply":false,"found_on":"UT FOUND 6"}] -------------------------------------------------------------------------------- /tests/resources/expected_generated_file_content/test_pandas_rolling_exporter_csv_expected_15558987453_202311221007.data: -------------------------------------------------------------------------------- 1 | date_time,raw_text,group_name,group_id,from_id,to_id,reply_to_msg_id,message_id,is_reply,found_on 2 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6 3 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6 4 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6 5 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6 6 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6 7 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6 8 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6 9 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6 10 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6 11 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6 12 | 2023-11-22 
10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6 13 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6 14 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6 15 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6 16 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6 17 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6 18 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6 19 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6 20 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6 21 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6 22 | -------------------------------------------------------------------------------- /TEx/modules/telegram_groups_list.py: -------------------------------------------------------------------------------- 1 | """Telegram Group List.""" 2 | from __future__ import annotations 3 | 4 | import logging 5 | from configparser import ConfigParser 6 | from typing import Dict, List, cast 7 | 8 | from TEx.core.base_module import BaseModule 9 | from TEx.database.telegram_group_database import TelegramGroupDatabaseManager 10 | from TEx.models.database.telegram_db_model import TelegramGroupOrmEntity 11 | 12 | logger = logging.getLogger('TelegramExplorer') 13 | 14 | 15 | class TelegramGroupList(BaseModule): 16 | """List all Groups on Telegram Account.""" 17 | 18 | async def can_activate(self, config: ConfigParser, args: Dict, data: Dict) 
-> bool: 19 | """ 20 | Abstract Method for Module Activation Function. 21 | 22 | :return: 23 | """ 24 | return cast(bool, args['list_groups']) 25 | 26 | async def run(self, config: ConfigParser, args: Dict, data: Dict) -> None: 27 | """Execute Module.""" 28 | if not await self.can_activate(config, args, data): 29 | logger.debug('\t\tModule is Not Enabled...') 30 | return 31 | 32 | # Check Data Dict 33 | if 'groups' not in data: 34 | data['groups'] = {} 35 | 36 | if 'members' not in data: 37 | data['members'] = {} 38 | 39 | # Get all Groups from DB 40 | db_groups: List[TelegramGroupOrmEntity] = TelegramGroupDatabaseManager.get_all_by_phone_number( 41 | config['CONFIGURATION']['phone_number']) 42 | logger.info(f'\t\tFound {len(db_groups)} Groups') 43 | 44 | # Get the Bigger Username Size 45 | max_username_size: int = max( 46 | [len(item.group_username) if item.group_username is not None else 0 for item in db_groups], 47 | ) 48 | 49 | # Get the Bigger Title Size 50 | max_title_size: int = max( 51 | [len(item.title) if item.title is not None else 0 for item in db_groups], 52 | ) 53 | 54 | # Print Groups 55 | logger.info(f'\t\tID \t{"Username".ljust(max_username_size)}\t{"Title".ljust(max_title_size)}') 56 | for group in db_groups: 57 | formatted_username: str = group.group_username.ljust(max_username_size) if group.group_username is not None else 'UNDEFINED'.ljust(max_username_size) 58 | formatted_title: str = group.title.ljust(max_title_size) if group.title is not None else 'UNDEFINED'.ljust(max_title_size) 59 | logger.info(f'\t\t{group.id}\t{formatted_username}\t{formatted_title}') 60 | -------------------------------------------------------------------------------- /docs/configuration/ocr.md: -------------------------------------------------------------------------------- 1 | # Configuration - OCR 2 | 3 | Using Tesseract, Telegram Explorer does OCR and extract all texts from any downloaded images. 
4 | 5 | By default, Tesseract comes with 2 languages, English and OSD, but you can install additional languages as you wish. 6 | 7 | 8 | !!! warning "OCR Results" 9 | 10 | Remember, OCR is not magic and the results may vary, especially in a wild environment such as analyzing uncontrolled, non-standardized images downloaded from multiple sources and from any kind of Telegram group. 11 | 12 | ```ini 13 | [OCR] 14 | enabled=true 15 | type=tesseract 16 | 17 | [OCR.TESSERACT] 18 | tesseract_cmd=/path/to/tesseract/cmd 19 | language=eng 20 | ``` 21 | 22 | * **enabled** > Required - Enable/Disable OCR Feature (true = enable / false = disable) 23 | * **type** > Required - Engine Type (fixed=tesseract) 24 | * **tesseract_cmd** > Required - Path to Tesseract CMD 25 | * **language** > Optional - Tesseract Language, multiple Languages supported (Ex: eng+por). Default: eng 26 | 27 | ## OCR Text 28 | 29 | All extracted content is combined with the original content of the messages, so Telegram Explorer's search and notification mechanisms work seamlessly. 30 | 31 | Here's a real message example: 32 | 33 | ``` 34 | Yeah, we got compromised by APT29, but luckily MalwareBytes™ FREE AV 35 | stopped the infection in their tracks! 36 | 37 | To be extra safe, we swung by the local Hotel and used their 38 | WiFi to install it.
39 | 40 | ====OCR CONTENT==== 41 | 42 | Malwarebytes 4.0 43 | Premium 44 | Real-Time Protectin 45 | 46 | My Computer Global 47 | 48 | 17 total 49 | 50 | Malicious sites 2 51 | Malware PUPs 3 52 | Ransomware 1 53 | Explits 9 54 | ``` 55 | 56 | 57 | 58 | 59 | 60 | ## Installing Tesseract 61 | 62 | * **Linux Users**: Follow the "Installation" procedures at [https://tesseract-ocr.github.io/tessdoc/Installation.html](https://tesseract-ocr.github.io/tessdoc/Installation.html) 63 | * **Windows Users**: Get and Install from [https://github.com/UB-Mannheim/tesseract/wiki](https://github.com/UB-Mannheim/tesseract/wiki) 64 | 65 | ## Adding New Languages 66 | 67 | Installing a new language is as simple as downloading the trained data for that language and copying the downloaded file to the **tessdata** folder inside the Tesseract installation folder. 68 | 69 | To obtain the languages, access [https://github.com/tesseract-ocr/tessdata](https://github.com/tesseract-ocr/tessdata) 70 | 71 | As an example, this is my *tessdata* directory: 72 | 73 | ![ocr_tensorflow_tessdata_folder.png](../media/ocr_tensorflow_tessdata_folder.png) -------------------------------------------------------------------------------- /docs/how_use/usage_load_groups.md: -------------------------------------------------------------------------------- 1 | # Update Groups List 2 | 3 | Although Telegram Explorer performs automatic group synchronization every time a new group/chat is detected, the automatic system only registers the group in the database. 4 | 5 | The Group Load command performs a full group synchronization, including all information about the group (name, pictures and full members list, members photos, etc).
6 | 7 | **Full Command:** 8 | 9 | ```bash 10 | python3 -m TEx load_groups --config CONFIGURATION_FILE_PATH --refresh_profile_photos 11 | ``` 12 | 13 | **Basic Command:** 14 | 15 | ```bash 16 | python3 -m TEx load_groups --config CONFIGURATION_FILE_PATH 17 | ``` 18 | 19 | **Parameters** 20 | 21 | * **config** > Required - Created Configuration File Path 22 | * **refresh_profile_photos** > Optional - If present, forces the Download and Update all Channels Members Profile Photo 23 | 24 | *Output Example:* 25 | ```bash 26 | TEx - Telegram Explorer 27 | Version 0.2.12 28 | By: Th3 0bservator 29 | 30 | 2023-10-01 20:37:14,514 - INFO - [*] Loading Configurations: 31 | 2023-10-01 20:37:14,514 - INFO - [*] Installed Modules: 32 | 2023-10-01 20:37:14,514 - INFO - data_structure_handler.py 33 | 2023-10-01 20:37:14,514 - INFO - database_handler.py 34 | 2023-10-01 20:37:14,515 - INFO - execution_configuration_handler.py 35 | 2023-10-01 20:37:14,515 - INFO - telegram_connection_manager.py 36 | 2023-10-01 20:37:14,515 - INFO - telegram_groups_list.py 37 | 2023-10-01 20:37:14,515 - INFO - telegram_groups_scrapper.py 38 | 2023-10-01 20:37:14,515 - INFO - telegram_maintenance 39 | 2023-10-01 20:37:14,515 - INFO - telegram_messages_listener.py 40 | 2023-10-01 20:37:14,515 - INFO - telegram_messages_scrapper.py 41 | 2023-10-01 20:37:14,515 - INFO - telegram_report_generator 42 | 2023-10-01 20:37:14,515 - INFO - telegram_stats_generator.py 43 | 2023-10-01 20:37:14,525 - INFO - [*] Loading Execution Configurations: 44 | 2023-10-01 20:37:14,525 - INFO - [+] data_structure_handler.DataStructureHandler 45 | 2023-10-01 20:37:14,813 - INFO - [*] Executing Pipeline: 46 | 2023-10-01 20:37:21,361 - INFO - [+] telegram_groups_scrapper.TelegramGroupScrapper 47 | 2023-10-01 20:37:21,364 - INFO - Enumerating Groups 48 | 2023-10-01 20:37:22,169 - INFO - Processing "My Group 1 (1769587896)" Members and Group Profile Picture 49 | 2023-10-01 20:37:27,782 - INFO - Processing "TeX Beta Group 
(1259876541)" Members and Group Profile Picture 50 | 2023-10-01 20:37:27,859 - INFO - [*] Executing Termination: 51 | 2023-10-01 20:07:27,958 - INFO - [+] state_file_handler.SaveStateFileHandler 52 | ``` -------------------------------------------------------------------------------- /docs/how_use/usage_download_messages.md: -------------------------------------------------------------------------------- 1 | # Download Messages (Download since first message for each group) 2 | 3 | Unlike the process of listening to messages, this command downloads messages from Telegram groups from the first message. Essentially downloading every message, every media (if 'ignore_media' was not present). 4 | 5 | We can compare this command with any scrapper. 6 | 7 | > 🚨🚨🚨🚨🚨 **CRITICAL INFORMATION**🚨🚨🚨🚨🚨

Downloading all messages from all groups can get your account banned. Use this command carefully, and only when necessary.

**Note:** Extremely recommended to use with the groups filter. 8 | 9 | **Full Command:** 10 | ```bash 11 | python3 -m TEx download_messages --config CONFIGURATION_FILE_PATH --ignore_media --group_id 1234,5678 12 | ``` 13 | 14 | **Basic Command:** 15 | ```bash 16 | python3 -m TEx download_messages --config CONFIGURATION_FILE_PATH 17 | ``` 18 | 19 | **Parameters** 20 | 21 | * **config** > Required - Created Configuration File Path 22 | * **ignore_media** > Optional - If present, don't Download any Media 23 | * **group_id** > Optional - If present, Download the Messages only from Specified Groups ID's 24 | 25 | 26 | *Output Example:* 27 | ```bash 28 | 2023-10-01 21:01:35,543 - INFO - [*] Loading Configurations: 29 | 2023-10-01 21:01:35,543 - INFO - [*] Installed Modules: 30 | 2023-10-01 21:01:35,543 - INFO - data_structure_handler.py 31 | 2023-10-01 21:01:35,543 - INFO - database_handler.py 32 | 2023-10-01 21:01:35,543 - INFO - execution_configuration_handler.py 33 | 2023-10-01 21:01:35,543 - INFO - telegram_connection_manager.py 34 | 2023-10-01 21:01:35,544 - INFO - telegram_groups_list.py 35 | 2023-10-01 21:01:35,544 - INFO - telegram_groups_scrapper.py 36 | 2023-10-01 21:01:35,544 - INFO - telegram_maintenance 37 | 2023-10-01 21:01:35,544 - INFO - telegram_messages_listener.py 38 | 2023-10-01 21:01:35,544 - INFO - telegram_messages_scrapper.py 39 | 2023-10-01 21:01:35,544 - INFO - telegram_report_generator 40 | 2023-10-01 21:01:35,544 - INFO - telegram_stats_generator.py 41 | 2023-10-01 21:01:35,894 - INFO - [*] Executing Pipeline: 42 | 2023-10-01 21:01:42,659 - INFO - [+] telegram_messages_scrapper.TelegramGroupMessageScrapper 43 | 2023-10-01 21:01:42,706 - INFO - Found 2 Groups 44 | 2023-10-01 21:01:43,468 - INFO - Download Messages from "My Group 1" > Last Offset: 3936 45 | 2023-10-01 21:01:54,468 - INFO - Download Messages from "TeX Beta Group" > Last Offset: 158742 46 | 2023-10-01 20:37:27,859 - INFO - [*] Executing Termination: 47 | 2023-10-01 20:07:27,958 - 
class ExporterEngine:
    """Primary Export Engine.

    Keeps a registry of configured exporter instances, keyed by the name of the
    configuration section that declared them, and dispatches entities to them.
    """

    def __init__(self) -> None:
        """Initialize Exporter Engine with an empty exporter registry."""
        # Maps configuration section name -> {'instance': <exporter>}
        self.exporters: Dict = {}

    def __load_exporters(self, config: ConfigParser) -> None:
        """Load all Registered Exporters.

        Every configuration section whose name contains 'EXPORTER.' is inspected;
        those that also contain 'ROLLING_PANDAS' get a configured
        PandasRollingExporter registered under the section name.

        :param config: Parsed Configuration File
        :return: None
        """
        registered_exporters: List[str] = [item for item in config.sections() if 'EXPORTER.' in item]

        for register in registered_exporters:
            if 'ROLLING_PANDAS' in register:

                exporter: PandasRollingExporter = PandasRollingExporter()
                exporter.configure(config=config[register], source=config['CONFIGURATION']['phone_number'])

                self.exporters.update({
                    register: {'instance': exporter},
                })

    def configure(self, config: ConfigParser) -> None:
        """Configure the Exporter Engine.

        :param config: Parsed Configuration File
        :return: None
        """
        self.__load_exporters(config)

    async def run(self, exporters: List[str], entity: FinderNotificationMessageEntity, rule_id: str) -> None:
        """Dispatch all Exporting Processes.

        :param exporters: Names of the Registered Exporters to Run
        :param entity: Entity to be Exported
        :param rule_id: Triggered Rule ID
        :return: None
        :raises KeyError: If a Name in 'exporters' is not Registered
        """
        if not exporters:
            return

        for dispatcher_name in exporters:

            target_exporter: BaseExporter = self.exporters[dispatcher_name]['instance']

            try:
                await target_exporter.run(entity=entity, rule_id=rule_id)

            except Exception:  # Yes, Catch All - one failing exporter must not stop the others
                # Use the application logger (not the root logger) so the record
                # follows the 'TelegramExplorer' logging configuration.
                logger.exception('Unable to Export Data')

    async def shutdown(self) -> None:
        """Shutdown all Exporters and Flush all to Disk.

        Failures are logged and do not prevent the remaining exporters from
        being shut down.
        """
        for dispatcher_name, registration in self.exporters.items():

            target_exporter: BaseExporter = registration['instance']

            try:
                target_exporter.shutdown()

            except Exception:  # Yes, Catch All
                logger.exception(
                    'Unable to Shutdown the "%s" Exporter Gracefully. Data may be lost.',
                    dispatcher_name,
                )
webhook=https://discord.com/api/webhooks/1128765187657681875/foobarqOMFp_457EDs2mbeefNPPeqJnBZZdfaubQvOKIUHYzfdeadZd5aqGX6FmCmbNjv 70 | prevent_duplication_for_minutes=0 71 | media_attachments_enabled=true 72 | media_attachments_max_size_bytes=10000000 73 | 74 | [NOTIFIER.ELASTIC_SEARCH.SIGNALS] 75 | address=https://localhost:9200 76 | api_key=bHJtVEg0c0JnNkwwTnYtFFDEADlo6NS1rXzd6NVFSUmEtQ21mQldiUjEwUQ== 77 | verify_ssl_cert=False 78 | index_name=index-name-for-signals 79 | pipeline_name=ent-search-generic-ingestion 80 | 81 | [SIGNALS] 82 | enabled=true 83 | keep_alive_interval=300 84 | 85 | keep_alive_notifer=NOTIFIER.ELASTIC_SEARCH.SIGNALS 86 | initialization_notifer=NOTIFIER.ELASTIC_SEARCH.SIGNALS 87 | shutdown_notifer=NOTIFIER.ELASTIC_SEARCH.SIGNALS 88 | new_group_notifer=NOTIFIER.DISCORD.SIGNALS_HOOK,NOTIFIER.ELASTIC_SEARCH.SIGNALS 89 | ``` 90 | -------------------------------------------------------------------------------- /docs/notification/signals.md: -------------------------------------------------------------------------------- 1 | # Notification System - Signals 2 | 3 | Signals are the way that Telegram Explorer report some internal behaviors and events. 4 | 5 | Currently, there are 4 unique signals: 6 | 7 | - **Initialization** - Happens everytime the Telegram Explorer starts the 'listen' command 8 | - **Keep Alive** - Sent every (keep_alive_interval) seconds while the Telegram Explorer are running the 'listen' command 9 | - **New Group** - Happen everytime when the 'listen' command receive a new group for first time 10 | - **Shutdown** - Happens everytime the Telegram Explorer finish the 'listen' command 11 | 12 | **Configuration Spec:** 13 | 14 | You are able to fully enable/disable the signal system and have a fine control on each signal. 15 | 16 | Also, Signals works like any notification from Telegram Explorer and you can configure each signal individually to be sent on any supported Notification Engines. 17 | 18 | !!! 
info "Use Separated Notifiers" 19 | 20 | Although you can use the same notifiers that you use for finder mechanisms, we strongly recommend creating a dedicated configuration for the signals, especially if you are going to use Elastic Search, because Telegram Explorer has a new and dedicated Index Template for this. 21 | 22 | **Elastic Search Signals Index Template:** [Check the Template Here](notification_elasticsearch_signals_template.md) 23 | 24 | **Parameters:** 25 | 26 | * **enabled** > Required - Enable/Disable the Signals System 27 | * **keep_alive_interval** > Required - Interval (in seconds) at which the system sends the KEEP-ALIVE signal 28 | * **keep_alive_notifer** > Optional - Name of notifiers to be used to receive the KEEP-ALIVE signal (comma separated). Omit to Disable this Signal 29 | * **initialization_notifer** > Optional - Name of notifiers to be used to receive the INITIALIZATION signal (comma separated). Omit to Disable this Signal 30 | * **shutdown_notifer** > Optional - Name of notifiers to be used to receive the SHUTDOWN signal (comma separated). Omit to Disable this Signal 31 | * **new_group_notifer** > Optional - Name of notifiers to be used to receive the NEW-GROUP signal (comma separated). 
Omit to Disable this Signal 32 | 33 | 34 | **Changes on Configuration File** 35 | ```ini 36 | [SIGNALS] 37 | enabled=true 38 | keep_alive_interval=300 39 | 40 | keep_alive_notifer=NOTIFIER.ELASTIC_SEARCH.ELASTIC_INDEX_01 41 | initialization_notifer=NOTIFIER.ELASTIC_SEARCH.ELASTIC_INDEX_01,NOTIFIER.DISCORD.MY_HOOK_2 42 | shutdown_notifer=NOTIFIER.ELASTIC_SEARCH.ELASTIC_INDEX_01,NOTIFIER.DISCORD.MY_HOOK_2 43 | new_group_notifer=NOTIFIER.DISCORD.MY_HOOK_2 44 | ``` 45 | -------------------------------------------------------------------------------- /docs/notification/notification_discord.md: -------------------------------------------------------------------------------- 1 | # Notification System - Discord Hook 2 | 3 | Telegram Explorer allows you to send notifications through Discord WebHooks. Each WebHook is linked to a specific channel. 4 | 5 | This way you can configure many notification hooks, one for each need or category you like. 6 | 7 | Every Notification is defined in the configuration files. 8 | 9 | **Configuration Spec:** 10 | 11 | For each notification hook you must set a configuration using the default name schema *NOTIFIER.DISCORD.* 12 | 13 | **Parameters:** 14 | 15 | * **webhook** > Required - Discord Webhook URI 16 | * **prevent_duplication_for_minutes** > Required - Time (in minutes) that the system keeps track of messages sent to Discord servers, to prevent other messages with the same content from being sent to the webhook. If you don't want to use this feature, just set the parameter to 0. 17 | * **timeout_seconds** > Optional - Timeout (in seconds) to wait for the message to be sent. If sending takes longer than that, the message will be ignored. 18 | * Default: 30 19 | * **media_attachments_enabled** > Optional - Enable/Disable sending downloaded media along with messages that have been reported. 20 | * Default: false 21 | * **media_attachments_max_size_bytes** > Optional - Set the max size in bytes of the media sent on the notifications. 
class InputArgsHandlerTest(unittest.TestCase):
    """Check how InputArgsHandler parses the 'report' command line."""

    def setUp(self) -> None:
        """Load the configuration file handed to the handler on every run."""
        parser = ConfigParser()
        parser.read('../../config.ini')
        self.config = parser

    def _parse_command_line(self, argv: list) -> Dict:
        """Install *argv* as the process arguments, run the handler and return the parsed args."""
        sys.argv = argv

        handler: InputArgsHandler = InputArgsHandler()
        parsed: Dict = {}
        shared_data: Dict = {}

        event_loop = asyncio.get_event_loop()
        event_loop.run_until_complete(
            handler.run(config=self.config, args=parsed, data=shared_data)
        )
        return parsed

    def test_report_commands_complete(self):
        """Every supported 'report' option given explicitly is returned as provided."""
        args = self._parse_command_line([
            '__main__.py',
            'report',
            '--config', '/usr/home/config_file.config',
            '--order_desc',
            '--limit_days', '8',
            '--filter', 'filter1, "Filter 2", Filter3',
            '--report_folder', 'reports/ut01',
            '--around_messages', '7',
            '--group_id', '99,5,78,56987'
        ])

        self.assertEqual('/usr/home/config_file.config', args['config'])
        self.assertTrue(args['order_desc'])
        self.assertEqual('filter1, "Filter 2", Filter3', args['filter'])
        self.assertEqual(8, int(args['limit_days']))
        self.assertEqual('reports/ut01', args['report_folder'])
        self.assertEqual(7, int(args['around_messages']))
        self.assertEqual('99,5,78,56987', args['group_id'])

    def test_report_commands_default(self):
        """With only --config given, every other 'report' option takes its default."""
        args = self._parse_command_line([
            '__main__.py',
            'report',
            '--config', '/usr/home/config_file2.config',
        ])

        self.assertEqual('/usr/home/config_file2.config', args['config'])
        self.assertFalse(args['order_desc'])
        self.assertIsNone(args['filter'])
        self.assertEqual(3650, int(args['limit_days']))
        self.assertEqual('reports', args['report_folder'])
        self.assertEqual(1, int(args['around_messages']))
        self.assertEqual('*', args['group_id'])
class NotifierEngine:
    """Primary Notification Engine.

    Keeps a registry of configured notifier instances, keyed by the name of the
    configuration section that declared them, and dispatches entities to them.
    """

    def __init__(self) -> None:
        """Initialize Notifier Engine with an empty notifier registry."""
        # Maps configuration section name -> {'instance': <notifier>}
        self.notifiers: Dict = {}

    def __load_notifiers(self, config: ConfigParser) -> None:
        """Load all Registered Notifiers.

        Every configuration section whose name contains 'NOTIFIER.' is inspected.
        The two engine checks below are independent substring tests, so a section
        name containing both markers ends up registered with the Elasticsearch
        notifier, which is assigned last.

        :param config: Parsed Configuration File
        :return: None
        """
        registered_notifiers: List[str] = [item for item in config.sections() if 'NOTIFIER.' in item]

        for register in registered_notifiers:
            if 'DISCORD' in register:

                notifier: DiscordNotifier = DiscordNotifier()
                notifier.configure(url=config[register]['webhook'], config=config[register])

                self.notifiers.update({
                    register: {'instance': notifier},
                })

            if 'ELASTIC_SEARCH' in register:
                notifier_es: ElasticSearchNotifier = ElasticSearchNotifier()
                notifier_es.configure(config=config[register])

                self.notifiers.update({
                    register: {'instance': notifier_es},
                })

    def configure(self, config: ConfigParser) -> None:
        """Configure the Notifier Engine.

        :param config: Parsed Configuration File
        :return: None
        """
        self.__load_notifiers(config)

    async def run(self, notifiers: List[str], entity: Union[FinderNotificationMessageEntity, SignalNotificationEntityModel], rule_id: str, source: str) -> None:
        """Dispatch all Notifications.

        :param notifiers: Names of the Registered Notifiers to Run
        :param entity: Message or Signal Entity to be Notified
        :param rule_id: Triggered Rule ID
        :param source: Source Account/Phone Number
        :return: None
        :raises KeyError: If a Name in 'notifiers' is not Registered
        """
        if not notifiers:
            return

        for dispatcher_name in notifiers:

            target_notifier: BaseNotifier = self.notifiers[dispatcher_name]['instance']

            try:
                await target_notifier.run(entity=entity, rule_id=rule_id, source=source)

            except Exception:  # Yes, Catch All - one failing notifier must not stop the others
                # Use the application logger (not the root logger) so the record
                # follows the 'TelegramExplorer' logging configuration.
                logger.exception('Unable to Send Notification')
50 | *.manifest 51 | *.spec 52 | 53 | # Installer logs 54 | pip-log.txt 55 | pip-delete-this-directory.txt 56 | 57 | # Unit test / coverage reports 58 | htmlcov/ 59 | .tox/ 60 | .nox/ 61 | .coverage 62 | .coverage.* 63 | .cache 64 | nosetests.xml 65 | coverage.xml 66 | *.cover 67 | *.py,cover 68 | .hypothesis/ 69 | .pytest_cache/ 70 | 71 | # Translations 72 | *.mo 73 | *.pot 74 | 75 | # Django stuff: 76 | *.log 77 | local_settings.py 78 | db.sqlite3 79 | db.sqlite3-journal 80 | 81 | # Flask stuff: 82 | instance/ 83 | .webassets-cache 84 | 85 | # Scrapy stuff: 86 | .scrapy 87 | 88 | # Sphinx documentation 89 | docs/_build/ 90 | 91 | # PyBuilder 92 | target/ 93 | 94 | # Jupyter Notebook 95 | .ipynb_checkpoints 96 | 97 | # IPython 98 | profile_default/ 99 | ipython_config.py 100 | 101 | # pyenv 102 | .python-version 103 | 104 | # pipenv 105 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 106 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 107 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 108 | # install all needed dependencies. 109 | #Pipfile.lock 110 | 111 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 112 | __pypackages__/ 113 | 114 | # Celery stuff 115 | celerybeat-schedule 116 | celerybeat.pid 117 | 118 | # SageMath parsed files 119 | *.sage.py 120 | 121 | # Environments 122 | .env 123 | .venv 124 | env/ 125 | venv/ 126 | ENV/ 127 | env.bak/ 128 | venv.bak/ 129 | 130 | # Spyder project settings 131 | .spyderproject 132 | .spyproject 133 | 134 | # Rope project settings 135 | .ropeproject 136 | 137 | # mkdocs documentation 138 | /site 139 | 140 | # mypy 141 | .mypy_cache/ 142 | .dmypy.json 143 | dmypy.json 144 | 145 | # Pyre type checker 146 | .pyre/ 147 | /tests/_report/ 148 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist=py38,quality,coverage,test,build,deploy,docs 3 | skipsdist=True 4 | 5 | 6 | [testenv:quality] 7 | allowlist_externals = poetry 8 | mypy 9 | ruff 10 | changedir = . 11 | deps = 12 | -rrequirements.txt 13 | 14 | commands = 15 | poetry lock --no-update 16 | poetry install -v --sync 17 | 18 | ruff check ./TEx 19 | 20 | mypy --config-file mypy.ini 21 | 22 | [testenv:coverage] 23 | allowlist_externals = poetry 24 | changedir = tests 25 | deps = 26 | -rrequirements.txt 27 | 28 | commands = 29 | poetry lock --no-update 30 | poetry install -v --sync 31 | poetry run coverage erase 32 | poetry run coverage run --source='../TEx' -m pytest . {posargs} --color=yes 33 | poetry run coverage report --rcfile=../coverage.rc 34 | poetry run coverage html --rcfile=../coverage.rc --fail-under=85 35 | 36 | 37 | [testenv] 38 | allowlist_externals = poetry 39 | changedir = tests 40 | deps = 41 | -rrequirements.txt 42 | 43 | commands = 44 | poetry lock --no-update 45 | poetry install -v --sync 46 | poetry run pytest . {posargs} --verbose --color=yes 47 | 48 | 49 | [testenv:build] 50 | allowlist_externals = cp 51 | rm 52 | skip_install = True 53 | changedir = . 
54 | deps = 55 | -rrequirements.txt 56 | 57 | commands = 58 | cp README.md TEx 59 | cp pyproject.toml TEx 60 | 61 | poetry lock --no-update 62 | poetry install --without dev -v --sync 63 | poetry build -v 64 | 65 | rm TEx/README.md 66 | rm TEx/pyproject.toml 67 | 68 | 69 | [testenv:deploy] 70 | allowlist_externals = cp 71 | rm 72 | skip_install = True 73 | changedir = . 74 | 75 | deps = 76 | poetry==1.5.1 77 | 78 | passenv = 79 | PYPI_DEPLOY_TOKEN 80 | 81 | commands = 82 | cp README.md TEx 83 | cp pyproject.toml TEx 84 | 85 | poetry lock --no-update 86 | poetry install --without dev -v --sync 87 | poetry config pypi-token.pypi {env:PYPI_DEPLOY_TOKEN} 88 | poetry publish --build 89 | 90 | rm TEx/README.md 91 | rm TEx/pyproject.toml 92 | 93 | [testenv:docs] 94 | allowlist_externals = mkdocs 95 | 96 | skip_install = True 97 | changedir = . 98 | 99 | deps = 100 | poetry==1.5.1 101 | 102 | commands = 103 | poetry lock --no-update 104 | poetry install -v --sync 105 | mkdocs build --clean --site-dir compiled_docs -v 106 | 107 | [flake8] 108 | ignore=E501,D202,D401,D902,I100,I201,I202 109 | exclude=coverage,codequality,.git,__pycache__,build,dist,venv,.tox,data,assets,htmlcov,.idea,tests 110 | 111 | max-complexity=15 112 | verbose=2 113 | count=True 114 | hang_closing=True 115 | hang-closing=True 116 | show_source=True 117 | show-source=True 118 | statistics=True 119 | jobs=6 120 | -------------------------------------------------------------------------------- /TEx/report_templates/default_index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 23 | 24 | 25 | 26 |
27 |
28 |

TEx - Telegram Explorer ({{target_phone}})

29 |
30 |
31 | Report generated at {{now}} 32 |
From {{start}} To {{end}} 33 |
Groups: {{groups_filter}} 34 |
Filtering: {{words_filter}} 35 |
36 |
37 | 38 | 39 | 40 | 41 | 42 | {% for group in groups %} 43 | 44 | 49 | 54 | 57 | 58 | {% endfor %} 59 |
GroupsN. Messages
45 | 46 | 47 | 48 | 50 | 51 | {{group.title}} - {{group.group_username}} ({{group.id}}) 52 | 53 | 55 | {{group.meta_message_count}} 56 |
60 |
61 |
62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /tests/report_templates/default_index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 23 | 24 | 25 | 26 |
27 |
28 |

TEx - Telegram Explorer ({{target_phone}})

29 |
30 |
31 | Report generated at {{now}} 32 |
From {{start}} To {{end}} 33 |
Groups: {{groups_filter}} 34 |
Filtering: {{words_filter}} 35 |
36 |
37 | 38 | 39 | 40 | 41 | 42 | {% for group in groups %} 43 | 44 | 49 | 54 | 57 | 58 | {% endfor %} 59 |
GroupsN. Messages
45 | 46 | 47 | 48 | 50 | 51 | {{group.title}} - {{group.group_username}} ({{group.id}}) 52 | 53 | 55 | {{group.meta_message_count}} 56 |
60 |
61 |
class TempFileHandler:
    """Temporary File Handler.

    Entries are ``TempDataOrmEntity`` rows handled through the
    ``DbManager.SESSIONS['temp']`` session and addressed by their ``path``.
    """

    @staticmethod
    def file_exist(path: str) -> bool:
        """Return if a File Exists.

        :param path: File Path
        :return: True if at least one Entry is Stored for the Path
        """
        return bool(DbManager.SESSIONS['temp'].query(TempDataOrmEntity).filter_by(path=path).count() > 0)

    @staticmethod
    def read_file_text(path: str) -> str:
        """Read All File Content.

        :param path: File Path
        :return: File Content
        """
        # NOTE(review): .first() yields None when no entry matches, in which case the
        # attribute access below raises AttributeError; callers are presumably expected
        # to check file_exist() first - confirm before changing the exception type.
        entity: TempDataOrmEntity = cast(TempDataOrmEntity, DbManager.SESSIONS['temp'].query(TempDataOrmEntity).filter_by(path=path).first())
        return str(entity.data)

    @staticmethod
    def remove_expired_entries() -> int:
        """Remove all Expired Entries.

        An entry is expired when its ``valid_at`` is not after the current UTC timestamp.

        :return: Number of Removed Entries
        """
        total: int = DbManager.SESSIONS['temp'].execute(
            TempDataOrmEntity.__table__.delete().where(  # type: ignore
                TempDataOrmEntity.valid_at <= int(datetime.now(tz=pytz.UTC).timestamp()),
            ),
        ).rowcount

        DbManager.SESSIONS['temp'].flush()
        DbManager.SESSIONS['temp'].commit()
        return total

    @staticmethod
    def purge() -> int:
        """Remove all Entries.

        :return: Number of Removed Entries
        """
        total: int = DbManager.SESSIONS['temp'].execute(TempDataOrmEntity.__table__.delete()).rowcount  # type: ignore
        DbManager.SESSIONS['temp'].flush()
        DbManager.SESSIONS['temp'].commit()
        return total

    @staticmethod
    def write_file_text(path: str, content: str, validate_seconds: int = 3600) -> None:
        """
        Write Text Content into File.

        Any entry already stored for the same path is replaced.

        :param path: File Path
        :param content: File Content
        :param validate_seconds: File Validation in Seconds
        :return: None
        """
        # Delete if Exists
        DbManager.SESSIONS['temp'].execute(
            TempDataOrmEntity.__table__.delete().where(TempDataOrmEntity.path == path),  # type: ignore
        )

        # Read the clock once so that valid_at is exactly created_at + validate_seconds,
        # even when the call straddles a second boundary.
        created_at: int = int(datetime.now(tz=pytz.UTC).timestamp())

        entity: TempDataOrmEntity = TempDataOrmEntity(
            path=path,
            data=content,
            created_at=created_at,
            valid_at=created_at + validate_seconds,
        )
        DbManager.SESSIONS['temp'].add(entity)

        # Execute
        DbManager.SESSIONS['temp'].flush()
        DbManager.SESSIONS['temp'].commit()
index_name='ix_telegram_message_group_id_date', 36 | version='V0.3.0', 37 | field_spec=(TelegramMessageOrmEntity.group_id, TelegramMessageOrmEntity.date_time.desc()), 38 | db_name=db_name, 39 | ) 40 | 41 | # ix_telegram_media_group_id_date - V0.3.0 42 | DatabaseMigrator.__create_index( 43 | metadata=meta, 44 | table_name='telegram_media', 45 | index_name='ix_telegram_media_group_id_date', 46 | version='V0.3.0', 47 | field_spec=(TelegramMediaOrmEntity.group_id, TelegramMediaOrmEntity.date_time.desc()), 48 | db_name=db_name, 49 | ) 50 | 51 | @staticmethod 52 | def __create_index(metadata: MetaData, table_name: str, index_name: str, version: str, field_spec: tuple, 53 | db_name: str) -> None: 54 | 55 | # Messages Table 56 | table: Table = metadata.tables[table_name] 57 | 58 | # ix_telegram_message_group_id_date - V0.3.0 59 | index_exists: bool = DatabaseMigrator.__check_index_exists( 60 | table=table, 61 | index_name=index_name, 62 | ) 63 | 64 | if not index_exists: 65 | logger.info(f'\t[*] APPLYING DB ({db_name}) MIGRATION ({version}) - {index_name}') 66 | 67 | new_index: Index = sqlalchemy.Index( 68 | index_name, 69 | *field_spec, 70 | ) 71 | new_index.create(bind=DbManager.SQLALCHEMY_BINDS[db_name]) 72 | 73 | @staticmethod 74 | def __check_index_exists(table: Table, index_name: str) -> bool: 75 | """Check if Index Exists on Table.""" 76 | return len([item for item in table.indexes if item.name == index_name]) == 1 77 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Welcome to Telegram Explorer 2 | 3 | [![](https://img.shields.io/github/last-commit/guibacellar/TEx)](https://github.com/guibacellar/TEx/tree/main) 4 | [![](https://img.shields.io/github/languages/code-size/guibacellar/TEx)](https://github.com/guibacellar/TEx/tree/main) 5 | [![](https://img.shields.io/badge/Python-3.8+-green.svg)](https://www.python.org/downloads/) 6 | 
[![](https://github.com/guibacellar/TEx/actions/workflows/cy.yml/badge.svg?branch=main)](https://github.com/guibacellar/TEx/actions/workflows/cy.yml) 7 | [![](https://telegramexplorer.readthedocs.io/en/latest/?badge=latest)](https://telegramexplorer.readthedocs.io/en/latest/) 8 | [![](https://img.shields.io/badge/maintainer-Th3%200bservator-blue)](https://theobservator.net/) 9 | ![](https://img.shields.io/github/v/release/guibacellar/TeX) 10 | 11 | 12 | ## About The Project 13 | 14 | TEx is a Telegram Explorer tool created to help Researchers, Investigators and Law Enforcement Agents to Collect and Process the Huge Amount of Data Generated from Criminal, Fraud, Security and Others Telegram Groups. 15 | 16 | > ⚠️ **BETA VERSION** ⚠️ 17 | >
Please note that V0.3.0 is the latest beta version of this project, so you may encounter bugs that have not yet been identified. 18 | >
I kindly ask you to report any bugs at: [https://github.com/guibacellar/TEx/issues](https://github.com/guibacellar/TEx/issues) 19 | 20 | 21 | ## Requirements 22 | - Python 3.8.1+ (⚠️ Deprecated. Consider using version 3.10+ ⚠️) 23 | - Windows x64 or Linux x64 24 | 25 | 26 | ## Features 27 | - Connection Manager (Handle Telegram Connection) 28 | - Group Information Scraper 29 | - List Groups (Scrape info for all groups, including members, members info and profile pic) 30 | - Automatic Group Information Sync 31 | - Automatic Users Information Sync 32 | - Messages Listener (Listen to all Incoming Messages) 33 | - Messages Scraper (Scrape all Group Messages, starting from the first one) 34 | - Download Media (Including fine media settings like size, groups and/or media type) 35 | - HTML Report Generation 36 | - Export Downloaded Files 37 | - Export Messages 38 | - Message Finder System (Allows finding patterns in messages, using terms or RegEx) 39 | - Message Notification System (Send alerts, finds, or all messages to Discord) 40 | - Elastic Search 8+ Native Integration 41 | - Image OCR using Tesseract 42 | - Signals to Help with Monitoring 43 | 44 | 45 | 46 | ## Installing 47 | Telegram Explorer is available through *pip*, so just use *pip install* to fully install TEx. 48 | 49 | ```bash 50 | pip install TelegramExplorer 51 | ``` 52 | 53 | 54 | ## Upgrading 55 | To upgrade TEx to the latest version, just use the *pip install --upgrade* command.
56 | 57 | ```bash 58 | pip install --upgrade TelegramExplorer 59 | ``` 60 | 61 | ## Documentation 62 | [https://telegramexplorer.readthedocs.io/en/latest/](https://telegramexplorer.readthedocs.io/en/latest/) 63 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Telegram Explorer 2 | repo_url: https://github.com/guibacellar/TEx/ 3 | copyright: Copyright © 2023 - Th3 0bservator 4 | 5 | theme: 6 | name: material 7 | highlightjs: true 8 | features: 9 | - navigation.footer 10 | 11 | markdown_extensions: 12 | - admonition 13 | - pymdownx.details 14 | - pymdownx.superfences 15 | - footnotes 16 | 17 | 18 | nav: 19 | - Home: 'index.md' 20 | - 'Authentication': 'authentication.md' 21 | - 'Contact': 'contact.md' 22 | - 'Secret Chats': 'secret_chats.md' 23 | - 'Configuration': 24 | - 'Basic Configuration': 'configuration/basic.md' 25 | - 'Proxy': 'configuration/proxy.md' 26 | - 'Media Download': 27 | - 'Configuration': 'configuration/media_download_configuration.md' 28 | - 'Examples': 'configuration/media_download_examples.md' 29 | - 'Content-Types': 'configuration/media_download_content_types.md' 30 | - 'OCR': 'configuration/ocr.md' 31 | - 'Examples': 32 | - 'Scenario-Based Examples': 'configuration/scenario_based_examples.md' 33 | - 'Complete Configuration File Example': 'configuration/complete_configuration_file_example.md' 34 | - 'How to Use': 35 | - 'Basic Usage': 'how_use/how_to_use_basic.md' 36 | - 'Connecting to Telegram Servers': 'how_use/usage_connection.md' 37 | - 'Download/Update Groups': 'how_use/usage_load_groups.md' 38 | - 'List Groups': 'how_use/usage_list_groups.md' 39 | - 'Listen Messages': 'how_use/usage_message_listener.md' 40 | - 'Download Messages': 'how_use/usage_download_messages.md' 41 | - 'Message Finder System': 42 | - 'Configuration': 'finder/configuration.md' 43 | - 'Catch All': 'finder/finder_catchall.md' 44 
| - 'RegEx Finder': 'finder/finder_regex.md' 45 | - 'Notification System': 46 | - 'Discord Notification Hook': 'notification/notification_discord.md' 47 | - 'Elastic Search Connector': 48 | - 'Configuration': 'notification/notification_elasticsearch.md' 49 | - 'Index Template': 'notification/notification_elasticsearch_index_template.md' 50 | - 'Signals Template': 'notification/notification_elasticsearch_signals_template.md' 51 | - 'Signals': 'notification/signals.md' 52 | - 'Message Exporter System': 53 | - 'Pandas Rolling Exporter': 'exporting/pandas_rolling.md' 54 | - 'Reports': 55 | - 'Export Files': 'report/report_export_files.md' 56 | - 'HTML Report': 'report/report_html.md' 57 | - 'Status Report': 'report/report_status.md' 58 | - 'Text Report': 'report/report_text.md' 59 | - 'Maintenance': 60 | - 'Purging Old Data': 'maintenance/purge_old_data.md' 61 | - 'Changelog': 62 | - 'V0.3.0': 'changelog/v030.md' 63 | 64 | site_author: Th3 0bservator 65 | 66 | extra: 67 | social: 68 | - icon: fontawesome/brands/twitter 69 | link: https://twitter.com/th3_0bservator 70 | - icon: fontawesome/brands/github 71 | link: https://github.com/guibacellar/ 72 | - icon: fontawesome/brands/linkedin 73 | link: https://www.linkedin.com/in/guilherme-bacellar/ -------------------------------------------------------------------------------- /TEx/modules/telegram_report_generator/telegram_report_sent_telegram.py: -------------------------------------------------------------------------------- 1 | """Telegram Report Generator.""" 2 | from __future__ import annotations 3 | 4 | import asyncio 5 | import datetime 6 | import logging 7 | import os 8 | import zipfile 9 | from configparser import ConfigParser 10 | from os.path import basename 11 | from typing import Dict, cast 12 | 13 | import pytz 14 | from telethon import TelegramClient 15 | 16 | from TEx.core.base_module import BaseModule 17 | 18 | logger = logging.getLogger('TelegramExplorer') 19 | 20 | 21 | class 
TelegramReportSentViaTelegram(BaseModule): 22 | """Sent the Report to a Telegram user.""" 23 | 24 | __USERS_RESOLUTION_CACHE: Dict = {} 25 | 26 | async def can_activate(self, config: ConfigParser, args: Dict, data: Dict) -> bool: 27 | """ 28 | Abstract Method for Module Activation Function.. 29 | 30 | :return: 31 | """ 32 | return cast(bool, args['sent_report_telegram']) 33 | 34 | async def run(self, config: ConfigParser, args: Dict, data: Dict) -> None: 35 | """Execute Module.""" 36 | if not await self.can_activate(config, args, data): 37 | logger.debug('\t\tModule is Not Enabled...') 38 | return 39 | 40 | # Check Report and Assets Folder 41 | report_root_folder: str = args['report_folder'] 42 | 43 | # Create Report File Name 44 | attach_name: str = args['attachment_name'].replace('@@now@@', datetime.datetime.strftime(datetime.datetime.now(tz=pytz.UTC), '%y%m%d_%H%M%S')) + '.zip' 45 | report_filename: str = os.path.join(report_root_folder, attach_name) 46 | logger.info(f'\t\t\tTarget Report Filename: {report_filename}') 47 | 48 | # Create a Zip File 49 | logger.info('\t\t\tGenerating Report ZIP File') 50 | with zipfile.ZipFile(report_filename, 'w', compresslevel=9, compression=zipfile.ZIP_DEFLATED) as zip_obj: 51 | # Iterate over all the files in directory 52 | for folder_name, _subfolders, filenames in os.walk(report_root_folder): 53 | for filename in filenames: 54 | file_path = os.path.join(folder_name, filename) 55 | 56 | if file_path == report_filename: 57 | continue 58 | 59 | zip_obj.write(file_path, os.path.join(basename(folder_name), filename)) 60 | 61 | # Sent via Telegram 62 | client: TelegramClient = data['telegram_client'] 63 | receiver = await client.get_input_entity(args['destination_username']) 64 | 65 | # Sent Message 66 | logger.info('\t\t\tSending Message') 67 | await client.send_message( 68 | receiver, 69 | args['title'].replace( 70 | '@@now@@', 71 | datetime.datetime.strftime(datetime.datetime.now(tz=pytz.UTC), '%y-%m-%d %H:%M:%S'), 72 | 
).replace('\\n', '\n'), 73 | ) 74 | await asyncio.sleep(1) 75 | # Sent the Report 76 | await client.send_file(receiver, f'{report_root_folder}/{attach_name}') 77 | 78 | # Remove Report File 79 | os.remove(report_filename) 80 | -------------------------------------------------------------------------------- /tests/unittest_configfile.config: -------------------------------------------------------------------------------- 1 | [CONFIGURATION] 2 | api_id=12345678 3 | api_hash=deff1f2587358746548deadbeef58ddd 4 | phone_number=5526986587745 5 | data_path=_data 6 | device_model=UT_DEVICE_01 7 | timeout=20 8 | 9 | [OCR] 10 | enabled=true 11 | type=tesseract 12 | 13 | [OCR.TESSERACT] 14 | tesseract_cmd=/path/to/folder 15 | language=eng 16 | 17 | [PROXY] 18 | type=HTTP 19 | address=1.2.3.4 20 | port=4444 21 | username=ut_username 22 | password=ut_password 23 | rdns=true 24 | 25 | [MEDIA.DOWNLOAD] 26 | default=ALLOW 27 | max_download_size_bytes=256000000 28 | 29 | [MEDIA.DOWNLOAD.application/json] 30 | enabled=ALLOW 31 | max_download_size_bytes=256000000 32 | groups=* 33 | 34 | [MEDIA.DOWNLOAD.image/jpeg] 35 | enabled=ALLOW 36 | max_download_size_bytes=25600000 37 | groups=* 38 | 39 | [MEDIA.DOWNLOAD.text/plain] 40 | enabled=ALLOW 41 | max_download_size_bytes=256000000 42 | groups=5586,12099,1 43 | 44 | [FINDER] 45 | enabled=true 46 | find_in_text_files_enabled=true 47 | find_in_text_files_max_size_bytes=20000000 48 | 49 | [FINDER.RULE.UT_Finder_Demo] 50 | type=regex 51 | regex=term1|term2|term3 52 | notifier=NOTIFIER.DISCORD.NOT_002 53 | exporter=EXPORTER.ROLLING_PANDAS.TEST_EXPORTER_001 54 | 55 | [FINDER.RULE.UT_Finder_Demo_MultiLine] 56 | type=regex 57 | regex=term1 58 | term2 59 | term3 60 | notifier=NOTIFIER.DISCORD.NOT_002 61 | 62 | [FINDER.RULE.UT_Finder_Demo_MultiLine_WithLineBreak] 63 | type=regex 64 | regex= 65 | term1 66 | term2 67 | term3 68 | notifier=NOTIFIER.DISCORD.NOT_002 69 | 70 | [FINDER.RULE.UT_Finder_Demo_MultiLine_UrlAndCreditCard_WithLineBreak] 71 | 
type=regex 72 | regex= 73 | /^https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%%_\+.~#?&\/=]*)$/ 74 | (^4[0-9]{12}(?:[0-9]{3})?$)|(^(?:5[1-5][0-9]{2}|222[1-9]|22[3-9][0-9]|2[3-6][0-9]{2}|27[01][0-9]|2720)[0-9]{12}$)|(3[47][0-9]{13})|(^3(?:0[0-5]|[68][0-9])[0-9]{11}$)|(^6(?:011|5[0-9]{2})[0-9]{12}$)|(^(?:2131|1800|35\d{3})\d{11}$) 75 | notifier=NOTIFIER.DISCORD.NOT_002 76 | 77 | [NOTIFIER.DISCORD.NOT_001] 78 | webhook=https://uri.domain.com/webhook/001 79 | prevent_duplication_for_minutes=240 80 | media_attachments_enabled=true 81 | media_attachments_max_size_bytes=10000000 82 | 83 | [NOTIFIER.DISCORD.NOT_002] 84 | webhook=https://uri.domain.com/webhook/002 85 | prevent_duplication_for_minutes=240 86 | media_attachments_enabled=true 87 | media_attachments_max_size_bytes=10000000 88 | 89 | [NOTIFIER.ELASTIC_SEARCH.UT_01] 90 | address=https://localhost:666 91 | api_key=test_api_key 92 | verify_ssl_cert=False 93 | index_name=test_index_name 94 | pipeline_name=test_pipeline_name 95 | 96 | [EXPORTER.ROLLING_PANDAS.TEST_EXPORTER_001] 97 | file_root_path=_data/export 98 | rolling_every_minutes=1 99 | fields=date_time,raw_text,group_name,group_id,from_id,to_id,reply_to_msg_id,message_id,is_reply,found_on 100 | use_header=true 101 | output_format=csv 102 | keep_last_files=30 103 | 104 | [SIGNALS] 105 | enabled=true 106 | keep_alive_interval=2 107 | 108 | keep_alive_notifer=NOTIFIER.DISCORD.NOT_001 109 | initialization_notifer=NOTIFIER.ELASTIC_SEARCH.UT_01 110 | shutdown_notifer=NOTIFIER.DISCORD.NOT_001,NOTIFIER.ELASTIC_SEARCH.UT_01 111 | new_group_notifer=NOTIFIER.ELASTIC_SEARCH.UT_01,NOTIFIER.DISCORD.NOT_001 -------------------------------------------------------------------------------- /TEx/core/mapper/telethon_message_mapper.py: -------------------------------------------------------------------------------- 1 | """Telethon Event Entity Mapper.""" 2 | from __future__ import annotations 3 | 4 | import logging 5 | from typing 
import Optional, Union 6 | 7 | from pydantic import BaseModel 8 | from telethon.errors import ChannelPrivateError 9 | from telethon.tl.patched import Message 10 | from telethon.tl.types import Channel, Chat, PeerUser, User 11 | 12 | from TEx.models.facade.finder_notification_facade_entity import FinderNotificationMessageEntity 13 | from TEx.models.facade.media_handler_facade_entity import MediaHandlingEntity 14 | 15 | logger = logging.getLogger('TelegramExplorer') 16 | 17 | 18 | class TelethonMessageEntityMapper: 19 | """Telethon Event Entity Mapper.""" 20 | 21 | class ChatPropsModel(BaseModel): 22 | """Model for __map_chat_props method.""" 23 | 24 | chat_id: int 25 | chat_title: str 26 | 27 | @staticmethod 28 | async def to_finder_notification_facade_entity(message: Message, downloaded_media_info: Optional[MediaHandlingEntity], ocr_content: Optional[str]) -> \ 29 | Optional[FinderNotificationMessageEntity]: 30 | """Map Telethon Event to FinderNotificationMessageEntity.""" 31 | if not message: 32 | return None 33 | 34 | try: 35 | mapped_chat_props: TelethonMessageEntityMapper.ChatPropsModel = TelethonMessageEntityMapper.__map_chat_props( 36 | entity=await message.get_chat(), 37 | ) 38 | except ChannelPrivateError as _ex: 39 | return None 40 | 41 | raw_text: str = message.raw_text 42 | if ocr_content: 43 | if raw_text and raw_text != '': 44 | raw_text += '\n\n' 45 | 46 | raw_text += ocr_content 47 | 48 | h_result: FinderNotificationMessageEntity = FinderNotificationMessageEntity( 49 | date_time=message.date, 50 | raw_text=raw_text, 51 | group_name=mapped_chat_props.chat_title, 52 | group_id=mapped_chat_props.chat_id, 53 | from_id=message.from_id.user_id if isinstance(message.from_id, PeerUser) else None, 54 | to_id=message.to_id.channel_id if message.to_id is not None and hasattr(message.to_id, 'channel_id') else None, 55 | reply_to_msg_id=message.reply_to.reply_to_msg_id if message.is_reply and message.reply_to else None, 56 | message_id=message.id, 57 | 
is_reply=message.is_reply, 58 | downloaded_media_info=downloaded_media_info, 59 | found_on='UNDEFINED', 60 | ) 61 | 62 | return h_result 63 | 64 | @staticmethod 65 | def __map_chat_props(entity: Union[Channel, User, Chat]) -> TelethonMessageEntityMapper.ChatPropsModel: 66 | """Map Chat Specific Props.""" 67 | if isinstance(entity, (Channel, Chat)): 68 | return TelethonMessageEntityMapper.ChatPropsModel( 69 | chat_id=entity.id, 70 | chat_title=entity.title if entity.title else '', 71 | ) 72 | 73 | if isinstance(entity, User): 74 | return TelethonMessageEntityMapper.ChatPropsModel( 75 | chat_id=entity.id, 76 | chat_title=entity.username if entity.username else (entity.phone if entity.phone else ''), 77 | ) 78 | 79 | raise AttributeError(entity, 'Invalid entity type: ' + str(type(entity))) 80 | -------------------------------------------------------------------------------- /tests/notifier/test_notifier_engine.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import unittest 3 | from configparser import ConfigParser 4 | from datetime import datetime 5 | from typing import Dict 6 | from unittest import mock 7 | from unittest.mock import call 8 | 9 | from TEx.models.facade.finder_notification_facade_entity import FinderNotificationMessageEntity 10 | from TEx.notifier.notifier_engine import NotifierEngine 11 | from tests.modules.common import TestsCommon 12 | from tests.modules.mockups_groups_mockup_data import base_messages_mockup_data 13 | 14 | 15 | class NotifierEngineTest(unittest.TestCase): 16 | 17 | def setUp(self) -> None: 18 | self.config = ConfigParser() 19 | self.config.read('../../config.ini') 20 | 21 | def test_run(self): 22 | """Test Run Method with Telegram Server Connection.""" 23 | 24 | # Setup Mock 25 | discord_notifier_mockup = mock.AsyncMock() 26 | discord_notifier_mockup.run = mock.AsyncMock() 27 | 28 | elastic_notifier_mockup = mock.AsyncMock() 29 | elastic_notifier_mockup.run = 
mock.AsyncMock() 30 | 31 | target: NotifierEngine = NotifierEngine() 32 | args: Dict = { 33 | 'export_text': True, 34 | 'config': 'unittest_configfile.config', 35 | 'report_folder': '_report', 36 | 'group_id': '2', 37 | 'order_desc': True, 38 | 'filter': 'Message', 39 | 'limit_days': 30, 40 | 'regex': '(.*http://.*),(.*https://.*)' 41 | } 42 | data: Dict = {} 43 | TestsCommon.execute_basic_pipeline_steps_for_initialization(config=self.config, args=args, data=data) 44 | 45 | # Set Message 46 | message_entity: FinderNotificationMessageEntity = FinderNotificationMessageEntity( 47 | date_time=datetime(2023, 10, 1, 9, 58, 22), 48 | raw_text="Mocked Raw Text", 49 | group_name="Channel 1972142108", 50 | group_id=1972142108, 51 | from_id="1234", 52 | to_id=9876, 53 | reply_to_msg_id=5544, 54 | message_id=55, 55 | is_reply=False, 56 | downloaded_media_info=None, 57 | found_on='UT FOUND 7' 58 | ) 59 | 60 | with mock.patch('TEx.notifier.notifier_engine.DiscordNotifier', return_value=discord_notifier_mockup): 61 | with mock.patch('TEx.notifier.notifier_engine.ElasticSearchNotifier', return_value=elastic_notifier_mockup): 62 | target.configure(config=self.config) 63 | loop = asyncio.get_event_loop() 64 | loop.run_until_complete( 65 | target.run( 66 | notifiers=['NOTIFIER.DISCORD.NOT_001', 'NOTIFIER.DISCORD.NOT_002', 'NOTIFIER.ELASTIC_SEARCH.UT_01'], 67 | entity=message_entity, 68 | rule_id='RULE_UT_01', 69 | source='+15558987453' 70 | ) 71 | ) 72 | 73 | discord_notifier_mockup.run.assert_has_awaits([ 74 | call(entity=message_entity, rule_id='RULE_UT_01', source='+15558987453'), 75 | call(entity=message_entity, rule_id='RULE_UT_01', source='+15558987453') 76 | ]) 77 | 78 | elastic_notifier_mockup.run.assert_has_awaits([ 79 | call(entity=message_entity, rule_id='RULE_UT_01', source='+15558987453') 80 | ]) 81 | -------------------------------------------------------------------------------- /TEx/core/mapper/telethon_channel_mapper.py: 
-------------------------------------------------------------------------------- 1 | """Telethon Channel Entity Mapper.""" 2 | from __future__ import annotations 3 | 4 | from typing import Dict, Union 5 | 6 | from telethon.tl.types import Channel, Chat, User 7 | 8 | 9 | class TelethonChannelEntityMapper: 10 | """Telethon Channel Entity Mapper.""" 11 | 12 | @staticmethod 13 | def to_database_dict(entity: Union[Chat, Channel, User], target_phone_numer: str) -> Dict: 14 | """Map Telethon Entity to TEx Dict to Insert into DB.""" 15 | # Build Model 16 | 17 | # Common Props 18 | values: Dict = { 19 | 'id': entity.id, 20 | 'constructor_id': entity.CONSTRUCTOR_ID, 21 | 'source': target_phone_numer, 22 | } 23 | 24 | # Apply Specific Mappers 25 | if isinstance(entity, Channel): 26 | values.update(TelethonChannelEntityMapper.__map_channel(entity)) 27 | 28 | elif isinstance(entity, Chat): 29 | values.update(TelethonChannelEntityMapper.__map_chat(entity)) 30 | 31 | elif isinstance(entity, User): 32 | values.update(TelethonChannelEntityMapper.__map_user(entity)) 33 | 34 | return values 35 | 36 | @staticmethod 37 | def __map_channel(entity: Channel) -> Dict: 38 | """Map Telethon Channel to TEx Dict to Insert into DB.""" 39 | return { 40 | 'gigagroup': entity.gigagroup if entity.gigagroup else False, 41 | 'has_geo': entity.has_geo if entity.has_geo else False, 42 | 'participants_count': entity.participants_count if entity.participants_count else 0, 43 | 'title': entity.title if entity.title else '', 44 | 'access_hash': str(entity.access_hash), 45 | 'fake': entity.fake if entity.fake else False, 46 | 'restricted': entity.restricted if entity.restricted else False, 47 | 'scam': entity.scam if entity.scam else False, 48 | 'group_username': entity.username if entity.username else '', 49 | 'verified': entity.verified if entity.verified else False, 50 | } 51 | 52 | @staticmethod 53 | def __map_chat(entity: Chat) -> Dict: 54 | """Map Telethon Chat to TEx Dict to Insert into DB.""" 55 | 
return { 56 | 'gigagroup': False, 57 | 'has_geo': False, 58 | 'participants_count': entity.participants_count if entity.participants_count else 0, 59 | 'title': entity.title if entity.title else '', 60 | 'access_hash': '', 61 | 'fake': False, 62 | 'restricted': False, 63 | 'scam': False, 64 | 'group_username': '', 65 | 'verified': False, 66 | } 67 | 68 | @staticmethod 69 | def __map_user(entity: User) -> Dict: 70 | """Map Telethon User to TEx Dict to Insert into DB.""" 71 | return { 72 | 'gigagroup': False, 73 | 'has_geo': False, 74 | 'participants_count': 0, 75 | 'title': entity.username if entity.username else (entity.phone if entity.phone else ''), 76 | 'access_hash': str(entity.access_hash), 77 | 'fake': entity.fake if entity.fake else False, 78 | 'restricted': entity.restricted if entity.restricted else False, 79 | 'scam': entity.scam if entity.scam else False, 80 | 'group_username': entity.username if entity.username else '', 81 | 'verified': entity.verified if entity.verified else False, 82 | } 83 | -------------------------------------------------------------------------------- /TEx/modules/telegram_maintenance/telegram_purge_old_data.py: -------------------------------------------------------------------------------- 1 | """Telegram Maintenance - Purge old Data Manager.""" 2 | from __future__ import annotations 3 | 4 | import logging 5 | import os.path 6 | from configparser import ConfigParser 7 | from typing import Dict, List, cast 8 | 9 | from TEx.core.base_module import BaseModule 10 | from TEx.database.telegram_group_database import TelegramGroupDatabaseManager, TelegramMediaDatabaseManager, TelegramMessageDatabaseManager 11 | from TEx.models.database.telegram_db_model import TelegramGroupOrmEntity, TelegramMediaOrmEntity 12 | 13 | logger = logging.getLogger('TelegramExplorer') 14 | 15 | 16 | class TelegramMaintenancePurgeOldData(BaseModule): 17 | """Telegram Maintenance - Purge old Data Manager.""" 18 | 19 | async def can_activate(self, config: 
ConfigParser, args: Dict, data: Dict) -> bool: 20 | """ 21 | Abstract Method for Module Activation Function. 22 | 23 | :return: 24 | """ 25 | return cast(bool, args['purge_old_data']) 26 | 27 | async def run(self, config: ConfigParser, args: Dict, data: Dict) -> None: 28 | """Execute Module.""" 29 | if not await self.can_activate(config, args, data): 30 | logger.debug('\t\tModule is Not Enabled...') 31 | return 32 | 33 | # Load Groups from DB 34 | groups: List[TelegramGroupOrmEntity] = TelegramGroupDatabaseManager.get_all_by_phone_number( 35 | config['CONFIGURATION']['phone_number']) 36 | logger.info(f'\t\tFound {len(groups)} Groups') 37 | 38 | for group in groups: 39 | try: 40 | await self.__process_group( 41 | group_id=group.id, 42 | group_name=group.title, 43 | max_age=int(args['limit_days']), 44 | media_root_path=config['CONFIGURATION']['data_path'], 45 | ) 46 | except ValueError as ex: 47 | logger.info('\t\t\tUnable to Purge Old Messages...') 48 | logger.error(ex) 49 | 50 | # Compress DB 51 | TelegramMediaDatabaseManager.apply_db_maintenance() 52 | logger.info('\t\t\tDB Optimized Successfully') 53 | 54 | async def __process_group(self, group_id: int, group_name: str, max_age: int, media_root_path: str) -> None: 55 | """Process and Remove Old Messages and Medias from a Single Group.""" 56 | logger.info(f'\t\tPurging ({group_id}) "{group_name}"') 57 | 58 | # Get all Old Medias 59 | all_medias: List[TelegramMediaOrmEntity] = TelegramMediaDatabaseManager.get_all_medias_by_age( 60 | group_id=group_id, 61 | media_limit_days=max_age, 62 | ) 63 | media_count: int = len(all_medias) 64 | logger.info(f'\t\t\t{len(all_medias)} Medias to be Removed') 65 | 66 | if media_count > 0: 67 | 68 | for media in all_medias: 69 | 70 | # Remove from Disk 71 | media_file_name: str = os.path.join(media_root_path, 'media', str(media.group_id), media.file_name) 72 | logger.info(f'\t\t\t\t{media_file_name}') 73 | 74 | if os.path.exists(media_file_name): 75 | os.remove(media_file_name) 76 | 
77 | # Remove from DB 78 | TelegramMediaDatabaseManager.delete_media_by_id(media_id=media.id) 79 | 80 | # Delete all Old Messages 81 | total_messages: int = TelegramMessageDatabaseManager.remove_all_messages_by_age( 82 | group_id=group_id, 83 | limit_days=max_age, 84 | ) 85 | logger.info(f'\t\t\t{total_messages} Messages Removed') 86 | -------------------------------------------------------------------------------- /tests/modules/test_telegram_groups_list.py: -------------------------------------------------------------------------------- 1 | """Telegram Groups List Tests.""" 2 | 3 | import asyncio 4 | import logging 5 | import unittest 6 | from configparser import ConfigParser 7 | from typing import Dict 8 | 9 | from TEx.database.telegram_group_database import TelegramGroupDatabaseManager 10 | from TEx.modules.telegram_groups_list import TelegramGroupList 11 | from TEx.modules.telegram_groups_scrapper import TelegramGroupScrapper 12 | from tests.modules.common import TestsCommon 13 | 14 | 15 | class TelegramGroupListTest(unittest.TestCase): 16 | 17 | def setUp(self) -> None: 18 | 19 | self.config = ConfigParser() 20 | self.config.read('../../config.ini') 21 | 22 | TestsCommon.basic_test_setup() 23 | 24 | # Add Group 1 - Without Any Message 25 | TelegramGroupDatabaseManager.insert_or_update({ 26 | 'id': 1, 'constructor_id': 'A', 'access_hash': 'AAAAAA', 27 | 'fake': False, 'gigagroup': False, 'has_geo': False, 28 | 'participants_count': 1, 'restricted': False, 29 | 'scam': False, 'group_username': 'UN-A', 30 | 'verified': False, 'title': 'UT-01', 'source': '5526986587745' 31 | }) 32 | 33 | # Add Group 2 - With Previous Messages 34 | TelegramGroupDatabaseManager.insert_or_update({ 35 | 'id': 2, 'constructor_id': 'B', 'access_hash': 'BBBBBB', 36 | 'fake': False, 'gigagroup': False, 'has_geo': False, 37 | 'participants_count': 2, 'restricted': False, 38 | 'scam': False, 'group_username': 'UN-b', 39 | 'verified': False, 'title': 'UT-02', 'source': '5526986587745' 40 | 
}) 41 | 42 | def test_run(self): 43 | """Test Run Method.""" 44 | 45 | target: TelegramGroupScrapper = TelegramGroupList() 46 | args: Dict = { 47 | 'list_groups': True, 48 | 'config': 'unittest_configfile.config', 49 | } 50 | data: Dict = {} 51 | 52 | TestsCommon.execute_basic_pipeline_steps_for_initialization(config=self.config, args=args, data=data) 53 | 54 | with self.assertLogs() as captured: 55 | loop = asyncio.get_event_loop() 56 | loop.run_until_complete( 57 | target.run( 58 | config=self.config, 59 | args=args, 60 | data=data 61 | ) 62 | ) 63 | 64 | # Check Logs 65 | self.assertEqual(4, len(captured.records)) 66 | self.assertEqual(' Found 2 Groups', captured.records[0].message) 67 | self.assertEqual(' ID Username Title', captured.records[1].message) 68 | self.assertEqual(' 1 UN-A UT-01', captured.records[2].message) 69 | self.assertEqual(' 2 UN-b UT-02', captured.records[3].message) 70 | 71 | def test_run_disabled(self): 72 | """Test Run Method Disabled.""" 73 | 74 | target: TelegramGroupScrapper = TelegramGroupList() 75 | args: Dict = { 76 | 'list_groups': False, 77 | 'config': 'unittest_configfile.config', 78 | } 79 | data: Dict = {} 80 | 81 | TestsCommon.execute_basic_pipeline_steps_for_initialization(config=self.config, args=args, data=data) 82 | 83 | with self.assertLogs('TelegramExplorer', level=logging.DEBUG) as captured: 84 | loop = asyncio.get_event_loop() 85 | loop.run_until_complete( 86 | target.run( 87 | config=self.config, 88 | args=args, 89 | data=data 90 | ) 91 | ) 92 | 93 | # Check Logs 94 | self.assertEqual(1, len(captured.records)) 95 | self.assertEqual(' Module is Not Enabled...', captured.records[0].message) 96 | -------------------------------------------------------------------------------- /docs/configuration/complete_configuration_file_example.md: -------------------------------------------------------------------------------- 1 | # Complete Configuration File Example 2 | 3 | This is an example of a complete configuration file with 
four finder rules using three discord hooks, two elastic search connector and signals configuration. 4 | 5 | ```ini 6 | [CONFIGURATION] 7 | api_id=12555896 8 | api_hash=dead1f29db5d1fa56cc42757acbabeef 9 | phone_number=15552809753 10 | data_path=/usr/home/tex_data/ 11 | device_model=AMD64 12 | timeout=30 13 | 14 | [PROXY] 15 | type=HTTP 16 | address=127.0.0.1 17 | port=3128 18 | username=proxy username 19 | password=proxy password 20 | rdns=true 21 | 22 | [MEDIA.DOWNLOAD] 23 | default=ALLOW 24 | max_download_size_bytes=256000000 25 | 26 | [FINDER] 27 | enabled=true 28 | 29 | [FINDER.RULE.MessagesWithURL] 30 | type=regex 31 | regex=/^https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&\/=]*)$/ 32 | notifier=NOTIFIER.DISCORD.MY_HOOK_1 33 | 34 | [FINDER.RULE.FindMessagesWithCreditCard] 35 | type=regex 36 | regex=(^4[0-9]{12}(?:[0-9]{3})?$)|(^(?:5[1-5][0-9]{2}|222[1-9]|22[3-9][0-9]|2[3-6][0-9]{2}|27[01][0-9]|2720)[0-9]{12}$)|(3[47][0-9]{13})|(^3(?:0[0-5]|[68][0-9])[0-9]{11}$)|(^6(?:011|5[0-9]{2})[0-9]{12}$)|(^(?:2131|1800|35\d{3})\d{11}$) 37 | notifier=NOTIFIER.DISCORD.MY_HOOK_2,NOTIFIER.ELASTIC_SEARCH.GENERAL 38 | 39 | [FINDER.RULE.FindMessagesWithEmail] 40 | type=regex 41 | regex=^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$ 42 | notifier=NOTIFIER.DISCORD.MY_HOOK_1,NOTIFIER.DISCORD.MY_HOOK_2 43 | 44 | [FINDER.RULE.CatchAll] 45 | type=all 46 | notifier=NOTIFIER.ELASTIC_SEARCH.GENERAL 47 | exporter=EXPORTER.ROLLING_PANDAS.EXPORT_ALL_MESSAGES 48 | 49 | [NOTIFIER.DISCORD.MY_HOOK_1] 50 | webhook=https://discord.com/api/webhooks/1157896186751897357/o7foobar4txvAvKSdeadHiI-9XYeXaGlQtd-5PtrrX_eCE0XElWktpPqjrZ0KbeefPtQC 51 | prevent_duplication_for_minutes=240 52 | timeout_seconds=30 53 | media_attachments_enabled=false 54 | 55 | [NOTIFIER.DISCORD.MY_HOOK_2] 56 | webhook=https://discord.com/api/webhooks/1128765187657681875/foobarqOMFp_4tM2ic2mbeefNPOZqJnBZZdfaubQv2vJgbYzfdeadZd5aqGX6FmCmbNjX 57 | prevent_duplication_for_minutes=240 58 | 
media_attachments_enabled=false 59 | 60 | [NOTIFIER.DISCORD.SIGNALS_HOOK] 61 | webhook=https://discord.com/api/webhooks/1128765187657681875/foobarqOMFp_457EDs2mbeefNPPeqJnBZZdfaubQvOKIUHYzfdeadZd5aqGX6FmCmbNjv 62 | prevent_duplication_for_minutes=0 63 | media_attachments_enabled=true 64 | media_attachments_max_size_bytes=10000000 65 | 66 | [NOTIFIER.ELASTIC_SEARCH.GENERAL] 67 | address=https://localhost:9200 68 | api_key=bHJtVEg0c0JnNkwwTnYtFFDEADlo6NS1rXzd6NVFSUmEtQ21mQldiUjEwUQ== 69 | verify_ssl_cert=False 70 | index_name=index-name 71 | pipeline_name=ent-search-generic-ingestion 72 | 73 | [NOTIFIER.ELASTIC_SEARCH.SIGNALS] 74 | address=https://localhost:9200 75 | api_key=bHJtVEg0c0JnNkwwTnYtFFDEADlo6NS1rXzd6NVFSUmEtQ21mQldiUjEwUQ== 76 | verify_ssl_cert=False 77 | index_name=index-name-for-signals 78 | pipeline_name=ent-search-generic-ingestion 79 | 80 | [EXPORTER.ROLLING_PANDAS.EXPORT_ALL_MESSAGES] 81 | file_root_path=/path/to/export/folder/ 82 | rolling_every_minutes=5 83 | fields=date_time,raw_text,group_name,group_id,from_id,to_id,reply_to_msg_id,message_id,is_reply,found_on 84 | use_header=true 85 | output_format=json 86 | keep_last_files=20 87 | 88 | [OCR] 89 | enabled=true 90 | type=tesseract 91 | 92 | [OCR.TESSERACT] 93 | tesseract_cmd=/path/to/tesseract/cmd 94 | language=eng 95 | 96 | [SIGNALS] 97 | enabled=true 98 | keep_alive_interval=300 99 | 100 | keep_alive_notifer=NOTIFIER.ELASTIC_SEARCH.SIGNALS 101 | initialization_notifer=NOTIFIER.ELASTIC_SEARCH.SIGNALS 102 | shutdown_notifer=NOTIFIER.ELASTIC_SEARCH.SIGNALS 103 | new_group_notifer=NOTIFIER.DISCORD.SIGNALS_HOOK,NOTIFIER.ELASTIC_SEARCH.SIGNALS 104 | ``` 105 | -------------------------------------------------------------------------------- /TEx/report_templates/default_report.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 24 | 25 | 26 | 27 |
28 |
29 |

TEx - Telegram Explorer - {{groupname}} ({{groupusername}})

30 |
31 | 32 |
33 | 34 | 35 | 36 | 37 | {% for item in messages %} 38 | 39 | 66 | 67 | {% endfor %} 68 |
Messages
40 | 41 | 42 | 43 | 44 | {% autoescape false %} 45 | {{item.date_time}} UTC {{item.to_from_information}} 46 |
{{item.message|replace("\r\n", "
")|replace("\n", "
")}} 47 | {% endautoescape %} 48 | 49 | {% if item.media_is_image +%} 50 | {% autoescape false %} 51 |
52 | {% endautoescape %} 53 | {% elif item.media_mime_type == 'application/vnd.geo' +%} 54 |
GeoLocation: {{item.media_geo}} 55 | {% elif item.media_mime_type == 'video/mp4' +%} 56 |
57 | 61 |
Download: {{item.media_filename.split('/')[1]}} 62 | {% elif item.media_mime_type +%} 63 |
Download: {{item.media_filename.split('/')[1]}} 64 | {% endif %} 65 |
69 |
70 |
71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /tests/core/ocr/test_ocr_engine_factory.py: -------------------------------------------------------------------------------- 1 | """OcrEngineFactory Tests.""" 2 | 3 | import unittest 4 | from unittest import mock 5 | from configparser import ConfigParser 6 | from typing import Dict 7 | 8 | from TEx.core.ocr.dummy_ocr_engine import DummyOcrEngine 9 | from TEx.core.ocr.ocr_engine_base import OcrEngineBase 10 | from TEx.core.ocr.ocr_engine_factory import OcrEngineFactory 11 | from TEx.core.ocr.tesseract_ocr_engine import TesseractOcrEngine 12 | from tests.modules.common import TestsCommon 13 | 14 | 15 | class OcrEngineFactoryTest(unittest.TestCase): 16 | 17 | def setUp(self) -> None: 18 | self.config = ConfigParser() 19 | self.config.read('../../config.ini') 20 | 21 | @mock.patch('TEx.core.ocr.tesseract_ocr_engine.os') 22 | def test_get_instance_tesseract(self, mocked_os_lib): 23 | """Test get_instance_method returning Tesseract Engine.""" 24 | 25 | # Call Test Target Method 26 | args: Dict = { 27 | 'config': 'unittest_configfile.config' 28 | } 29 | data: Dict = {} 30 | 31 | # Configure Mock 32 | mocked_os_lib.path = mock.MagicMock() 33 | mocked_os_lib.path.exists = mock.MagicMock(return_value=True) 34 | 35 | TestsCommon.execute_basic_pipeline_steps_for_initialization(config=self.config, args=args, data=data) 36 | 37 | self.config['OCR']['enabled'] = 'true' 38 | self.config['OCR']['type'] = 'tesseract' 39 | 40 | self.config['OCR.TESSERACT']['tesseract_cmd'] = '/folder/file' 41 | self.config['OCR.TESSERACT']['language'] = 'eng+osd' 42 | 43 | h_result: OcrEngineBase = OcrEngineFactory.get_instance(self.config) 44 | self.assertTrue(isinstance(h_result, TesseractOcrEngine)) 45 | 46 | def test_get_instance_no_ocr_config(self): 47 | """Test get_instance_method without OCR Setting on config file.""" 48 | 49 | # Call Test Target Method 50 | args: Dict = { 51 | 'config': 
'unittest_configfile.config' 52 | } 53 | data: Dict = {} 54 | 55 | TestsCommon.execute_basic_pipeline_steps_for_initialization(config=self.config, args=args, data=data) 56 | 57 | self.config.remove_section('OCR') 58 | self.config.remove_section('TESSERACT') 59 | 60 | h_result: OcrEngineBase = OcrEngineFactory.get_instance(self.config) 61 | self.assertTrue(isinstance(h_result, DummyOcrEngine)) 62 | 63 | def test_get_instance_disabled_ocr_engine(self): 64 | """Test get_instance_method with OCR engine disabled on config file.""" 65 | 66 | # Call Test Target Method 67 | args: Dict = { 68 | 'config': 'unittest_configfile.config' 69 | } 70 | data: Dict = {} 71 | 72 | TestsCommon.execute_basic_pipeline_steps_for_initialization(config=self.config, args=args, data=data) 73 | 74 | self.config['OCR']['enabled'] = 'false' 75 | self.config.remove_section('TESSERACT') 76 | 77 | h_result: OcrEngineBase = OcrEngineFactory.get_instance(self.config) 78 | self.assertTrue(isinstance(h_result, DummyOcrEngine)) 79 | 80 | def test_get_instance_without_engine_ocr_engine(self): 81 | """Test get_instance_method with OCR engine enabled but without engine settings on config file.""" 82 | 83 | # Call Test Target Method 84 | args: Dict = { 85 | 'config': 'unittest_configfile.config' 86 | } 87 | data: Dict = {} 88 | 89 | TestsCommon.execute_basic_pipeline_steps_for_initialization(config=self.config, args=args, data=data) 90 | 91 | self.config['OCR']['enabled'] = 'true' 92 | del self.config['OCR']['type'] 93 | self.config.remove_section('TESSERACT') 94 | 95 | with self.assertRaises(AttributeError) as context: 96 | OcrEngineFactory.get_instance(self.config) 97 | -------------------------------------------------------------------------------- /tests/report_templates/default_report.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 24 | 25 | 26 | 27 |
28 |
29 |

TEx - Telegram Explorer - {{groupname}} ({{groupusername}})

30 |
31 | 32 |
33 | 34 | 35 | 36 | 37 | {% for item in messages %} 38 | 39 | 66 | 67 | {% endfor %} 68 |
Messages
40 | 41 | 42 | 43 | 44 | {% autoescape false %} 45 | {{item.date_time}} UTC {{item.to_from_information}} 46 |
{{item.message|replace("\r\n", "
")|replace("\n", "
")}} 47 | {% endautoescape %} 48 | 49 | {% if item.media_is_image +%} 50 | {% autoescape false %} 51 |
52 | {% endautoescape %} 53 | {% elif item.media_mime_type == 'application/vnd.geo' +%} 54 |
GeoLocation: {{item.media_geo}} 55 | {% elif item.media_mime_type == 'video/mp4' +%} 56 |
57 | 61 |
Download: {{item.media_filename.split('/')[1]}} 62 | {% elif item.media_mime_type +%} 63 |
Download: {{item.media_filename.split('/')[1]}} 64 | {% endif %} 65 |
69 |
70 |
class SignalsEngineFactory:
    """Build :class:`SignalsEngine` instances from the parsed configuration."""

    @staticmethod
    def get_instance(config: ConfigParser, notification_engine: NotifierEngine, source: str) -> SignalsEngine:
        """Create a Signals Engine from the ``SIGNALS`` config section.

        When the configuration has no ``SIGNALS`` section, ``None`` is handed to
        the entity mapper instead of a section proxy.
        """
        if config.has_section('SIGNALS'):
            signals_section = config['SIGNALS']
        else:
            signals_section = None

        signal_entity = SignalEntityMapper.to_entity(section_proxy=signals_section)
        return SignalsEngine(
            entity=signal_entity,
            notification_engine=notification_engine,
            source=source,
        )


class SignalsEngine:
    """Send operational signals (keep-alive, init, shutdown, new group) to notifiers."""

    def __init__(self, entity: SignalEntity, notification_engine: NotifierEngine, source: str) -> None:
        """Store the collaborators and start with a zeroed message counter."""
        self.signal_entity: SignalEntity = entity
        self.messages_sent: int = 0
        self.notification_engine: NotifierEngine = notification_engine
        self.source: str = source

    @property
    def keep_alive_interval(self) -> int:
        """Return the keep-alive interval held by the signal entity."""
        return self.signal_entity.keep_alive_interval

    def inc_messages_sent(self) -> None:
        """Add one to the messages-sent counter."""
        self.messages_sent = self.messages_sent + 1

    async def keep_alive(self) -> None:
        """Send the KEEP-ALIVE signal, then reset the messages-sent counter."""
        await self.__emit('KEEP-ALIVE', f'Messages Processed in Period: {self.messages_sent}')

        # A new counting period starts once the keep-alive has gone out.
        self.messages_sent = 0

    async def shutdown(self) -> None:
        """Send the SHUTDOWN signal with the current messages-sent counter."""
        await self.__emit('SHUTDOWN', f'Last Messages Processed in Period: {self.messages_sent}')

    async def init(self) -> None:
        """Send the INITIALIZATION signal (empty content)."""
        await self.__emit('INITIALIZATION', '')

    async def new_group(self, group_id: str, group_title: str) -> None:
        """Send the NEW-GROUP signal carrying the group's id and title."""
        await self.__emit('NEW-GROUP', f'ID: {group_id} | Title: "{group_title}"')

    async def __emit(self, signal: str, content: str) -> None:
        """Build a UTC-timestamped notification model and send it."""
        notification = SignalNotificationEntityModel(
            date_time=datetime.now(tz=pytz.UTC),
            content=content,
            signal=signal,
        )
        await self.__send_signal(entity=notification)

    async def __send_signal(self, entity: SignalNotificationEntityModel) -> None:
        """Forward the signal to its configured notifiers, if there are any."""
        targets: List[str] = self.signal_entity.notifiers[entity.signal]

        # An empty notifier list means there is nothing to send for this signal.
        if len(targets) > 0:
            await self.notification_engine.run(
                notifiers=targets,
                entity=entity,
                rule_id='SIGNALS',
                source=self.source,
            )
4 | 5 | > The Message Listener automatically performs Groups and Users Synchronization. 6 | 7 | Once started, the Telegram Explorer runner does not stop or terminate until the Telegram servers disconnect the client or the running process receives a SIGTERM. 8 | 9 | **Full Command:** 10 | 11 | ```bash 12 | python3 -m TEx listen --config CONFIGURATION_FILE_PATH --ignore_media --group_id 1234,5678 13 | ``` 14 | 15 | **Basic Command:** 16 | ```bash 17 | python3 -m TEx listen --config CONFIGURATION_FILE_PATH 18 | ``` 19 | 20 | **Parameters** 21 | 22 | * **config** > Required - Created Configuration File Path 23 | * **ignore_media** > Optional - If present, don't Download any Media 24 | * **group_id** > Optional - If present, Download the Messages only from the Specified Group IDs. Comma Separated 25 | 26 | 27 | *Output Example:* 28 | ```bash 29 | TEx - Telegram Explorer 30 | Version 0.2.12 31 | By: Th3 0bservator 32 | 33 | 2023-10-01 20:46:53,880 - INFO - [*] Loading Configurations: 34 | 2023-10-01 20:46:53,880 - INFO - [*] Installed Modules: 35 | 2023-10-01 20:46:53,880 - INFO - data_structure_handler.py 36 | 2023-10-01 20:46:53,880 - INFO - database_handler.py 37 | 2023-10-01 20:46:53,880 - INFO - execution_configuration_handler.py 38 | 2023-10-01 20:46:53,880 - INFO - telegram_connection_manager.py 39 | 2023-10-01 20:46:53,880 - INFO - telegram_groups_list.py 40 | 2023-10-01 20:46:53,880 - INFO - telegram_groups_scrapper.py 41 | 2023-10-01 20:46:53,880 - INFO - telegram_maintenance 42 | 2023-10-01 20:46:53,880 - INFO - telegram_messages_listener.py 43 | 2023-10-01 20:46:53,880 - INFO - telegram_messages_scrapper.py 44 | 2023-10-01 20:46:53,881 - INFO - telegram_report_generator 45 | 2023-10-01 20:46:53,881 - INFO - telegram_stats_generator.py 46 | 2023-10-01 20:46:53,891 - INFO - [*] Loading Execution Configurations: 47 | 2023-10-01 20:46:54,179 - INFO - [*] Executing Pipeline: 48 | 2023-10-01 20:46:54,179 - INFO - [+]
telegram_connection_manager.TelegramConnector 49 | 2023-10-01 20:46:55,763 - INFO - User Authorized on Telegram: True 50 | 2023-10-01 20:46:55,775 - INFO - [+] telegram_messages_listener.TelegramGroupMessageListener 51 | 2023-10-01 20:46:55,912 - INFO - Listening Past Messages... 52 | 2023-10-01 20:46:55,912 - INFO - Listening New Messages... 53 | 2023-10-01 20:46:55,923 - INFO - Downloading Photo from Message 20436 at 2023-09-30 00:58:35 54 | 2023-10-01 20:46:56,774 - INFO - Downloading Photo from Message 788 at 2023-09-30 09:48:51 55 | 2023-10-01 20:46:56,805 - INFO - Downloading Photo from Message 20438 at 2023-09-30 11:18:12 56 | 2023-10-01 20:46:56,807 - INFO - Downloading Photo from Message 37345 at 2023-09-30 04:39:54 57 | 2023-10-01 20:46:56,823 - INFO - Downloading Photo from Message 37346 at 2023-09-30 13:12:39 58 | 2023-10-01 20:46:58,053 - INFO - Downloading Photo from Message 725 at 2023-09-30 15:07:38 59 | 2023-10-01 20:46:58,105 - INFO - Downloading Photo from Message 727 at 2023-09-30 15:16:05 60 | 2023-10-01 20:46:58,148 - INFO - Downloading Photo from Message 20440 at 2023-09-30 14:52:21 61 | 2023-10-01 20:46:58,149 - INFO - Downloading Photo from Message 37347 at 2023-09-30 15:23:33 62 | 2023-10-01 20:46:58,743 - WARNING - Group "1246578969" not found on DB. Performing automatic synchronization. Consider execute "load_groups" command to perform a full group synchronization (Members and Group Cover Photo). 63 | 2023-10-01 20:46:58,751 - INFO - Downloading Photo from Message 13855 at 2023-09-30 21:00:09 64 | 2023-10-01 20:46:58,752 - INFO - Downloading Media from Message 12587 (9739.13 Kbytes) as video/mp4 at 2023-09-30 21:37:30 65 | 2023-10-01 20:46:58,779 - INFO - Downloading Photo from Message 37348 at 2023-09-30 22:10:03 66 | 2023-10-01 20:46:59,062 - WARNING - User "1254788963" was not found on DB. Performing automatic synchronization. 
67 | 2023-10-01 20:46:59,110 - INFO - Downloading Photo from Message 13856 at 2023-10-01 02:08:19 68 | 2023-10-01 20:46:59,111 - INFO - Downloading Photo from Message 13857 at 2023-10-01 02:08:19 69 | ``` -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # Welcome to Telegram Explorer 2 | 3 | [![](https://img.shields.io/github/last-commit/guibacellar/TEx)](https://github.com/guibacellar/TEx/tree/main) 4 | [![](https://img.shields.io/github/languages/code-size/guibacellar/TEx)](https://github.com/guibacellar/TEx/tree/main) 5 | [![](https://img.shields.io/badge/Python-3.8+-green.svg)](https://www.python.org/downloads/) 6 | [![](https://github.com/guibacellar/TEx/actions/workflows/cy.yml/badge.svg?branch=main)](https://github.com/guibacellar/TEx/actions/workflows/cy.yml) 7 | [![](https://telegramexplorer.readthedocs.io/en/latest/?badge=latest)](https://telegramexplorer.readthedocs.io/en/latest/) 8 | [![](https://img.shields.io/badge/maintainer-Th3%200bservator-blue)](https://theobservator.net/) 9 | ![](https://img.shields.io/github/v/release/guibacellar/TeX) 10 | 11 | 12 | ## About The Project 13 | 14 | TEx is a Telegram Explorer tool created to help Researchers, Investigators and Law Enforcement Agents to Collect and Process the Huge Amount of Data Generated from Criminal, Fraud, Security and Others Telegram Groups. 15 | 16 | Repository: [https://github.com/guibacellar/TEx](https://github.com/guibacellar/TEx) 17 | 18 | !!! warning "BETA VERSION" 19 | 20 | Please note that V0.3.0 are the latest beta version for this project, so it is possible that you may encounter bugs that have not yet been mapped out. 21 | I kindly ask you to report the bugs at: [https://github.com/guibacellar/TEx/issues](https://github.com/guibacellar/TEx/issues) 22 | 23 | 24 | ## Requirements 25 | - Python 3.8.1+ (⚠️ Deprecated. 
Consider using version 3.10+ ⚠️) 26 | - Windows x64 or Linux x64 27 | 28 | 29 | ## Features 30 | - Connection Manager (Handle Telegram Connection) 31 | - Group Information Scrapper 32 | - List Groups (Scrap info for all groups, including members, members info and profile pic) 33 | - Automatic Group Information Sync 34 | - Automatic Users Information Sync 35 | - Messages Listener (Listen all Incoming Messages) 36 | - Messages Scrapper (Scrap all Group Messages, since the first one) 37 | - Download Media (Including fine media settings like size, groups and/or media type) 38 | - HTML Report Generation 39 | - Export Downloaded Files 40 | - Export Messages 41 | - Message Finder System (Allows finding patterns in messages using terms or RegEx) 42 | - Message Notification System (Send alerts, finds, or all messages to Discord) 43 | - Elastic Search 8+ Native Integration 44 | - Image OCR using Tesseract 45 | - Signals for Helping Monitoring 46 | 47 | 48 | ## Known Limitations 49 | 50 | Although we do not currently know the limitations of using the tool, it is important to announce the limits up to which we have tested the platform. 51 | 52 | Currently, **one TeX process can support at least** (per configuration file/per phone number): 53 | 54 | **Per Group** 55 | 56 | - 50,000 messages 57 | - 7,000 users per group 58 | - 8 GB of downloaded files 59 | 60 | **Total** 61 | 62 | - 400 groups 63 | - 800,000 messages 64 | - 50,000 unique users 65 | - 150 GB of total downloaded files 66 | 67 | 68 | ## How Telegram Explorer Works 69 | Telegram Explorer works using one configuration file per target phone number to be used. 70 | 71 | ![how_text_works.png](media/how_text_works.png) 72 | 73 | So, you can deploy 1 or several Telegram Explorer runners on one machine, using one configuration file for each instance. You also can deploy the runner using Linux Containers or Docker containers. 74 | 75 | !!!
info "IMPORTANT" 76 | 77 | Depending on the security level and your account settings, you may be asked to enter a security code that will be sent to your Telegram, or some authentication information.

class ElasticSearchNotifier(BaseNotifier):
    """Basic Elastic Search Notifier.

    Indexes finder notifications and signal notifications as documents in a
    single Elastic Search index, through a configured ingest pipeline.
    """

    def __init__(self) -> None:
        """Initialize Elastic Search Notifier with no client configured yet."""
        super().__init__()
        self.url: str = ''
        self.client: Optional[AsyncElasticsearch] = None
        self.index: str = ''
        self.pipeline: str = ''

    def configure(self, config: SectionProxy) -> None:
        """Configure the Notifier from its configuration section.

        Options read: ``address`` (comma separated host list, optional),
        ``api_key`` (optional), ``verify_ssl_cert`` (optional, defaults to
        enabled), ``cloud_id`` (optional), ``index_name`` (required) and
        ``pipeline_name`` (required).

        :param config: Section holding the notifier settings.
        :raises KeyError: If ``index_name`` or ``pipeline_name`` is missing.
        """
        hosts_list: Optional[str] = config.get('address', fallback=None)

        # Tolerate whitespace around commas ("host1, host2") and drop empty
        # entries; fall back to None when no usable host remains.
        hosts: Optional[List[str]] = None
        if hosts_list:
            hosts = [host.strip() for host in hosts_list.split(',') if host.strip()] or None

        # Parse the flag case-insensitively. Comparing against the exact string
        # 'True' made "verify_ssl_cert=true" silently disable certificate
        # verification. The accepted spellings are the truthy values
        # ConfigParser itself recognizes; anything else still means "disabled",
        # as before.
        verify_raw: str = config.get('verify_ssl_cert', fallback='True')
        verify_certs: bool = verify_raw.strip().lower() in ('1', 'yes', 'true', 'on')

        self.client = AsyncElasticsearch(
            hosts=hosts,  # type: ignore
            api_key=config.get('api_key', fallback=None),
            verify_certs=verify_certs,
            cloud_id=config.get('cloud_id', fallback=None),
            request_timeout=30,
            max_retries=10,
            ssl_show_warn=False,
        )
        self.index = config['index_name']
        self.pipeline = config['pipeline_name']

    async def run(self, entity: Union[FinderNotificationMessageEntity, SignalNotificationEntityModel], rule_id: str, source: str) -> None:
        """Run Elastic Search Notifier.

        Does nothing when the notifier has not been configured (no client).

        :param entity: Finder notification or signal notification to index.
        :param rule_id: Rule identifier; stored only for finder notifications.
        :param source: Source identifier stored with the document.
        """
        if not self.client:
            return

        content: Dict

        if isinstance(entity, FinderNotificationMessageEntity):
            content = await self.__get_dict_for_finder_notification(
                entity=entity,
                rule_id=rule_id,
                source=source,
            )
        else:
            content = await self.__get_dict_for_signal_notification(
                entity=entity,
                source=source,
            )

        await self.client.index(
            index=self.index,
            pipeline=self.pipeline,
            document=content,
        )

    async def __get_dict_for_finder_notification(self, entity: FinderNotificationMessageEntity, rule_id: str, source: str) -> Dict:
        """Return the Dict for Finder Notifications."""
        content: Dict = {
            'time': entity.date_time.astimezone(tz=pytz.utc),
            'source': source,
            'rule': rule_id,
            'raw': entity.raw_text,
            'group_name': entity.group_name,
            'group_id': entity.group_id,
            'from_id': entity.from_id,
            'to_id': entity.to_id,
            'reply_to_msg_id': entity.reply_to_msg_id,
            'message_id': entity.message_id,
            'is_reply': entity.is_reply,
            'found_on': entity.found_on,
        }

        # The media keys are always present so every document has the same shape.
        if entity.downloaded_media_info:
            content['has_media'] = True
            content['media_mime_type'] = entity.downloaded_media_info.content_type
            content['media_size'] = entity.downloaded_media_info.size_bytes
        else:
            content['has_media'] = False
            content['media_mime_type'] = None
            content['media_size'] = None

        return content

    async def __get_dict_for_signal_notification(self, entity: SignalNotificationEntityModel, source: str) -> Dict:
        """Return the Dict for Signal Notifications."""
        content: Dict = {
            'time': entity.date_time.astimezone(tz=pytz.utc),
            'source': source,
            'signal': entity.signal,
            'content': entity.content,
        }

        return content