├── tests
    ├── core
    │   ├── __init__.py
    │   └── ocr
    │   │   ├── __init__.py
    │   │   ├── test_dummy_ocr_engine.py
    │   │   ├── unitest_echo_ocr_engine.py
    │   │   └── test_ocr_engine_factory.py
    ├── exporter
    │   └── __init__.py
    ├── finder
    │   ├── __init__.py
    │   └── test_all_messages_finder.py
    ├── modules
    │   ├── __init__.py
    │   ├── telegram_maintenance
    │   │   └── __init__.py
    │   ├── telegram_report_generator
    │   │   └── __init__.py
    │   ├── test_state_file_handler.py
    │   ├── common.py
    │   ├── test_input_args_handler.py
    │   └── test_telegram_groups_list.py
    ├── notifier
    │   ├── __init__.py
    │   └── test_notifier_engine.py
    ├── resources
    │   ├── 1645024499642.txt
    │   ├── mat.pdf
    │   ├── demo.apk
    │   ├── unknow.mp4
    │   ├── sticker.webp
    │   ├── AnimatedSticker.tgs
    │   ├── 122761750_387013276008970_8208112669996447119_n.jpg
    │   └── expected_generated_file_content
    │   │   ├── test_pandas_rolling_exporter_json_expected_15558987453_202311221005.data
    │   │   └── test_pandas_rolling_exporter_csv_expected_15558987453_202311221007.data
    ├── pytest.ini
    ├── test_TEx.py
    ├── __init__.py
    ├── logging.conf
    ├── config.ini
    ├── report_templates
    │   ├── default_index.html
    │   └── default_report.html
    └── unittest_configfile.config
├── TEx
    ├── __init__.py
    ├── py.typed
    ├── core
    │   ├── ocr
    │   │   ├── __init__.py
    │   │   ├── ocr_engine_base.py
    │   │   ├── dummy_ocr_engine.py
    │   │   ├── ocr_engine_factory.py
    │   │   └── tesseract_ocr_engine.py
    │   ├── __init__.py
    │   ├── mapper
    │   │   ├── __init__.py
    │   │   ├── telethon_user_mapper.py
    │   │   ├── keep_alive_entity_mapper.py
    │   │   ├── telethon_message_mapper.py
    │   │   └── telethon_channel_mapper.py
    │   ├── media_download_handling
    │   │   ├── __init__.py
    │   │   ├── do_nothing_media_downloader.py
    │   │   ├── photo_media_downloader.py
    │   │   └── std_media_downloader.py
    │   ├── media_metadata_handling
    │   │   ├── __init__.py
    │   │   ├── do_nothing_media_handler.py
    │   │   ├── geo_handler.py
    │   │   ├── photo_handler.py
    │   │   ├── pdf_handler.py
    │   │   ├── text_handler.py
    │   │   ├── generic_binary_handler.py
    │   │   ├── sticker_handler.py
    │   │   ├── mp4_handler.py
    │   │   └── webimage_handler.py
    │   ├── dir_manager.py
    │   ├── base_module.py
    │   ├── state_file.py
    │   └── temp_file.py
    ├── models
    │   ├── __init__.py
    │   ├── facade
    │   │   ├── __init__.py
    │   │   ├── signal_entity_model.py
    │   │   ├── signal_notification_model.py
    │   │   ├── finder_notification_facade_entity.py
    │   │   ├── media_handler_facade_entity.py
    │   │   ├── telegram_message_report_facade_entity.py
    │   │   └── telegram_group_report_facade_entity.py
    │   └── database
    │   │   ├── __init__.py
    │   │   └── temp_db_models.py
    ├── modules
    │   ├── __init__.py
    │   ├── telegram_maintenance
    │   │   ├── __init__.py
    │   │   └── telegram_purge_old_data.py
    │   ├── telegram_report_generator
    │   │   ├── __init__.py
    │   │   └── telegram_report_sent_telegram.py
    │   ├── temp_file_manager.py
    │   ├── execution_configuration_handler.py
    │   ├── database_handler.py
    │   ├── data_structure_handler.py
    │   ├── state_file_handler.py
    │   └── telegram_groups_list.py
    ├── exporter
    │   ├── __init__.py
    │   ├── exporter_base.py
    │   └── exporter_engine.py
    ├── finder
    │   ├── __init__.py
    │   ├── base_finder.py
    │   ├── all_messages_finder.py
    │   └── regex_finder.py
    ├── notifier
    │   ├── __init__.py
    │   ├── notifier_base.py
    │   ├── notifier_engine.py
    │   ├── signals_engine.py
    │   └── elastic_search_notifier.py
    ├── database
    │   ├── __init__.py
    │   ├── db_initializer.py
    │   ├── db_manager.py
    │   └── db_migration.py
    ├── __main__.py
    ├── logging.conf
    ├── config.ini
    └── report_templates
    │   ├── default_index.html
    │   └── default_report.html
├── requirements.txt
├── docs
    ├── requirements.txt
    ├── media
    │   ├── auth_required.png
    │   ├── code_provided.png
    │   ├── report_stats.png
    │   ├── how_text_works.png
    │   ├── export_files_list.png
    │   ├── html_report_files.png
    │   ├── html_report_index.png
    │   ├── text_report_files.png
    │   ├── html_report_content.png
    │   ├── text_report_content.png
    │   └── ocr_tensorflow_tessdata_folder.png
    ├── secret_chats.md
    ├── authentication.md
    ├── contact.md
    ├── configuration
    │   ├── proxy.md
    │   ├── media_download_configuration.md
    │   ├── basic.md
    │   ├── media_download_examples.md
    │   ├── ocr.md
    │   ├── scenario_based_examples.md
    │   └── complete_configuration_file_example.md
    ├── report
    │   ├── report_status.md
    │   ├── report_html.md
    │   ├── report_text.md
    │   └── report_export_files.md
    ├── finder
    │   ├── finder_catchall.md
    │   ├── finder_regex.md
    │   └── configuration.md
    ├── maintenance
    │   └── purge_old_data.md
    ├── how_use
    │   ├── how_to_use_basic.md
    │   ├── usage_connection.md
    │   ├── usage_list_groups.md
    │   ├── usage_load_groups.md
    │   ├── usage_download_messages.md
    │   └── usage_message_listener.md
    ├── notification
    │   ├── notification_elasticsearch_signals_template.md
    │   ├── notification_elasticsearch.md
    │   ├── notification_elasticsearch_index_template.md
    │   ├── signals.md
    │   └── notification_discord.md
    ├── exporting
    │   └── pandas_rolling.md
    ├── changelog
    │   └── v030.md
    └── index.md
├── coverage.rc
├── .readthedocs.yaml
├── mypy.ini
├── .github
    └── workflows
    │   ├── cy.yml
    │   └── cy_deploy.yml
├── .gitignore
├── tox.ini
├── README.md
└── mkdocs.yml


/tests/core/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/core/ocr/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/exporter/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/finder/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/modules/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/notifier/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/TEx/__init__.py:
--------------------------------------------------------------------------------
1 | """TEx Root."""
2 | 


--------------------------------------------------------------------------------
/TEx/py.typed:
--------------------------------------------------------------------------------
1 | # Marker file for PEP 561


--------------------------------------------------------------------------------
/TEx/core/ocr/__init__.py:
--------------------------------------------------------------------------------
1 | """OCR Modules."""
2 | 


--------------------------------------------------------------------------------
/TEx/models/__init__.py:
--------------------------------------------------------------------------------
1 | """Models Package."""
2 | 


--------------------------------------------------------------------------------
/TEx/modules/__init__.py:
--------------------------------------------------------------------------------
1 | """OSIx Modules."""
2 | 


--------------------------------------------------------------------------------
/tests/modules/telegram_maintenance/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/TEx/exporter/__init__.py:
--------------------------------------------------------------------------------
1 | """Exporter Modules."""
2 | 


--------------------------------------------------------------------------------
/TEx/finder/__init__.py:
--------------------------------------------------------------------------------
1 | """TEx Finder Modules."""
2 | 


--------------------------------------------------------------------------------
/TEx/notifier/__init__.py:
--------------------------------------------------------------------------------
1 | """Notifier Modules."""
2 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # QA
2 | tox==4.7.0
3 | poetry==1.5.1


--------------------------------------------------------------------------------
/tests/modules/telegram_report_generator/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/resources/1645024499642.txt:
--------------------------------------------------------------------------------
1 | FILE CONTENT HERE


--------------------------------------------------------------------------------
/TEx/models/facade/__init__.py:
--------------------------------------------------------------------------------
1 | """Facade Objects."""
2 | 


--------------------------------------------------------------------------------
/TEx/core/__init__.py:
--------------------------------------------------------------------------------
1 | """OSIx Core Modules and Classes."""
2 | 


--------------------------------------------------------------------------------
/tests/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | testpaths =
3 |     tests
4 | 


--------------------------------------------------------------------------------
/TEx/core/mapper/__init__.py:
--------------------------------------------------------------------------------
1 | """Centralized Entity Mappers."""
2 | 


--------------------------------------------------------------------------------
/TEx/models/database/__init__.py:
--------------------------------------------------------------------------------
1 | """Database Models Module."""
2 | 


--------------------------------------------------------------------------------
/TEx/core/media_download_handling/__init__.py:
--------------------------------------------------------------------------------
1 | """Media Download Module."""
2 | 


--------------------------------------------------------------------------------
/TEx/modules/telegram_maintenance/__init__.py:
--------------------------------------------------------------------------------
1 | """Maintenance Modules."""
2 | 


--------------------------------------------------------------------------------
/TEx/core/media_metadata_handling/__init__.py:
--------------------------------------------------------------------------------
1 | """Telegram Media Handling Files."""
2 | 


--------------------------------------------------------------------------------
/TEx/modules/telegram_report_generator/__init__.py:
--------------------------------------------------------------------------------
1 | """Telegram Report Modules."""
2 | 


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | # QA
2 | tox==4.7.0
3 | poetry==1.5.1
4 | mkdocs-material==9.4.2


--------------------------------------------------------------------------------
/tests/resources/mat.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/guibacellar/TEx/HEAD/tests/resources/mat.pdf


--------------------------------------------------------------------------------
/tests/resources/demo.apk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/guibacellar/TEx/HEAD/tests/resources/demo.apk


--------------------------------------------------------------------------------
/tests/resources/unknow.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/guibacellar/TEx/HEAD/tests/resources/unknow.mp4


--------------------------------------------------------------------------------
/docs/media/auth_required.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/guibacellar/TEx/HEAD/docs/media/auth_required.png


--------------------------------------------------------------------------------
/docs/media/code_provided.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/guibacellar/TEx/HEAD/docs/media/code_provided.png


--------------------------------------------------------------------------------
/docs/media/report_stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/guibacellar/TEx/HEAD/docs/media/report_stats.png


--------------------------------------------------------------------------------
/tests/resources/sticker.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/guibacellar/TEx/HEAD/tests/resources/sticker.webp


--------------------------------------------------------------------------------
/docs/media/how_text_works.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/guibacellar/TEx/HEAD/docs/media/how_text_works.png


--------------------------------------------------------------------------------
/docs/media/export_files_list.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/guibacellar/TEx/HEAD/docs/media/export_files_list.png


--------------------------------------------------------------------------------
/docs/media/html_report_files.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/guibacellar/TEx/HEAD/docs/media/html_report_files.png


--------------------------------------------------------------------------------
/docs/media/html_report_index.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/guibacellar/TEx/HEAD/docs/media/html_report_index.png


--------------------------------------------------------------------------------
/docs/media/text_report_files.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/guibacellar/TEx/HEAD/docs/media/text_report_files.png


--------------------------------------------------------------------------------
/docs/media/html_report_content.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/guibacellar/TEx/HEAD/docs/media/html_report_content.png


--------------------------------------------------------------------------------
/docs/media/text_report_content.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/guibacellar/TEx/HEAD/docs/media/text_report_content.png


--------------------------------------------------------------------------------
/tests/resources/AnimatedSticker.tgs:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/guibacellar/TEx/HEAD/tests/resources/AnimatedSticker.tgs


--------------------------------------------------------------------------------
/docs/secret_chats.md:
--------------------------------------------------------------------------------
1 | # A Note About Secret Chats
2 | 
3 | Currently, Telegram Explorer does not offer support for Secret Chats.


--------------------------------------------------------------------------------
/docs/media/ocr_tensorflow_tessdata_folder.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/guibacellar/TEx/HEAD/docs/media/ocr_tensorflow_tessdata_folder.png


--------------------------------------------------------------------------------
/tests/resources/122761750_387013276008970_8208112669996447119_n.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/guibacellar/TEx/HEAD/tests/resources/122761750_387013276008970_8208112669996447119_n.jpg


--------------------------------------------------------------------------------
/tests/test_TEx.py:
--------------------------------------------------------------------------------
 1 | """TEx Main Module Tests."""
 2 | 
 3 | import unittest
 4 | 
 5 | 
 6 | class TexTest(unittest.TestCase):
 7 | 
 8 |     def test_foo(self):
 9 |         assert True
10 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
# Scratch folder and database file used by the tests; both are resolved
# against the current working directory. This code runs when the package
# is imported.
data_dir: str = os.path.join(os.getcwd(), "_data")
db_file: str = os.path.join(data_dir, 'local.db')

# Create the scratch folder when it is missing.
if not os.path.exists(data_dir):
    os.mkdir(data_dir)

# Remove a database file already present at that path.
if os.path.exists(db_file):
    os.remove(db_file)
11 | 


--------------------------------------------------------------------------------
/TEx/finder/base_finder.py:
--------------------------------------------------------------------------------
 1 | """Base Class for All Finders."""
 2 | from __future__ import annotations
 3 | 
 4 | import abc
 5 | 
 6 | 
 7 | class BaseFinder:
 8 |     """Base Finder Class."""
 9 | 
10 |     @abc.abstractmethod
11 |     async def find(self, raw_text: str) -> bool:
12 |         """Apply Find Logic."""
13 | 


--------------------------------------------------------------------------------
/coverage.rc:
--------------------------------------------------------------------------------
 1 | [run]
 2 | branch = True
 3 | 
 4 | [report]
 5 | omit =
 6 |     *venv*,
 7 |     *.tox*,
 8 |     *runner.py,
 9 |     *__init__.py,
10 |     *__main__.py,
11 |     *core/chrome_driver_manager.py,
12 |     *core/constants.py,
13 |     *core/decorator.py,
14 |     *core/http_manager.py,
15 |     *models/facade/*.py,
16 | 


--------------------------------------------------------------------------------
/TEx/models/facade/signal_entity_model.py:
--------------------------------------------------------------------------------
 1 | """Signal Entity."""
 2 | from __future__ import annotations
 3 | 
 4 | from typing import Dict
 5 | 
 6 | from pydantic import BaseModel, ConfigDict
 7 | 
 8 | 
class SignalEntity(BaseModel):
    """Pydantic model with an enabled flag, a keep-alive interval and a notifiers mapping."""

    # extra='forbid': validation fails when an undeclared field is supplied.
    model_config = ConfigDict(extra='forbid')

    # Switch for the signals feature.
    enabled: bool
    # Keep-alive interval; the unit is not visible here (presumably seconds - TODO confirm).
    keep_alive_interval: int
    # Untyped mapping; key/value schema is defined by the producer - verify against caller.
    notifiers: Dict
17 | 


--------------------------------------------------------------------------------
/docs/authentication.md:
--------------------------------------------------------------------------------
 1 | # Authentication
 2 | 
 3 | If you are asked to provide additional authentication during the first connection, the Telegram Explorer Runner will ask for it on the prompt/tty and wait until you provide the authentication challenge response.
 4 | 
 5 | **Code Request**
 6 | ![auth_required.png](media/auth_required.png)
 7 | 
 8 | **Authentication Code sent to Telegram Account**
 9 | ![code_provided.png](media/code_provided.png)
10 | 


--------------------------------------------------------------------------------
/TEx/core/media_metadata_handling/do_nothing_media_handler.py:
--------------------------------------------------------------------------------
 1 | """Do Nothing Media Handler."""
 2 | from __future__ import annotations
 3 | 
 4 | from typing import Dict, Optional
 5 | 
 6 | from telethon.tl.patched import Message
 7 | 
 8 | 
 9 | class DoNothingHandler:
10 |     """Do Nothing Media Handler."""
11 | 
12 |     @staticmethod
13 |     def handle_metadata(message: Message) -> Optional[Dict]:
14 |         """Handle Media Metadata."""
15 |         return None
16 | 


--------------------------------------------------------------------------------
/TEx/models/facade/signal_notification_model.py:
--------------------------------------------------------------------------------
 1 | """Facade Entities for Signal based Notifications."""
 2 | from __future__ import annotations
 3 | 
 4 | from datetime import datetime
 5 | 
 6 | from pydantic import BaseModel, ConfigDict
 7 | 
 8 | 
class SignalNotificationEntityModel(BaseModel):
    """Pydantic facade model carrying one signal notification: signal, timestamp and content."""

    # extra='forbid': validation fails when an undeclared field is supplied.
    model_config = ConfigDict(extra='forbid')

    # Signal identifier; allowed values are not visible here - verify against producers.
    signal: str
    # Timestamp attached to the notification; timezone handling is not visible here.
    date_time: datetime
    # Notification text.
    content: str
17 | 


--------------------------------------------------------------------------------
/TEx/finder/all_messages_finder.py:
--------------------------------------------------------------------------------
 1 | """All Messages Finder."""
 2 | from configparser import SectionProxy
 3 | 
 4 | from TEx.finder.base_finder import BaseFinder
 5 | 
 6 | 
 7 | class AllMessagesFinder(BaseFinder):
 8 |     """All Messages Based Finder."""
 9 | 
10 |     def __init__(self, config: SectionProxy) -> None:
11 |         """Initialize All Messages Finder."""
12 | 
13 |     async def find(self, raw_text: str) -> bool:
14 |         """Find Message. Always Return True."""
15 |         return True
16 | 


--------------------------------------------------------------------------------
/tests/core/ocr/test_dummy_ocr_engine.py:
--------------------------------------------------------------------------------
 1 | """Test the Dummy OCR Engine."""
 2 | 
 3 | import unittest
 4 | 
 5 | from TEx.core.ocr.dummy_ocr_engine import DummyOcrEngine
 6 | from TEx.core.ocr.ocr_engine_base import OcrEngineBase
 7 | 
 8 | 
 9 | class DummyOcrEngineTest(unittest.TestCase):
10 | 
11 |     def test_all(self):
12 |         """Test Dummy Engine."""
13 | 
14 |         target: OcrEngineBase = DummyOcrEngine()
15 |         target.configure(config=None)
16 |         self.assertIsNone(target.run(file_path='/folder/path'))
17 | 


--------------------------------------------------------------------------------
/TEx/core/dir_manager.py:
--------------------------------------------------------------------------------
 1 | """Directory Manager."""
 2 | 
 3 | import os
 4 | 
 5 | 
 6 | class DirectoryManagerUtils:
 7 |     """Directory Manager."""
 8 | 
 9 |     @staticmethod
10 |     def ensure_dir_struct(path: str) -> None:
11 |         """Ensure That Directory Exists.
12 | 
13 |         :param path:
14 |         :return:
15 |         """
16 |         target_path: str = os.path.abspath(os.path.join(os.getcwd(), path))
17 | 
18 |         if not os.path.exists(target_path):
19 |             os.makedirs(target_path, exist_ok=True)
20 | 


--------------------------------------------------------------------------------
/tests/logging.conf:
--------------------------------------------------------------------------------
 1 | [loggers]
 2 | keys=root,sqlalchemy
 3 | 
 4 | [handlers]
 5 | keys=consoleHandler
 6 | 
 7 | [formatters]
 8 | keys=simpleFormatter
 9 | 
10 | [logger_root]
11 | level=DEBUG
12 | handlers=consoleHandler
13 | 
14 | [logger_sqlalchemy]
15 | level=ERROR
16 | handlers=consoleHandler
17 | qualname=''
18 | 
19 | [handler_consoleHandler]
20 | class=StreamHandler
21 | level=DEBUG
22 | formatter=simpleFormatter
23 | args=(sys.stdout,)
24 | 
25 | [formatter_simpleFormatter]
26 | format=%(asctime)s - %(levelname)s - %(message)s
27 | 


--------------------------------------------------------------------------------
/TEx/core/media_download_handling/do_nothing_media_downloader.py:
--------------------------------------------------------------------------------
 1 | """Do Nothing Media Downloader."""
 2 | from __future__ import annotations
 3 | 
 4 | from typing import Dict
 5 | 
 6 | from telethon.tl.patched import Message
 7 | 
 8 | 
 9 | class DoNothingMediaDownloader:
10 |     """Do Nothing Media Downloader."""
11 | 
12 |     @staticmethod
13 |     async def download(message: Message, media_metadata: Dict, data_path: str) -> None:
14 |         """Download the Media, Update MetadaInfo and Return the ID from DB Record.
15 | 
16 |         :param message:
17 |         :param media_metadata:
18 |         :return:
19 |         """
20 |         return
21 | 


--------------------------------------------------------------------------------
/TEx/core/ocr/ocr_engine_base.py:
--------------------------------------------------------------------------------
 1 | """Base Class for OCR Engine."""
 2 | from __future__ import annotations
 3 | 
 4 | import abc
 5 | from configparser import SectionProxy
 6 | from typing import Optional
 7 | 
 8 | 
 9 | class OcrEngineBase:
10 |     """Base Class for OCR Engine."""
11 | 
12 |     def __init__(self) -> None:
13 |         """Initialize Base Class."""
14 | 
15 |     @abc.abstractmethod
16 |     def configure(self, config: Optional[SectionProxy]) -> None:
17 |         """Configure Abstract Method."""
18 | 
19 |     @abc.abstractmethod
20 |     def run(self, file_path: str) -> Optional[str]:
21 |         """Extract Text from Image."""
22 | 


--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | # .readthedocs.yaml
 2 | # Read the Docs configuration file
 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 4 | 
 5 | # Required
 6 | version: 2
 7 | 
 8 | # Set the OS, Python version and other tools you might need
 9 | build:
10 |   os: ubuntu-22.04
11 |   tools:
12 |     python: "3.8"
13 |     # You can also specify other tool versions:
14 |     # nodejs: "19"
15 |     # rust: "1.64"
16 |     # golang: "1.19"
17 | 
18 | mkdocs:
19 |   configuration: mkdocs.yml
20 | 
21 | # Optionally declare the Python requirements required to build your docs
22 | python:
23 |    install:
24 |    - requirements: docs/requirements.txt


--------------------------------------------------------------------------------
/TEx/database/__init__.py:
--------------------------------------------------------------------------------
 1 | """Database Module."""
 2 | from cachetools import Cache, TTLCache
 3 | 
 4 | 
 5 | class NoneSupportedTTLCache(TTLCache):
 6 |     """Cache Customization to not Save None Values in Memory."""
 7 | 
 8 |     def __setitem__(self, key, value, cache_setitem=Cache.__setitem__) -> None:  # type: ignore
 9 |         """Customize __setitem__  to do not save nullable values."""
10 |         if value:
11 |             super().__setitem__(key, value, cache_setitem)  # type: ignore
12 | 
13 | 
# Module-level caches shared by importers of this package. Each keeps at most
# `maxsize` entries; ttl=300 is in seconds with TTLCache's default timer.
# NOTE(review): presumably used for group and user lookups - confirm at call sites.
GROUPS_CACHE: NoneSupportedTTLCache = NoneSupportedTTLCache(maxsize=256, ttl=300)
USERS_CACHE: NoneSupportedTTLCache = NoneSupportedTTLCache(maxsize=2048, ttl=300)
16 | 


--------------------------------------------------------------------------------
/TEx/core/ocr/dummy_ocr_engine.py:
--------------------------------------------------------------------------------
 1 | """Dummy OCR Engine."""
 2 | from __future__ import annotations
 3 | 
 4 | from configparser import SectionProxy
 5 | from typing import Optional
 6 | 
 7 | from TEx.core.ocr.ocr_engine_base import OcrEngineBase
 8 | 
 9 | 
class DummyOcrEngine(OcrEngineBase):
    """OCR engine stand-in that never extracts any text."""

    def __init__(self) -> None:
        """Delegate construction to the base engine."""
        super().__init__()

    def configure(self, config: Optional[SectionProxy]) -> None:
        """Accept a configuration section and ignore it.

        :param config: Configuration section or None (unused)
        :return: None
        """

    def run(self, file_path: str) -> Optional[str]:
        """Skip text extraction.

        :param file_path: Path of the file to process (unused)
        :return: Always None
        """
        return None
23 | 


--------------------------------------------------------------------------------
/docs/contact.md:
--------------------------------------------------------------------------------
1 | # Contact
2 | 
3 | **Th3 0bservator** [https://www.theobservator.net/](https://www.theobservator.net/)
4 | 
5 | [![Foo](https://img.shields.io/badge/RSS-FFA500?style=for-the-badge&logo=rss&logoColor=white)](https://www.theobservator.net/) 
6 | [![Foo](https://img.shields.io/badge/Twitter-1DA1F2?style=for-the-badge&logo=twitter&logoColor=white)](https://twitter.com/th3_0bservator) 
7 | [![Foo](https://img.shields.io/badge/GitHub-100000?style=for-the-badge&logo=github&logoColor=white)](https://github.com/guibacellar/) 
8 | [![Foo](https://img.shields.io/badge/LinkedIn-0077B5?style=for-the-badge&logo=linkedin&logoColor=white)](https://www.linkedin.com/in/guilherme-bacellar/)
9 | 


--------------------------------------------------------------------------------
/TEx/core/media_download_handling/photo_media_downloader.py:
--------------------------------------------------------------------------------
 1 | """Photo Media Downloader."""
 2 | from __future__ import annotations
 3 | 
 4 | import os
 5 | from typing import Dict
 6 | 
 7 | from telethon.tl.patched import Message
 8 | 
 9 | 
class PhotoMediaDownloader:
    """Downloader for photo media attached to a message."""

    @staticmethod
    async def download(message: Message, media_metadata: Dict, data_path: str) -> None:
        """Download the message media into ``data_path``.

        The target file is ``data_path`` joined with
        ``media_metadata['file_name']``.

        :param message: Message whose ``download_media`` coroutine is awaited
        :param media_metadata: Mapping that must contain the 'file_name' key
        :param data_path: Destination folder
        :return: None
        """
        destination = os.path.join(data_path, media_metadata['file_name'])
        await message.download_media(destination)
23 | 


--------------------------------------------------------------------------------
/TEx/core/base_module.py:
--------------------------------------------------------------------------------
 1 | """OSIx Base Module."""
 2 | from __future__ import annotations
 3 | 
 4 | import abc
 5 | from configparser import ConfigParser
 6 | from typing import Dict
 7 | 
 8 | 
 9 | class BaseModule:
10 |     """Base Module Declaration."""
11 | 
12 |     @abc.abstractmethod
13 |     async def run(self, config: ConfigParser, args: Dict, data: Dict) -> None:
14 |         """
15 |         Abstract Base Run Description.
16 | 
17 |         :return: None
18 |         """
19 | 
20 |     @abc.abstractmethod
21 |     async def can_activate(self, config: ConfigParser, args: Dict, data: Dict) -> bool:
22 |         """
23 |         Abstract Method for Module Activation Function.
24 | 
25 |         :return:
26 |         """
27 | 


--------------------------------------------------------------------------------
/docs/configuration/proxy.md:
--------------------------------------------------------------------------------
 1 | # Proxy
 2 | If you need to use a proxy server, you can configure this behavior within the configuration file. If not, just omit this section from your file.
 3 | 
 4 | ```ini
 5 | [PROXY]
 6 | type=HTTP
 7 | address=127.0.0.1
 8 | port=3128
 9 | username=proxy username
10 | password=proxy password
11 | rdns=true
12 | ```
13 | 
14 | * **type** > Required - Protocol to use (HTTP, SOCKS5 or SOCKS4)
15 | * **address** > Required - Proxy Address
16 | * **port** > Required - Proxy IP Port
17 | * **username** > Optional - Username if the proxy requires auth
18 | * **password** > Optional - Password if the proxy requires auth
19 | * **rdns** > Optional - Whether to use remote or local resolve, default remote
20 | 


--------------------------------------------------------------------------------
/docs/report/report_status.md:
--------------------------------------------------------------------------------
 1 | # Internal Status Report
 2 | 
 3 | Telegram Explorer exports an internal status report containing statistics about message and user counts for each group, as well as media information with size and content-type.
 4 | 
 5 | **Full Command:**
 6 | 
 7 | ```bash
 8 | python3 -m TEx stats --config CONFIGURATION_FILE_PATH --report_folder REPORT_FOLDER_PATH --limit_days 3
 9 | ```
10 | 
11 | **Parameters**
12 | 
13 |   * **config** > Required - Created Configuration File Path
14 |   * **report_folder** > Required - Defines the Report Files Folder
15 |   * **limit_days** > Optional - Number of Days of past to filter the Report
16 | 
17 | *Output Example:*
18 | ![report_stats.png](../media/report_stats.png)


--------------------------------------------------------------------------------
/docs/finder/finder_catchall.md:
--------------------------------------------------------------------------------
 1 | # Message Finder System - Catch All Messages
 2 | 
 3 | **Compatibility:** Message Listener Command
 4 | 
 5 | Telegram Explorer allows you to catch all messages and redirect them to one or more notification connectors.
 6 | 
 7 | **Configuration Spec:**
 8 | 
 9 | For each rule to be used, you must set a configuration using the default name schema *FINDER.RULE.<RULE_NAME>*
10 | 
11 | **Parameters:**
12 | 
13 |   * **type** > Required - Fixed Value 'all'
14 |   * **notifier** > Required - Name of notifiers to be used to notify the triggered message (comma separated).
15 | 
16 | **Changes on Configuration File**
17 | ```ini
18 | [FINDER]
19 | enabled=true
20 | 
21 | [FINDER.RULE.CatchAll]
22 | type=all
23 | notifier=NOTIFIER.ELASTIC_SEARCH.GENERAL
24 | ```


--------------------------------------------------------------------------------
/tests/core/ocr/unitest_echo_ocr_engine.py:
--------------------------------------------------------------------------------
 1 | """Echo OCR Engine for Unittest only."""
 2 | from configparser import SectionProxy
 3 | from typing import Optional
 4 | 
 5 | from TEx.core.ocr.ocr_engine_base import OcrEngineBase
 6 | 
 7 | 
 8 | class UnitTestEchoOcrEngine(OcrEngineBase):
 9 |     """Dummy OCR Engine."""
10 | 
11 |     def __init__(self, echo_message: Optional[str]) -> None:
12 |         """Echo OCR Engine for Unittest only."""
13 |         super().__init__()
14 |         self.echo: Optional[str] = echo_message
15 | 
16 |     def configure(self, config: Optional[SectionProxy]) -> None:
17 |         """Configure Dummy Engine."""
18 |         pass
19 | 
20 |     def run(self, file_path: str) -> Optional[str]:
21 |         """Do Nothing."""
22 |         return self.echo
23 | 


--------------------------------------------------------------------------------
/TEx/__main__.py:
--------------------------------------------------------------------------------
 1 | """Main Executor for python -m TEx."""
 2 | 
 3 | import sys
 4 | import os
 5 | 
 6 | # When executed as the TEx package (python -m TEx), expose the package directory on sys.path
 7 | if __package__ == "TEx":
 8 | 
 9 |     # __file__ is TEx/__main__.py
10 |     # the dirname call strips off '/__main__.py'
11 |     # Resulting path is the TEx package directory itself
12 |     # Put it first on sys.path and make it the working directory
13 |     path = os.path.dirname(__file__)
14 |     sys.path.insert(0, path)
15 |     os.chdir(os.path.dirname(__file__))
16 | 
17 | if __name__ == "__main__":
18 |     # The runner is imported here rather than at the top of the file, so the
19 |     # import runs after the sys.path / working-directory setup above.
20 |     # The process exit status is whatever TelegramMonitorRunner().main() returns.
21 |     from TEx.runner import TelegramMonitorRunner
22 |     sys.exit(TelegramMonitorRunner().main())
23 | 


--------------------------------------------------------------------------------
/tests/finder/test_all_messages_finder.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | import unittest
 3 | from configparser import ConfigParser
 4 | 
 5 | from TEx.finder.all_messages_finder import AllMessagesFinder
 6 | 
 7 | 
 8 | class AllMessagesFinderTest(unittest.TestCase):
 9 | 
10 |     def setUp(self) -> None:
11 |         self.config = ConfigParser()
12 |         self.config.read('../../config.ini')
13 | 
14 |     def test_find_true(self):
15 |         """Test the always true return."""
16 | 
17 |         target: AllMessagesFinder = AllMessagesFinder(config=self.config)
18 | 
19 |         loop = asyncio.get_event_loop()
20 |         tasks = target.find(raw_text='foo'), target.find(raw_text=None)
21 | 
22 |         h_result_content, h_result_none = loop.run_until_complete(
23 |             asyncio.gather(*tasks)
24 |         )
25 | 
26 |         self.assertTrue(h_result_content)
27 |         self.assertTrue(h_result_none)
28 | 
29 | 


--------------------------------------------------------------------------------
/TEx/models/facade/finder_notification_facade_entity.py:
--------------------------------------------------------------------------------
 1 | """Facade Entities for Finder e Notification Engine Modules."""
 2 | from __future__ import annotations
 3 | 
 4 | from datetime import datetime
 5 | from typing import Optional
 6 | 
 7 | from pydantic import BaseModel, ConfigDict
 8 | 
 9 | from TEx.models.facade.media_handler_facade_entity import MediaHandlingEntity
10 | 
11 | 
12 | class FinderNotificationMessageEntity(BaseModel):
13 |     """Facade Entity carrying one message for the Finder and Notification modules."""
14 | 
15 |     model_config = ConfigDict(extra='forbid')  # unknown fields are rejected by pydantic
16 | 
17 |     date_time: datetime
18 |     raw_text: str
19 |     group_name: Optional[str]  # Optional fields declare no default, so callers still pass them (None allowed)
20 |     group_id: Optional[int]
21 |     from_id: Optional[int]
22 |     to_id: Optional[int]
23 |     reply_to_msg_id: Optional[int]
24 |     message_id: Optional[int]
25 |     is_reply: Optional[bool]
26 |     downloaded_media_info: Optional[MediaHandlingEntity]  # MediaHandlingEntity of the attached media, or None
27 |     found_on: str
28 | 


--------------------------------------------------------------------------------
/docs/maintenance/purge_old_data.md:
--------------------------------------------------------------------------------
 1 | # Maintenance - Purge Old Data
 2 | 
 3 | Like any system or application that uses a database to store information, Telegram Explorer eventually needs database maintenance to keep working properly and to remove old data.
 4 | 
 5 | Our maintenance command purges all old messages and media from the database and filesystem.
 6 | 
 7 | 
 8 | > NOTE: While other commands can be executed side-by-side or simultaneously, the 'purge_old_data' command needs to be executed alone. Stop all TEx instances that use the same configuration file, especially the 'listen' command, before performing the maintenance.
 9 | 
10 | **Full Command:**
11 | 
12 | ```bash
13 | python3 -m TEx purge_old_data --config CONFIGURATION_FILE_PATH --limit_days 30
14 | ```
15 | **Parameters**
16 | 
17 |   * **config** > Required - Created Configuration File Path
18 |   * **limit_days** > Optional - Number of Days of past to remove the messages and files.
19 | 


--------------------------------------------------------------------------------
/TEx/database/db_initializer.py:
--------------------------------------------------------------------------------
 1 | """TEx Database Initializer."""
 2 | from TEx.database.db_manager import DbManager
 3 | from TEx.database.db_migration import DatabaseMigrator
 4 | from TEx.models.database.telegram_db_model import TelegramDataBaseDeclarativeBase
 5 | from TEx.models.database.temp_db_models import TempDataBaseDeclarativeBase
 6 | 
 7 | 
 8 | class DbInitializer:
 9 |     """Central Database Initializer."""
10 | 
11 |     @staticmethod
12 |     def init(data_path: str) -> None:
13 |         """Initialize DB and Structure."""
14 |         # Initialize Main DB
15 |         DbManager.init_db(data_path=data_path)
16 | 
17 |         # Initialize Main DB
18 |         TempDataBaseDeclarativeBase.metadata.create_all(DbManager.SQLALCHEMY_BINDS['temp'], checkfirst=True)
19 |         TelegramDataBaseDeclarativeBase.metadata.create_all(DbManager.SQLALCHEMY_BINDS['data'], checkfirst=True)
20 | 
21 |         # Migrations
22 |         DatabaseMigrator.apply_migrations()
23 | 


--------------------------------------------------------------------------------
/TEx/modules/temp_file_manager.py:
--------------------------------------------------------------------------------
 1 | """Temporary Files Manager."""
 2 | from __future__ import annotations
 3 | 
 4 | import logging
 5 | from configparser import ConfigParser
 6 | from typing import Dict
 7 | 
 8 | from TEx.core.base_module import BaseModule
 9 | from TEx.core.temp_file import TempFileHandler
10 | 
11 | logger = logging.getLogger('TelegramExplorer')
12 | 
13 | 
14 | class TempFileManager(BaseModule):
15 |     """Temporary File Manager."""
16 | 
17 |     async def can_activate(self, config: ConfigParser, args: Dict, data: Dict) -> bool:
18 |         """
19 |         Abstract Method for Module Activation Function.
20 | 
21 |         :return:
22 |         """
23 |         return True
24 | 
25 |     async def run(self, config: ConfigParser, args: Dict, data: Dict) -> None:
26 |         """Execute Module."""
27 |         if args['purge_temp_files']:
28 |             TempFileHandler.purge()
29 | 
30 |         else:
31 |             TempFileHandler.remove_expired_entries()
32 | 


--------------------------------------------------------------------------------
/mypy.ini:
--------------------------------------------------------------------------------
 1 | [mypy]
 2 | plugins                     = sqlalchemy.ext.mypy.plugin, pydantic.mypy
 3 | 
 4 | ignore_missing_imports      = True
 5 | 
 6 | check_untyped_defs          = True
 7 | 
 8 | disallow_any_explicit       = True
 9 | disallow_any_unimported     = False
10 | disallow_any_expr           = False
11 | disallow_any_decorated      = False
12 | disallow_any_generics       = False
13 | disallow_subclassing_any    = True
14 | disallow_untyped_calls      = True
15 | disallow_untyped_defs       = True
16 | disallow_incomplete_defs    = True
17 | disallow_untyped_decorators = False
18 | 
19 | warn_redundant_casts    = True
20 | warn_unused_ignores     = True
21 | warn_no_return          = True
22 | warn_return_any         = True
23 | warn_unreachable        = True
24 | 
25 | namespace_packages      = True
26 | 
27 | follow_imports = normal
28 | 
29 | files = TEx/**/*.py
30 | 
31 | [pydantic-mypy]
32 | init_forbid_extra = True
33 | init_typed = True
34 | warn_required_dynamic_aliases = True
35 | 


--------------------------------------------------------------------------------
/TEx/core/media_metadata_handling/geo_handler.py:
--------------------------------------------------------------------------------
 1 | """Geo Media Handler."""
 2 | from __future__ import annotations
 3 | 
 4 | from typing import Dict, Optional
 5 | 
 6 | from telethon.tl.patched import Message
 7 | from telethon.tl.types import MessageMediaGeo
 8 | 
 9 | 
10 | class GeoMediaHandler:
11 |     """Geo Media Handler."""
12 | 
13 |     @staticmethod
14 |     def handle_metadata(message: Message) -> Optional[Dict]:
15 |         """Handle Media Metadata."""
16 |         # Get Media
17 |         geo: MessageMediaGeo = message.geo
18 | 
19 |         # Create Data Dict
20 |         return {
21 |             'file_name': 'geo.bin',
22 | 
23 |             'telegram_id': None,
24 |             'extension': None,
25 |             'height': None,
26 |             'width': None,
27 |             'date_time': None,
28 |             'mime_type': 'application/vnd.geo',
29 |             'size_bytes': None,
30 |             'title': f'{geo.lat}|{geo.long}',
31 |             'name': None,
32 |             }
33 | 


--------------------------------------------------------------------------------
/TEx/core/media_metadata_handling/photo_handler.py:
--------------------------------------------------------------------------------
 1 | """Photo Media Handler."""
 2 | from __future__ import annotations
 3 | 
 4 | from typing import Dict, Optional
 5 | 
 6 | from telethon.tl.patched import Message
 7 | from telethon.tl.types import MessageMediaPhoto
 8 | 
 9 | 
10 | class PhotoMediaHandler:
11 |     """Photo Media Handler."""
12 | 
13 |     @staticmethod
14 |     def handle_metadata(message: Message) -> Optional[Dict]:
15 |         """Handle Media Metadata."""
16 |         media: MessageMediaPhoto = message.media
17 | 
18 |         return {
19 |             'file_name': f'photo{message.file.ext}',
20 |             'telegram_id': media.photo.id,
21 |             'extension': message.file.ext,
22 |             'height': message.file.height,
23 |             'width': message.file.width,
24 |             'date_time': media.photo.date,
25 |             'mime_type': message.file.mime_type,
26 |             'size_bytes': message.file.size,
27 |             'title': None,
28 |             'name': None,
29 |             }
30 | 


--------------------------------------------------------------------------------
/TEx/core/media_metadata_handling/pdf_handler.py:
--------------------------------------------------------------------------------
 1 | """PDF Media Handler."""
 2 | from __future__ import annotations
 3 | 
 4 | from typing import Dict, Optional
 5 | 
 6 | from telethon.tl.patched import Message
 7 | from telethon.tl.types import DocumentAttributeFilename, MessageMediaPhoto
 8 | 
 9 | 
10 | class PdfMediaHandler:
11 |     """Photo Media Handler."""
12 | 
13 |     @staticmethod
14 |     def handle_metadata(message: Message) -> Optional[Dict]:
15 |         """Handle Media Metadata."""
16 |         media: MessageMediaPhoto = message.media
17 | 
18 |         return {
19 |             'file_name': [item for item in media.document.attributes if isinstance(item, DocumentAttributeFilename)][0].file_name,
20 |             'telegram_id': media.document.id,
21 |             'extension': None,
22 |             'height': None,
23 |             'width': None,
24 |             'date_time': media.document.date,
25 |             'mime_type': media.document.mime_type,
26 |             'size_bytes': media.document.size,
27 |             'title': None,
28 |             'name': None,
29 |             }
30 | 


--------------------------------------------------------------------------------
/TEx/finder/regex_finder.py:
--------------------------------------------------------------------------------
 1 | """Regex Finder."""
 2 | from __future__ import annotations
 3 | 
 4 | import re
 5 | from configparser import SectionProxy
 6 | from typing import List
 7 | 
 8 | from TEx.finder.base_finder import BaseFinder
 9 | 
10 | 
11 | class RegexFinder(BaseFinder):
12 |     """Regex Based Finder."""
13 | 
14 |     def __init__(self, config: SectionProxy) -> None:
15 |         """Initialize RegEx Finder."""
16 |         raw_regex_content: str = config['regex']
17 |         regex_conf_list: List[str] = [
18 |             item for item in raw_regex_content.split('\n') if item and item != ''
19 |         ] if '\n' in raw_regex_content else [raw_regex_content]
20 | 
21 |         self.regex_patterns: List[re.Pattern] = [
22 |             re.compile(regex_conf, flags=re.IGNORECASE | re.MULTILINE) for regex_conf in regex_conf_list
23 |         ]
24 | 
25 |     async def find(self, raw_text: str) -> bool:
26 |         """Apply Find Logic."""
27 |         if not raw_text or len(raw_text) == 0:
28 |             return False
29 | 
30 |         return any(len(pattern.findall(raw_text)) > 0 for pattern in self.regex_patterns)
31 | 


--------------------------------------------------------------------------------
/TEx/logging.conf:
--------------------------------------------------------------------------------
 1 | [loggers]
 2 | keys=root,sqlalchemy,TelegramExplorer,elasticsearch,elastic_transport.transport
 3 | 
 4 | #######################
 5 | 
 6 | [handlers]
 7 | keys=consoleHandler
 8 | 
 9 | #######################
10 | 
11 | [formatters]
12 | keys=simpleFormatter
13 | 
14 | #######################
15 | 
16 | [logger_root]
17 | level=INFO
18 | handlers=consoleHandler
19 | 
20 | [logger_elasticsearch]
21 | level=ERROR
22 | handlers=consoleHandler
23 | qualname=elasticsearch
24 | 
25 | [logger_elastic_transport.transport]
26 | level=ERROR
27 | handlers=consoleHandler
28 | qualname=elastic_transport.transport
29 | 
30 | [logger_TelegramExplorer]
31 | level=INFO
32 | handlers=consoleHandler
33 | qualname=TelegramExplorer
34 | propagate=0
35 | 
36 | [logger_sqlalchemy]
37 | level=ERROR
38 | handlers=consoleHandler
39 | qualname=''
40 | 
41 | #######################
42 | 
43 | [handler_consoleHandler]
44 | class=StreamHandler
45 | level=INFO
46 | formatter=simpleFormatter
47 | args=(sys.stdout,)
48 | 
49 | 
50 | #######################
51 | 
52 | [formatter_simpleFormatter]
53 | format=	%(asctime)s - %(levelname)s - %(message)s
54 | 


--------------------------------------------------------------------------------
/TEx/core/mapper/telethon_user_mapper.py:
--------------------------------------------------------------------------------
 1 | """Telethon User Entity Mapper."""
 2 | from __future__ import annotations
 3 | 
 4 | from typing import Dict
 5 | 
 6 | from telethon.tl.types import User
 7 | 
 8 | 
 9 | class TelethonUserEntiyMapper:
10 |     """Telethon User Entity Mapper."""
11 | 
12 |     @staticmethod
13 |     def to_database_dict(member: User) -> Dict:
14 |         """Map Telethon User to TeX Dict to Insert on DB."""
15 |         # Build Model
16 |         value: Dict = {
17 |             'id': member.id,
18 |             'is_bot': member.bot,
19 |             'is_fake': member.fake,
20 |             'is_self': member.is_self,
21 |             'is_scam': member.scam,
22 |             'is_verified': member.verified,
23 |             'first_name': member.first_name,
24 |             'last_name': member.last_name,
25 |             'username': member.username,
26 |             'phone_number': member.phone,
27 |             'photo_id': None,  # Reserved for Future Version
28 |             'photo_base64': None,  # Reserved for Future Version
29 |             'photo_name': None,  # Reserved for Future Version
30 |             }
31 | 
32 |         return value
33 | 


--------------------------------------------------------------------------------
/TEx/core/media_metadata_handling/text_handler.py:
--------------------------------------------------------------------------------
 1 | """Plain Text Media Handler."""
 2 | from __future__ import annotations
 3 | 
 4 | from typing import Dict, Optional
 5 | 
 6 | from telethon.tl.patched import Message
 7 | from telethon.tl.types import DocumentAttributeFilename, MessageMediaDocument
 8 | 
 9 | 
10 | class TextPlainHandler:
11 |     """Plain Text Media Handler - text/plain."""
12 | 
13 |     @staticmethod
14 |     def handle_metadata(message: Message) -> Optional[Dict]:
15 |         """Handle Media Metadata."""
16 |         media: MessageMediaDocument = message.media
17 |         return {
18 |             'file_name':
19 |                 [item for item in message.media.document.attributes if isinstance(item, DocumentAttributeFilename)][
20 |                     0].file_name,
21 |             'telegram_id': media.document.id,
22 |             'extension': None,
23 |             'height': None,
24 |             'width': None,
25 |             'date_time': media.document.date,
26 |             'mime_type': media.document.mime_type,
27 |             'size_bytes': media.document.size,
28 |             'title': None,
29 |             'name': None,
30 |             }
31 | 


--------------------------------------------------------------------------------
/TEx/modules/execution_configuration_handler.py:
--------------------------------------------------------------------------------
 1 | """Execution Configuration Loader."""
 2 | from __future__ import annotations
 3 | 
 4 | import logging
 5 | import os.path
 6 | from configparser import ConfigParser
 7 | from typing import Dict
 8 | 
 9 | from TEx.core.base_module import BaseModule
10 | 
11 | logger = logging.getLogger('TelegramExplorer')
12 | 
13 | 
14 | class ExecutionConfigurationHandler(BaseModule):
15 |     """Module That Handle the Input Arguments."""
16 | 
17 |     async def can_activate(self, config: ConfigParser, args: Dict, data: Dict) -> bool:
18 |         """
19 |         Abstract Method for Module Activation Function.
20 | 
21 |         :return:
22 |         """
23 |         return True
24 | 
25 |     async def run(self, config: ConfigParser, args: Dict, data: Dict) -> None:
26 |         """Load Configuration for Execution."""
27 |         logger.info('[*] Loading Execution Configurations:')
28 | 
29 |         if not os.path.exists(args['config']):
30 |             logger.fatal(f'[?] CONFIGURATION FILE NOT FOUND AT \"{args["config"]}\"')
31 |             data['internals']['panic'] = True
32 |             return
33 | 
34 |         config.read(args['config'])
35 | 


--------------------------------------------------------------------------------
/TEx/core/media_metadata_handling/generic_binary_handler.py:
--------------------------------------------------------------------------------
 1 | """Generic Binary Media Handler."""
 2 | from __future__ import annotations
 3 | 
 4 | from typing import Dict, List, Optional
 5 | 
 6 | from telethon.tl.patched import Message
 7 | from telethon.tl.types import DocumentAttributeFilename, MessageMediaDocument
 8 | 
 9 | 
10 | class GenericBinaryMediaHandler:
11 |     """Generic Binary Media Handler."""
12 | 
13 |     @staticmethod
14 |     def handle_metadata(message: Message) -> Optional[Dict]:
15 |         """Handle Media Metadata."""
16 |         media: MessageMediaDocument = message.media
17 |         fn_attr: List = [item for item in media.document.attributes if isinstance(item, DocumentAttributeFilename)]
18 | 
19 |         return {
20 |             'file_name': fn_attr[0].file_name if len(fn_attr) > 0 else 'unknow.bin',
21 |             'telegram_id': media.document.id,
22 |             'extension': None,
23 |             'height': None,
24 |             'width': None,
25 |             'date_time': media.document.date,
26 |             'mime_type': media.document.mime_type,
27 |             'size_bytes': media.document.size,
28 |             'title': None,
29 |             'name': None,
30 |         }
31 | 


--------------------------------------------------------------------------------
/TEx/modules/database_handler.py:
--------------------------------------------------------------------------------
 1 | """Database Handler."""
 2 | from __future__ import annotations
 3 | 
 4 | import logging
 5 | import os
 6 | from configparser import ConfigParser
 7 | from typing import Dict
 8 | 
 9 | from TEx.core.base_module import BaseModule
10 | from TEx.core.temp_file import TempFileHandler
11 | from TEx.database.db_initializer import DbInitializer
12 | 
13 | logger = logging.getLogger('TelegramExplorer')
14 | 
15 | 
16 | class DatabaseHandler(BaseModule):
17 |     """Module That Handle the Internal DB."""
18 | 
19 |     async def can_activate(self, config: ConfigParser, args: Dict, data: Dict) -> bool:
20 |         """
21 |         Abstract Method for Module Activation Function.
22 | 
23 |         :return:
24 |         """
25 |         return True
26 | 
27 |     async def run(self, config: ConfigParser, args: Dict, data: Dict) -> None:
28 |         """Execute."""
29 |         if not os.path.exists(config['CONFIGURATION']['data_path']):
30 |             os.mkdir(config['CONFIGURATION']['data_path'])
31 | 
32 |         # Initialize DB
33 |         DbInitializer.init(config['CONFIGURATION']['data_path'])
34 | 
35 |         # Expire Temp Files
36 |         TempFileHandler.remove_expired_entries()
37 | 


--------------------------------------------------------------------------------
/docs/configuration/media_download_configuration.md:
--------------------------------------------------------------------------------
 1 | # Media Download - Configuration
 2 | 
 3 | You can customize media download (fully enable, disable or selectively enable it) by specifying these settings in the configuration file.
 4 | 
 5 | **Enable / Disable Default Media Download Behaviour**
 6 | ```ini
 7 | [MEDIA.DOWNLOAD]
 8 | default=ALLOW
 9 | max_download_size_bytes=256000000
10 | ```
11 | 
12 | * **default** > Required - Set the default behaviour. Enable (ALLOW) or Disable (DISALLOW)
13 | * **max_download_size_bytes** > Optional - Max download size for all medias in bytes
14 |      * Default: 256000000
15 | 
16 | **Per Media Setting**
17 | Use *MEDIA.DOWNLOAD.<content-type>* to specify the settings for each individual content-type.
18 | ```ini
19 | [MEDIA.DOWNLOAD.<content-type>]
20 | enabled=ALLOW
21 | max_download_size_bytes=256000000
22 | groups=*
23 | ```
24 | 
25 | * **enabled** > Required - Enable/Disable this Content-Type download. Enable (ALLOW) or Disable (DISALLOW)
26 | * **max_download_size_bytes** > Optional - Max download size for this Content-Type
27 |     * Default: 256000000
28 | * **groups** > Optional - If present, Download the Messages only from Specified Groups ID's. Comma Separated. For All Groups, use *
29 |     * Default: * 
30 | 


--------------------------------------------------------------------------------
/TEx/models/database/temp_db_models.py:
--------------------------------------------------------------------------------
 1 | """Temporary Data Model."""
 2 | from __future__ import annotations
 3 | 
 4 | from typing import Optional
 5 | 
 6 | from sqlalchemy import Integer, String
 7 | from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
 8 | 
 9 | 
10 | class TempDataBaseDeclarativeBase(DeclarativeBase):
11 |     """Declarative base shared by the ORM models of this module (both use bind key 'temp')."""
12 | 
13 | 
14 | class TempDataOrmEntity(TempDataBaseDeclarativeBase):
15 |     """Temporary Data ORM Model."""
16 | 
17 |     __bind_key__ = 'temp'
18 |     __tablename__ = 'temporary_data'
19 | 
20 |     path: Mapped[str] = mapped_column(String(255), primary_key=True)
21 |     module: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
22 |     data: Mapped[str] = mapped_column(String)
23 |     created_at: Mapped[Integer] = mapped_column(Integer)
24 |     valid_at: Mapped[Integer] = mapped_column(Integer)
25 | 
26 | 
27 | class StateFileOrmEntity(TempDataBaseDeclarativeBase):
28 |     """Temporary Data ORM Model."""
29 | 
30 |     __bind_key__ = 'temp'
31 |     __tablename__ = 'state_file'
32 | 
33 |     path: Mapped[str] = mapped_column(String(255), primary_key=True)
34 |     data: Mapped[str] = mapped_column(String)
35 |     created_at: Mapped[Integer] = mapped_column(Integer)
36 | 


--------------------------------------------------------------------------------
/TEx/models/facade/media_handler_facade_entity.py:
--------------------------------------------------------------------------------
 1 | """Facade Entities for Media Handling."""
 2 | 
 3 | from pydantic import BaseModel
 4 | 
 5 | 
 6 | class MediaHandlingEntity(BaseModel):
 7 |     """Facade Entities for Media Handling."""
 8 | 
 9 |     media_id: int
10 |     file_name: str
11 |     content_type: str
12 |     size_bytes: int
13 |     disk_file_path: str
14 |     is_ocr_supported: bool
15 | 
16 |     def is_image(self) -> bool:
17 |         """Return if Downloaded Image are an Image."""
18 |         return self.content_type in ['image/gif', 'image/jpeg', 'image/png', 'image/webp', 'application/gif']
19 | 
20 |     def is_video(self) -> bool:
21 |         """Return if Downloaded Image are a Video."""
22 |         return self.content_type in ['application/ogg', 'video/mp4', 'video/quicktime', 'video/webm']
23 | 
24 |     def allow_search_in_text_file(self) -> bool:
25 |         """Return if Allow to Find in the Text File."""
26 |         return self.content_type in [
27 |             'application/atom+xml',
28 |             'application/bittorrent',
29 |             'application/csv',
30 |             'application/html',
31 |             'application/json',
32 |             'application/ld+json',
33 |             'text/csv',
34 |             'text/html',
35 |             'text/plain',
36 |             'text/xml',
37 |         ]
38 | 


--------------------------------------------------------------------------------
/docs/report/report_html.md:
--------------------------------------------------------------------------------
 1 | # Generate Report - HTML
 2 | 
 3 | Telegram Explorer allows you to generate an HTML report containing messages and assets (images, videos, binaries, etc.) from groups. You may also specify groups, a period and message filters to generate a more customized report.
 4 | 
 5 | **Full Command:**
 6 | 
 7 | ```bash
 8 | python3 -m TEx export_text --config CONFIGURATION_FILE_PATH --order_desc --limit_days 3 --regex REGEX --report_folder REPORT_FOLDER_PATH --group_id 12547,1256698
 9 | ```
10 | 
11 | **Basic Command:**
12 | 
13 | ```bash
14 | python3 -m TEx export_text --config CONFIGURATION_FILE_PATH --limit_days 3 --regex REGEX --report_folder REPORT_FOLDER_PATH
15 | ```
16 | **Parameters**
17 | 
18 |   * **config** > Required - Created Configuration File Path
19 |   * **report_folder** > Required - Defines the Report Files Folder
20 |   * **group_id** > Optional - If present, Download the Messages only from Specified Groups ID's
21 |   * **limit_days** > Optional - Number of Days of past to filter the Messages
22 |   * **regex** > Required - Regex to find the messages. 
23 |     * Ex: Export Links from Messages (.\*http://.\*),(.\*https://.\*)
24 | 
25 | *Output Example Using "*(.\*http://.\*),(.\*https://.\*)*" Regular Expression:*
26 | 
27 | *Report Folder*
28 | ![text_report_files.png](../media/text_report_files.png)
29 | 
30 | *File Content*
31 | ![text_report_content.png](../media/text_report_content.png)


--------------------------------------------------------------------------------
/TEx/core/media_metadata_handling/sticker_handler.py:
--------------------------------------------------------------------------------
 1 | """Sticker Media Handler."""
 2 | from __future__ import annotations
 3 | 
 4 | from typing import Dict, List, Optional
 5 | 
 6 | from telethon.tl.patched import Message
 7 | from telethon.tl.types import DocumentAttributeFilename, DocumentAttributeImageSize, MessageMediaDocument
 8 | 
 9 | 
10 | class MediaStickerHandler:
11 |     """Sticker Media Handler - application/x-tgsticker."""
12 | 
13 |     @staticmethod
14 |     def handle_metadata(message: Message) -> Optional[Dict]:
15 |         """Handle Media Metadata."""
16 |         media: MessageMediaDocument = message.media
17 |         fn_attr_img: List = [item for item in media.document.attributes if isinstance(item, DocumentAttributeImageSize)]
18 | 
19 |         return {
20 |             'file_name': [item for item in message.media.document.attributes if isinstance(item, DocumentAttributeFilename)][0].file_name,
21 |             'telegram_id': media.document.id,
22 |             'extension': None,
23 |             'height': fn_attr_img[0].h if len(fn_attr_img) > 0 else None,
24 |             'width': fn_attr_img[0].w if len(fn_attr_img) > 0 else None,
25 |             'date_time': media.document.date,
26 |             'mime_type': media.document.mime_type,
27 |             'size_bytes': media.document.size,
28 |             'title': None,
29 |             'name': None,
30 |             }
31 | 


--------------------------------------------------------------------------------
/TEx/core/media_metadata_handling/mp4_handler.py:
--------------------------------------------------------------------------------
 1 | """MP4 Media Handler."""
 2 | from __future__ import annotations
 3 | 
 4 | from typing import Dict, List, Optional
 5 | 
 6 | from telethon.tl.patched import Message
 7 | from telethon.tl.types import DocumentAttributeFilename, DocumentAttributeVideo, MessageMediaDocument
 8 | 
 9 | 
10 | class MediaMp4Handler:
11 |     """MP4 Media Handler - video/mp4."""
12 | 
13 |     @staticmethod
14 |     def handle_metadata(message: Message) -> Optional[Dict]:
15 |         """Handle Media Metadata."""
16 |         media: MessageMediaDocument = message.media
17 |         fn_attr: List = [item for item in media.document.attributes if isinstance(item, DocumentAttributeFilename)]
18 |         fn_attr_vid: List = [item for item in media.document.attributes if isinstance(item, DocumentAttributeVideo)]
19 | 
20 |         return {
21 |             'file_name': fn_attr[0].file_name if len(fn_attr) > 0 else 'unknow.mp4',
22 |             'telegram_id': media.document.id,
23 |             'extension': None,
24 |             'height': fn_attr_vid[0].h if len(fn_attr_vid) > 0 else None,
25 |             'width': fn_attr_vid[0].w if len(fn_attr_vid) > 0 else None,
26 |             'date_time': media.document.date,
27 |             'mime_type': media.document.mime_type,
28 |             'size_bytes': media.document.size,
29 |             'title': None,
30 |             'name': None,
31 |             }
32 | 


--------------------------------------------------------------------------------
/docs/report/report_text.md:
--------------------------------------------------------------------------------
 1 | # Generate Report - Text
 2 | 
 3 | Telegram Explorer allows you to export Text content based on Regular Expression Extractors.
 4 | 
 5 | This way, you can generate simple outputs containing any type of information present on messages.
 6 | 
 7 | **Full Command:**
 8 | 
 9 | ```bash
10 | python3 -m TEx export_text --config CONFIGURATION_FILE_PATH --order_desc --limit_days 3 --regex REGEX_CAPTURE_GROUP --report_folder REPORT_FOLDER_PATH --group_id 12547,1256698
11 | ```
12 | 
13 | **Basic Command:**
14 | 
15 | ```bash
16 | python3 -m TEx export_text --config CONFIGURATION_FILE_PATH --limit_days 3 --regex REGEX_CAPTURE_GROUP --report_folder REPORT_FOLDER_PATH
17 | ```
18 | **Parameters**
19 | 
20 |   * **config** > Required - Created Configuration File Path
21 |   * **report_folder** > Required - Defines the Report Files Folder
22 |   * **group_id** > Optional - If present, Download the Messages only from Specified Groups ID's
23 |   * **limit_days** > Optional - Number of Days of past to filter the Messages
24 |   * **regex** > Required - Regex Capture Group to find the messages. 
25 |     * Ex: Export Links from Messages (http[s]?:\/\/[^\"\',]*)
26 | 
27 | *Output Example Using "(http[s]?:\/\/[^\"\',]*)" Regular Expression:*
28 | 
29 | *Report Folder*
30 | ![text_report_files.png](../media/text_report_files.png)
31 | 
32 | *File Content*
33 | ![text_report_content.png](../media/text_report_content.png)


--------------------------------------------------------------------------------
/.github/workflows/cy.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   push:
 5 | 
 6 |     paths-ignore:
 7 |       - '**/*.md'
 8 |       - '**/docs/*.*'
 9 |       - '**/.github/workflows/deploy.yml'
10 |       - '**/.github/workflows/publish.yml'
11 | 
12 |   pull_request:
13 |     branches:
14 |       - V*-dev
15 |       - main
16 | 
17 |     paths-ignore:
18 |       - '**/*.md'
19 |       - '**/docs/*.md'
20 | 
21 | jobs:
22 |   CodeQuality:
23 |     runs-on: ubuntu-latest
24 | 
25 |     steps:
26 |     - uses: actions/checkout@v2
27 |     - name: Set up Python 3.8
28 |       uses: actions/setup-python@v2
29 |       with:
30 |         python-version: 3.8
31 | 
32 |     - name: Install dependencies
33 |       run: |
34 |         python -m pip install --upgrade pip
35 |         python -m pip install -r requirements.txt
36 | 
37 |     - name: Run Code Quality
38 |       run: |
39 |         tox -e quality
40 | 
41 |   TestsAndCodeCoverage:
42 |     runs-on: ubuntu-latest
43 |     needs: CodeQuality
44 | 
45 |     steps:
46 |     - uses: actions/checkout@v2
47 |     - name: Set up Python 3.8
48 |       uses: actions/setup-python@v2
49 |       with:
50 |         python-version: 3.8
51 | 
52 |     - name: Install dependencies
53 |       run: |
54 |         python -m pip install --upgrade pip
55 |         python -m pip install -r requirements.txt
56 | 
57 |     - name: Run Unittests and Code Coverage
58 |       run: |
59 |         tox -e coverage
60 | 


--------------------------------------------------------------------------------
/TEx/modules/data_structure_handler.py:
--------------------------------------------------------------------------------
 1 | """Database Handler."""
 2 | from __future__ import annotations
 3 | 
 4 | import logging
 5 | import os
 6 | from configparser import ConfigParser
 7 | from typing import Dict
 8 | 
 9 | from TEx.core.base_module import BaseModule
10 | from TEx.core.dir_manager import DirectoryManagerUtils
11 | 
12 | logger = logging.getLogger('TelegramExplorer')
13 | 
14 | 
15 | class DataStructureHandler(BaseModule):
16 |     """Handle the Basic Directory Structure."""
17 | 
18 |     async def can_activate(self, config: ConfigParser, args: Dict, data: Dict) -> bool:
19 |         """
20 |         Abstract Method for Module Activation Function.
21 | 
22 |         :return:
23 |         """
24 |         return 'data_path' in args
25 | 
26 |     async def run(self, config: ConfigParser, args: Dict, data: Dict) -> None:
27 |         """Execute."""
28 |         if not await self.can_activate(config, args, data):
29 |             return
30 | 
31 |         DirectoryManagerUtils.ensure_dir_struct(os.path.join(args['data_path'], 'export'))
32 |         DirectoryManagerUtils.ensure_dir_struct(os.path.join(args['data_path'], 'download'))
33 |         DirectoryManagerUtils.ensure_dir_struct(os.path.join(args['data_path'], 'profile_pic'))
34 |         DirectoryManagerUtils.ensure_dir_struct(os.path.join(args['data_path'], 'media'))
35 |         DirectoryManagerUtils.ensure_dir_struct(os.path.join(args['data_path'], 'session'))
36 | 


--------------------------------------------------------------------------------
/TEx/core/media_download_handling/std_media_downloader.py:
--------------------------------------------------------------------------------
 1 | """Standard Media Downloader."""
 2 | from __future__ import annotations
 3 | 
 4 | import os
 5 | from typing import Dict, List
 6 | 
 7 | from telethon.tl.patched import Message
 8 | 
 9 | 
10 | class StandardMediaDownloader:
11 |     """Standard Media Downloader."""
12 | 
13 |     @staticmethod
14 |     async def download(message: Message, media_metadata: Dict, data_path: str) -> None:
15 |         """Download the Media and Update MetadaInfo.
16 | 
17 |         :param message:
18 |         :param media_metadata:
19 |         :return:
20 |         """
21 |         if not media_metadata:
22 |             return
23 | 
24 |         # Download Media
25 |         target_path: str = os.path.join(data_path, StandardMediaDownloader.__sanitize_media_filename(media_metadata['file_name']))
26 |         generated_path: str = await message.download_media(target_path)
27 |         media_metadata['extension'] = os.path.splitext(generated_path)[1]
28 | 
29 |     @staticmethod
30 |     def __sanitize_media_filename(filename: str) -> str:
31 |         """Sanitize Media Filename."""
32 |         sanit_charts: List[str] = [char for char in filename if not char.isalpha() and char != ' ' and not char.isalnum() and char != '.' and char != '-']
33 |         h_result: str = filename
34 | 
35 |         for sanit_item in sanit_charts:
36 |             h_result = h_result.replace(sanit_item, '_')
37 | 
38 |         return h_result
39 | 


--------------------------------------------------------------------------------
/docs/report/report_export_files.md:
--------------------------------------------------------------------------------
 1 | # Export Files
 2 | 
 3 | Telegram Explorer also allows you to export all downloaded files from all groups. It is important to highlight that the export process automatically prevents duplicate export of files based on their MD5 hash signature.
 4 | 
 5 | This feature is especially useful for malware analysis and video content analysis.
 6 | 
 7 | **Full Command:**
 8 | 
 9 | ```bash
10 | python3 -m TEx export_file --config CONFIGURATION_FILE_PATH --report_folder REPORT_FOLDER_PATH --group_id * --filter * --limit_days 3 --mime_type text/plain
11 | ```
12 | 
13 | **Basic Command:**
14 | ```bash
15 | python3 -m TEx export_file --config CONFIGURATION_FILE_PATH --report_folder REPORT_FOLDER_PATH --group_id * --limit_days 3 --mime_type text/plain
16 | ```
17 | 
18 | **Parameters**
19 | 
20 |   * **config** > Required - Created Configuration File Path
21 |   * **report_folder** > Required - Defines the Report Files Folder
22 |   * **group_id** > Optional - If present, Download the Messages only from Specified Groups ID's
23 |   * **filter** > Optional - Simple (Comma Separated) FileName String Terms Filter. Ex: malware, "Bot net"
24 |   * **limit_days** > Optional - Number of Days of past to filter the Messages
25 |   * **mime_type** > Optional - File MIME Type. Ex: application/vnd.android.package-archive
26 |     
27 | *Output Example Using "application/vnd.android.package-archive" as mime_type*
28 | 
29 | ![export_files_list.png](../media/export_files_list.png)
30 | 


--------------------------------------------------------------------------------
/TEx/core/media_metadata_handling/webimage_handler.py:
--------------------------------------------------------------------------------
 1 | """Web Image Media Handler."""
 2 | from __future__ import annotations
 3 | 
 4 | from typing import Dict, List, Optional
 5 | 
 6 | from telethon.tl.patched import Message
 7 | from telethon.tl.types import DocumentAttributeFilename, DocumentAttributeImageSize, MessageMediaDocument
 8 | 
 9 | 
10 | class WebImageStickerHandler:
11 |     """Web Image Media Handler - image/webp."""
12 | 
13 |     @staticmethod
14 |     def handle_metadata(message: Message) -> Optional[Dict]:
15 |         """Handle Media Metadata."""
16 |         media: MessageMediaDocument = message.media
17 | 
18 |         fn_attr: List = [item for item in message.media.document.attributes if isinstance(item, DocumentAttributeFilename)]
19 | 
20 |         if not fn_attr or len(fn_attr) == 0:
21 |             return None
22 | 
23 |         return {
24 |             'file_name': fn_attr[0].file_name,
25 |             'telegram_id': media.document.id,
26 |             'extension': None,
27 |             'height': [item for item in message.media.document.attributes if isinstance(item, DocumentAttributeImageSize)][0].h,
28 |             'width': [item for item in message.media.document.attributes if isinstance(item, DocumentAttributeImageSize)][0].w,
29 |             'date_time': media.document.date,
30 |             'mime_type': media.document.mime_type,
31 |             'size_bytes': media.document.size,
32 |             'title': None,
33 |             'name': None,
34 |             }
35 | 


--------------------------------------------------------------------------------
/tests/config.ini:
--------------------------------------------------------------------------------
 1 | [PIPELINE]
 2 | pre_pipeline_sequence   =   input_args_handler.InputArgsHandler
 3 |                             data_structure_handler.DataStructureHandler
 4 |                             database_handler.DatabaseHandler
 5 |                             temp_file_manager.TempFileManager
 6 |                             state_file_handler.LoadStateFileHandler
 7 | 
 8 | pipeline_sequence       =   telegram_connection_manager.TelegramConnector
 9 | 
10 |                             telegram_groups_scrapper.TelegramGroupScrapper
11 |                             telegram_groups_list.TelegramGroupList
12 |                             telegram_messages_scrapper.TelegramGroupMessageScrapper
13 |                             telegram_report_generator.telegram_report_sent_telegram.TelegramReportSentViaTelegram
14 | 
15 |                             telegram_connection_manager.TelegramDisconnector
16 | 
17 |                             telegram_report_generator.telegram_html_report_generator.TelegramReportGenerator
18 |                             telegram_report_generator.telegram_export_text_generator.TelegramExportTextGenerator
19 |                             telegram_report_generator.telegram_export_file_generator.TelegramExportFileGenerator
20 | 
21 | 
22 | post_pipeline_sequence  =   state_file_handler.SaveStateFileHandler
23 | 
24 | ########## Modules Config ##########
25 | 
26 | [MODULE_LoadStateFileHandler]
27 | file_name   = state/{0}.json
28 | 
29 | [MODULE_SaveStateFileHandler]
30 | file_name   = state/{0}.json
31 | 
32 | [Telegram]
33 | 


--------------------------------------------------------------------------------
/TEx/models/facade/telegram_message_report_facade_entity.py:
--------------------------------------------------------------------------------
 1 | """Facade Entity for Report Generation."""
 2 | from __future__ import annotations
 3 | 
 4 | import datetime
 5 | from typing import Optional
 6 | 
 7 | from TEx.models.database.telegram_db_model import TelegramMessageOrmEntity
 8 | 
 9 | 
10 | class TelegramMessageReportFacadeEntity:
11 |     """Facade Entity for Report Generation (plain attribute holder, no behavior)."""
12 | 
13 |     id: int
14 |     group_id: int
15 |     media_id: Optional[int]
16 |     # Message content
17 |     date_time: datetime.datetime
18 |     message: str
19 |     raw: str
20 |     # Sender / receiver
21 |     from_id: Optional[int]
22 |     from_type: Optional[str]
23 |     to_id: Optional[int]
24 |     # Not assigned by TelegramMessageReportFacadeEntityMapper.create_from_dbentity
25 |     meta_next: bool
26 |     meta_previous: bool
27 | 
28 | 
29 | class TelegramMessageReportFacadeEntityMapper:
30 |     """Mapper for TelegramMessageReportFacadeEntity."""
31 | 
32 |     @staticmethod
33 |     def create_from_dbentity(source: TelegramMessageOrmEntity) -> TelegramMessageReportFacadeEntity:
34 |         """Map TelegramMessageOrmEntity to TelegramMessageReportFacadeEntity."""
35 |         h_result: TelegramMessageReportFacadeEntity = TelegramMessageReportFacadeEntity()
36 | 
37 |         h_result.id = source.id
38 |         h_result.group_id = source.group_id
39 |         h_result.media_id = source.media_id
40 |         h_result.date_time = source.date_time
41 |         h_result.message = source.message
42 |         h_result.raw = source.raw
43 |         h_result.from_id = source.from_id
44 |         h_result.from_type = source.from_type
45 |         h_result.to_id = source.to_id
46 | 
47 |         return h_result
48 | 


--------------------------------------------------------------------------------
/tests/modules/test_state_file_handler.py:
--------------------------------------------------------------------------------
 1 | """State File Handler Tests."""
 2 | import asyncio
 3 | import unittest
 4 | from configparser import ConfigParser
 5 | from typing import Dict
 6 | 
 7 | from TEx.modules.state_file_handler import LoadStateFileHandler, SaveStateFileHandler
 8 | from tests.modules.common import TestsCommon
 9 | 
10 | 
11 | class StateFileHandlerTest(unittest.TestCase):
12 | 
13 |     def setUp(self) -> None:
14 | 
15 |         self.config = ConfigParser()
16 |         self.config.read('config.ini')
17 | 
18 |         TestsCommon.basic_test_setup()
19 | 
20 |     def test_run(self):
21 | 
22 |         target_load: LoadStateFileHandler = LoadStateFileHandler()
23 |         target_save: SaveStateFileHandler = SaveStateFileHandler()
24 |         args: Dict = {'config': 'unittest_configfile.config'}
25 |         save_data: Dict = {'demo': 1, 'internals': {'panic': False}}
26 | 
27 |         TestsCommon.execute_basic_pipeline_steps_for_initialization(config=self.config, args=args, data=save_data)
28 | 
29 |         loop = asyncio.get_event_loop()
30 | 
31 |         loop.run_until_complete(
32 |             target_save.run(
33 |                 config=self.config,
34 |                 args=args,
35 |                 data=save_data
36 |             )
37 |         )
38 | 
39 |         load_data: Dict = {}
40 |         loop.run_until_complete(
41 |             target_load.run(
42 |                 config=self.config,
43 |                 args=args,
44 |                 data=load_data
45 |             )
46 |         )
47 | 
48 |         self.assertEqual(load_data, save_data)
49 | 
50 | 


--------------------------------------------------------------------------------
/TEx/core/mapper/keep_alive_entity_mapper.py:
--------------------------------------------------------------------------------
 1 | """Signal Entity Mapper."""
 2 | from __future__ import annotations
 3 | 
 4 | from configparser import SectionProxy
 5 | from typing import Optional
 6 | 
 7 | from TEx.models.facade.signal_entity_model import SignalEntity
 8 | 
 9 | 
10 | class SignalEntityMapper:
11 |     """Signal Entity Mapper."""
12 | 
13 |     @staticmethod
14 |     def to_entity(section_proxy: Optional[SectionProxy]) -> SignalEntity:
15 |         """Map the Configuration KEEP_ALIVE to Entity."""
16 |         # Build Model
17 |         if section_proxy:
18 |             return SignalEntity(
19 |                 enabled=section_proxy.get('enabled', fallback='false') == 'true',
20 |                 keep_alive_interval=int(section_proxy.get('keep_alive_interval', fallback='0')),
21 |                 notifiers={
22 |                     'KEEP-ALIVE': section_proxy.get('keep_alive_notifer', fallback='').split(','),
23 |                     'INITIALIZATION': section_proxy.get('initialization_notifer', fallback='').split(','),
24 |                     'SHUTDOWN': section_proxy.get('shutdown_notifer', fallback='').split(','),
25 |                     'NEW-GROUP': section_proxy.get('new_group_notifer', fallback='').split(','),
26 |                 },
27 |             )
28 | 
29 |         return SignalEntity(
30 |             enabled=False,
31 |             keep_alive_interval=300,
32 |             notifiers={
33 |                 'KEEP-ALIVE': [],
34 |                 'INITIALIZATION': [],
35 |                 'SHUTDOWN': [],
36 |                 'NEW-GROUP': [],
37 |             },
38 |         )
39 | 


--------------------------------------------------------------------------------
/TEx/core/ocr/ocr_engine_factory.py:
--------------------------------------------------------------------------------
 1 | """Factory Class for ORC Engines."""
 2 | from __future__ import annotations
 3 | 
 4 | from configparser import ConfigParser, SectionProxy
 5 | from typing import Optional
 6 | 
 7 | from TEx.core.ocr.dummy_ocr_engine import DummyOcrEngine
 8 | from TEx.core.ocr.ocr_engine_base import OcrEngineBase
 9 | from TEx.core.ocr.tesseract_ocr_engine import TesseractOcrEngine
10 | 
11 | 
12 | class OcrEngineFactory:
13 |     """Factory Class for ORC Engines."""
14 | 
15 |     @staticmethod
16 |     def get_instance(config: ConfigParser) -> OcrEngineBase:
17 |         """Configure the Notifier."""
18 |         if not config.has_section('OCR'):
19 |             return DummyOcrEngine()
20 | 
21 |         ocr_settings: SectionProxy = config['OCR']
22 | 
23 |         # Get Activation and Type Settings
24 |         is_enabled: bool = ocr_settings.get('enabled', fallback='false') == 'true'
25 |         if not is_enabled:
26 |             return DummyOcrEngine()
27 | 
28 |         # Get Configurations
29 |         ocr_type: str = ocr_settings.get('type', fallback='none')
30 |         engine: OcrEngineBase
31 |         ocr_engine_settings: Optional[SectionProxy]
32 | 
33 |         # Return Tesseract Engine
34 |         if ocr_type == 'tesseract':
35 |             engine = TesseractOcrEngine()
36 |             ocr_engine_settings = config['OCR.TESSERACT']
37 |         else:
38 |             error_msg: str = f'Invalid OCR Type "{ocr_type}"'
39 |             raise AttributeError(error_msg)
40 | 
41 |         # Configure Engine
42 |         engine.configure(config=ocr_engine_settings)
43 | 
44 |         return engine
45 | 


--------------------------------------------------------------------------------
/docs/configuration/basic.md:
--------------------------------------------------------------------------------
 1 | # Configuration
 2 | The basic configuration contains 4 required settings and 2 optional ones:
 3 | 
 4 | ```ini
 5 | [CONFIGURATION]
 6 | api_id=my_api_id
 7 | api_hash=my_api_hash
 8 | phone_number=my_phone_number
 9 | data_path=my_data_path
10 | device_model=device_model_name
11 | timeout=30
12 | ```
13 | 
14 | * **api_id** > Required - Telegram API ID. From https://my.telegram.org/ > login > API development tools 
15 | * **api_hash** > Required - Telegram API Hash. From https://my.telegram.org/ > login > API development tools
16 | * **phone_number** > Required - Target Phone Number
17 | * **data_path** > Required - Defines the Path Folder for the SQLite Databases and Downloaded Files
18 | * **device_model** > Optional - Defines which device model is passed to Telegram Servers.
19 |     * If Blank or Absent - Uses 'TeX' for backwards compatibility
20 |     * If set as 'AUTO' - Uses the computer/system device model
21 | * **timeout** > Optional - Defines the Timeout in seconds for Telegram Client.
22 |     * Default: 10
23 | 
24 |   
25 | !!! warning "Note about 'device_model'"
26 | 
27 |     If you are using versions prior to 0.2.15 or have already connected to Telegram and have not configured the 'device_model' parameter, do not make the change, as Telegram may interpret this operation as an attack on your account.
28 | 
29 | Place the configuration file anywhere you want with .config extension.
30 | 
31 | **EXAMPLE (myconfig.config)**
32 | ```ini
33 | [CONFIGURATION]
34 | api_id=12555896
35 | api_hash=dead1f29db5d1fa56cc42757acbabeef
36 | phone_number=15552809753
37 | data_path=/usr/home/tex_data/
38 | device_model=AMD64
39 | timeout=15
40 | ```
41 | 


--------------------------------------------------------------------------------
/docs/how_use/how_to_use_basic.md:
--------------------------------------------------------------------------------
 1 | # Basic Usage
 2 | 
 3 | ## The Basics
 4 | Considering a *my_TEx_config.config* file created at */usr/my_TEx_config.config* with the following content:
 5 | 
 6 | ```ini
 7 | [CONFIGURATION]
 8 | api_id=12555896
 9 | api_hash=dead1f29db5d1fa56cc42757acbabeef
10 | phone_number=15552809753
11 | data_path=/usr/home/tex_data/
12 | ```
13 | 
14 | Execute the first 2 commands to configure and sync TEx and the last one to activate the listener module.
15 | 
16 | ```bash
17 | python3 -m TEx connect --config /usr/my_TEx_config.config
18 | python3 -m TEx load_groups --config /usr/my_TEx_config.config
19 | python3 -m TEx listen --config /usr/my_TEx_config.config
20 | ```
21 | 
22 | <!-- Command Line -->
23 | ## Command Line
24 | 
25 | ### Connect to Telegram Servers
26 | ```bash
27 | python3 -m TEx connect --config CONFIGURATION_FILE_PATH
28 | ```
29 |   * **config** > Required - Created Configuration File Path
30 | 
31 | ### Update Groups List (Optional, but Recommended)
32 | ```bash
33 | python3 -m TEx load_groups --config CONFIGURATION_FILE_PATH --refresh_profile_photos
34 | ```
35 | 
36 |   * **config** > Required - Created Configuration File Path
37 |   * **refresh_profile_photos** > Optional - If present, forces the download and update of all channel members' profile photos
38 | 
39 | ### Listen Messages (Start the Message Listener)
40 | ```bash
41 | python3 -m TEx listen --config CONFIGURATION_FILE_PATH --group_id 1234,5678
42 | ```
43 | 
44 |   * **config** > Required - Created Configuration File Path
45 |   * **ignore_media** > Optional - If present, don't Download any Media
46 |   * **group_id** > Optional - If present, Download the Messages only from Specified Groups ID's
47 | 


--------------------------------------------------------------------------------
/.github/workflows/cy_deploy.yml:
--------------------------------------------------------------------------------
 1 | name: CI-Deploy
 2 | 
 3 | on:
 4 |   push:
 5 | 
 6 |     tags:
 7 |       - V*
 8 | 
 9 | jobs:
10 |   CodeQuality:
11 |     runs-on: ubuntu-latest
12 | 
13 |     steps:
14 |     - uses: actions/checkout@v2
15 |     - name: Set up Python 3.8
16 |       uses: actions/setup-python@v2
17 |       with:
18 |         python-version: 3.8
19 | 
20 |     - name: Install dependencies
21 |       run: |
22 |         python -m pip install --upgrade pip
23 |         python -m pip install -r requirements.txt
24 | 
25 |     - name: Run Code Quality
26 |       run: |
27 |         tox -e quality
28 | 
29 |   TestsAndCodeCoverage:
30 |     runs-on: ubuntu-latest
31 |     needs: CodeQuality
32 | 
33 |     steps:
34 |     - uses: actions/checkout@v2
35 |     - name: Set up Python 3.8
36 |       uses: actions/setup-python@v2
37 |       with:
38 |         python-version: 3.8
39 | 
40 |     - name: Install dependencies
41 |       run: |
42 |         python -m pip install --upgrade pip
43 |         python -m pip install -r requirements.txt
44 | 
45 |     - name: Run Unittests and Code Coverage
46 |       run: |
47 |         tox -e coverage
48 | 
49 |   PublishPypi:
50 |     runs-on: ubuntu-latest
51 |     needs: TestsAndCodeCoverage
52 | 
53 |     steps:
54 |     - uses: actions/checkout@v2
55 |     - name: Set up Python 3.8
56 |       uses: actions/setup-python@v2
57 |       with:
58 |         python-version: 3.8
59 | 
60 |     - name: Install dependencies
61 |       run: |
62 |         python -m pip install --upgrade pip
63 |         python -m pip install -r requirements.txt
64 | 
65 |     - name: Deployment
66 |       env:
67 |         PYPI_DEPLOY_TOKEN: ${{ secrets.PYPI_DEPLOY_TOKEN }}
68 |       run: |
69 |         tox -e deploy


--------------------------------------------------------------------------------
/tests/modules/common.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | 
 3 | from sqlalchemy import delete
 4 | 
 5 | from TEx.core.dir_manager import DirectoryManagerUtils
 6 | from TEx.database.db_initializer import DbInitializer
 7 | from TEx.database.db_manager import DbManager
 8 | from TEx.models.database.telegram_db_model import (
 9 |     TelegramGroupOrmEntity,
10 |     TelegramMediaOrmEntity, TelegramMessageOrmEntity, TelegramUserOrmEntity, )
11 | from TEx.modules.execution_configuration_handler import ExecutionConfigurationHandler
12 | 
13 | 
14 | class TestsCommon:
15 | 
16 |     @staticmethod
17 |     def basic_test_setup():
18 |         """Execute Basic Tasks for Tests."""
19 | 
20 |         DirectoryManagerUtils.ensure_dir_struct('_data')
21 |         DirectoryManagerUtils.ensure_dir_struct('_data/resources')
22 |         DirectoryManagerUtils.ensure_dir_struct('_data/media')
23 | 
24 |         DbInitializer.init(data_path='_data/')
25 | 
26 |         # Reset SQLlite Groups
27 |         DbManager.SESSIONS['data'].execute(delete(TelegramMessageOrmEntity))
28 |         DbManager.SESSIONS['data'].execute(delete(TelegramGroupOrmEntity))
29 |         DbManager.SESSIONS['data'].execute(delete(TelegramMediaOrmEntity))
30 |         DbManager.SESSIONS['data'].execute(delete(TelegramUserOrmEntity))
31 |         DbManager.SESSIONS['data'].commit()
32 | 
33 |     @staticmethod
34 |     def execute_basic_pipeline_steps_for_initialization(config, args, data):
35 | 
36 |         execution_configuration_loader: ExecutionConfigurationHandler = ExecutionConfigurationHandler()
37 | 
38 |         loop = asyncio.get_event_loop()
39 |         loop.run_until_complete(
40 |             execution_configuration_loader.run(
41 |                 config=config,
42 |                 args=args,
43 |                 data=data
44 |             )
45 |         )
46 | 


--------------------------------------------------------------------------------
/docs/notification/notification_elasticsearch_signals_template.md:
--------------------------------------------------------------------------------
 1 | # Notification System - Elastic Search Connector - Signals Template
 2 | 
 3 | In order to use the Signal Notification with Elastic Search, you should create a new Index Template before you start sending the Signals.
 4 | 
 5 | This will help you to get the best of all signals provided.
 6 | 
 7 | **Index Mapping JSON**
 8 | ```json
 9 | {
10 |   "numeric_detection": false,
11 |   "dynamic_date_formats": [
12 |     "strict_date_optional_time",
13 |     "yyyy/MM/dd HH:mm:ss Z||yyyy/MM/dd Z"
14 |   ],
15 |   "dynamic": "true",
16 |   "dynamic_templates": [],
17 |   "date_detection": true,
18 |   "properties": {
19 |     "source": {
20 |       "fielddata_frequency_filter": {
21 |         "min": 0.01,
22 |         "max": 1,
23 |         "min_segment_size": 50
24 |       },
25 |       "fielddata": true,
26 |       "type": "text"
27 |     },
28 |     "time": {
29 |       "type": "date"
30 |     },
31 |     "signal": {
32 |       "eager_global_ordinals": false,
33 |       "index_phrases": false,
34 |       "fielddata_frequency_filter": {
35 |         "min": 0.01,
36 |         "max": 1,
37 |         "min_segment_size": 50
38 |       },
39 |       "fielddata": true,
40 |       "norms": true,
41 |       "index": true,
42 |       "store": false,
43 |       "type": "text",
44 |       "index_options": "positions"
45 |     },
46 |     "content": {
47 |       "eager_global_ordinals": false,
48 |       "index_phrases": false,
49 |       "fielddata_frequency_filter": {
50 |         "min": 0.01,
51 |         "max": 1,
52 |         "min_segment_size": 50
53 |       },
54 |       "fielddata": true,
55 |       "norms": true,
56 |       "index": true,
57 |       "store": false,
58 |       "type": "text",
59 |       "index_options": "positions"
60 |     }
61 |   }
62 | }
63 | ```
64 | 


--------------------------------------------------------------------------------
/docs/how_use/usage_connection.md:
--------------------------------------------------------------------------------
 1 | # Connection to Telegram Servers
 2 | 
 3 | The first step for every phone number to be used is to connect to the Telegram servers. After that, the runner will create a session file under the *'data_path'* folder specified in the configuration file.
 4 | 
 5 | **Full Command:**
 6 | 
 7 | ```bash
 8 | python3 -m TEx connect --config CONFIGURATION_FILE_PATH
 9 | ```
10 | 
11 | **Parameters**
12 | 
13 |   * **config** > Required - Created Configuration File Path
14 | 
15 | *Output Example:*
16 | ```bash
17 | TEx - Telegram Explorer
18 | Version 0.2.12
19 | By: Th3 0bservator
20 | 
21 | 2023-10-01 20:07:06,501 - INFO - [*] Loading Configurations:
22 | 2023-10-01 20:07:06,502 - INFO - [*] Installed Modules:
23 | 2023-10-01 20:07:06,502 - INFO - 	data_structure_handler.py
24 | 2023-10-01 20:07:06,502 - INFO - 	database_handler.py
25 | 2023-10-01 20:07:06,502 - INFO - 	execution_configuration_handler.py
26 | 2023-10-01 20:07:06,502 - INFO - 	telegram_connection_manager.py
27 | 2023-10-01 20:07:06,502 - INFO - 	telegram_groups_list.py
28 | 2023-10-01 20:07:06,502 - INFO - 	telegram_groups_scrapper.py
29 | 2023-10-01 20:07:06,502 - INFO - 	telegram_maintenance
30 | 2023-10-01 20:07:06,502 - INFO - 	telegram_messages_listener.py
31 | 2023-10-01 20:07:06,502 - INFO - 	telegram_messages_scrapper.py
32 | 2023-10-01 20:07:06,502 - INFO - 	telegram_report_generator
33 | 2023-10-01 20:07:06,502 - INFO - 	telegram_stats_generator.py
34 | 2023-10-01 20:07:06,987 - INFO - [*] Executing Pipeline:
35 | 2023-10-01 20:07:06,987 - INFO - 	[+] telegram_connection_manager.TelegramConnector
36 | 2023-10-01 20:07:07,392 - INFO - 		Authorizing on Telegram...
37 | 2023-10-01 20:07:13,590 - INFO - 		User Authorized on Telegram: True
38 | 2023-10-01 20:07:13,851 - INFO - [*] Executing Termination:
39 | 2023-10-01 20:07:13,851 - INFO - 	[+] state_file_handler.SaveStateFileHandler
40 | ```


--------------------------------------------------------------------------------
/TEx/config.ini:
--------------------------------------------------------------------------------
 1 | [PIPELINE]
 2 | pre_pipeline_sequence   =   input_args_handler.InputArgsHandler
 3 |                             execution_configuration_handler.ExecutionConfigurationHandler
 4 |                             data_structure_handler.DataStructureHandler
 5 |                             database_handler.DatabaseHandler
 6 |                             temp_file_manager.TempFileManager
 7 |                             state_file_handler.LoadStateFileHandler
 8 | 
 9 | pipeline_sequence       =   telegram_connection_manager.TelegramConnector
10 | 
11 |                             telegram_groups_scrapper.TelegramGroupScrapper
12 |                             telegram_groups_list.TelegramGroupList
13 |                             telegram_messages_scrapper.TelegramGroupMessageScrapper
14 |                             telegram_messages_listener.TelegramGroupMessageListener
15 |                             telegram_report_generator.telegram_report_sent_telegram.TelegramReportSentViaTelegram
16 | 
17 |                             telegram_connection_manager.TelegramDisconnector
18 | 
19 |                             telegram_report_generator.telegram_html_report_generator.TelegramReportGenerator
20 |                             telegram_report_generator.telegram_export_text_generator.TelegramExportTextGenerator
21 |                             telegram_report_generator.telegram_export_file_generator.TelegramExportFileGenerator
22 | 
23 |                             telegram_stats_generator.TelegramStatsGenerator
24 | 
25 |                             telegram_maintenance.telegram_purge_old_data.TelegramMaintenancePurgeOldData
26 | 
27 | 
28 | post_pipeline_sequence  =   state_file_handler.SaveStateFileHandler
29 | 
30 | ########## Modules Config ##########
31 | 
32 | [MODULE_LoadStateFileHandler]
33 | file_name   = state/{0}.json
34 | 
35 | [MODULE_SaveStateFileHandler]
36 | file_name   = state/{0}.json
37 | 


--------------------------------------------------------------------------------
/TEx/core/state_file.py:
--------------------------------------------------------------------------------
 1 | """State File Handle."""
 2 | from datetime import datetime
 3 | from typing import cast
 4 | 
 5 | import pytz
 6 | 
 7 | from TEx.database.db_manager import DbManager
 8 | from TEx.models.database.temp_db_models import StateFileOrmEntity
 9 | 
10 | 
class StateFileHandler:
    """Store and retrieve "state files" as StateFileOrmEntity rows in the 'temp' database session."""

    @staticmethod
    def file_exist(path: str) -> bool:
        """
        Tell whether a state file is stored under the given path.

        :param path: File Path
        :return: True when at least one stored row matches the path
        """
        session = DbManager.SESSIONS['temp']
        stored_rows = session.query(StateFileOrmEntity).filter_by(path=path).count()
        return bool(stored_rows > 0)

    @staticmethod
    def read_file_text(path: str) -> str:
        """Return the stored content of a state file.

        The first matching row is used without a None check, so the path is
        expected to exist (see file_exist).

        :param path: File Path
        :return: File Content
        """
        session = DbManager.SESSIONS['temp']
        row = session.query(StateFileOrmEntity).filter_by(path=path).first()
        state: StateFileOrmEntity = cast(StateFileOrmEntity, row)
        return str(state.data)

    @staticmethod
    def write_file_text(path: str, content: str) -> None:
        """Store text content under a path, replacing any previous content.

        :param path: File Path
        :param content: File Content
        :return: None
        """
        session = DbManager.SESSIONS['temp']

        # Drop any previous row for this path so the insert below replaces it
        purge_stmt = StateFileOrmEntity.__table__.delete().where(StateFileOrmEntity.path == path)  # type: ignore
        session.execute(purge_stmt)

        # Insert the new row, stamped with the current UTC time (epoch seconds)
        now_utc: datetime = datetime.now(tz=pytz.UTC)
        session.add(
            StateFileOrmEntity(path=path, data=content, created_at=int(now_utc.timestamp())),
            )

        # Persist
        session.flush()
        session.commit()
58 | 


--------------------------------------------------------------------------------
/docs/configuration/media_download_examples.md:
--------------------------------------------------------------------------------
 1 | # Media Download - Examples
 2 | 
 3 | ### Default Behaviour (Download All Medias)
 4 | ```ini
 5 | [MEDIA.DOWNLOAD]
 6 | default=ALLOW
 7 | max_download_size_bytes=256000000
 8 | ```
 9 | 
10 | ### Download Only Images from All Groups
11 | ```ini
12 | [MEDIA.DOWNLOAD]
13 | default=DISALLOW
14 | 
15 | [MEDIA.DOWNLOAD.image/gif]
16 | enabled=ALLOW
17 | max_download_size_bytes=256000000
18 | groups=*
19 | 
20 | [MEDIA.DOWNLOAD.image/jpeg]
21 | enabled=ALLOW
22 | max_download_size_bytes=256000000
23 | groups=*
24 | 
25 | [MEDIA.DOWNLOAD.image/png]
26 | enabled=ALLOW
27 | max_download_size_bytes=256000000
28 | groups=*
29 | 
30 | [MEDIA.DOWNLOAD.image/webp]
31 | enabled=ALLOW
32 | max_download_size_bytes=256000000
33 | groups=*
34 | ```
35 | 
36 | ### Download All Medias, Except Compressed Ones
37 | ```ini
38 | [MEDIA.DOWNLOAD]
39 | default=ALLOW
40 | max_download_size_bytes=256000000
41 | 
42 | [MEDIA.DOWNLOAD.application/rar]
43 | enabled=DISALLOW
44 | 
45 | [MEDIA.DOWNLOAD.application/vnd.rar]
46 | enabled=DISALLOW
47 | 
48 | [MEDIA.DOWNLOAD.application/x-7z-compressed]
49 | enabled=DISALLOW
50 | 
51 | [MEDIA.DOWNLOAD.application/x-compressed-tar]
52 | enabled=DISALLOW
53 | 
54 | [MEDIA.DOWNLOAD.application/zip]
55 | enabled=DISALLOW
56 | ```
57 | 
58 | ### Download All Medias, but Compressed Ones only from two groups (id=1234 and id=5678)
59 | ```ini
60 | [MEDIA.DOWNLOAD]
61 | default=ALLOW
62 | max_download_size_bytes=256000000
63 | 
64 | [MEDIA.DOWNLOAD.application/rar]
65 | enabled=DISALLOW
66 | groups=1234,5678
67 | 
68 | [MEDIA.DOWNLOAD.application/vnd.rar]
69 | enabled=DISALLOW
70 | groups=1234,5678
71 | 
72 | [MEDIA.DOWNLOAD.application/x-7z-compressed]
73 | enabled=DISALLOW
74 | groups=1234,5678
75 | 
76 | [MEDIA.DOWNLOAD.application/x-compressed-tar]
77 | enabled=DISALLOW
78 | groups=1234,5678
79 | 
80 | [MEDIA.DOWNLOAD.application/zip]
81 | enabled=DISALLOW
82 | groups=1234,5678
83 | ```


--------------------------------------------------------------------------------
/docs/finder/finder_regex.md:
--------------------------------------------------------------------------------
 1 | # Message Finder System - RegEx
 2 | 
 3 | **Compatibility:** Message Listener Command
 4 | 
 5 | Telegram Explorer allows you to specify many message finders using Regular Expressions.
 6 | 
 7 | Each time a Finder rule matches, the system automatically uses the Notification System to report that message.
 8 | 
 9 | Every Finder is defined in the configuration files.
10 | 
11 | **Configuration Spec:**
12 | 
13 | For each rule to be used, you must set a configuration using the default name schema *FINDER.RULE.<RULE_NAME>*
14 | 
15 | **Parameters:**
16 | 
17 |   * **type** > Required - Fixed Value 'regex'
18 |   * **regex** > Required - The regular expression. You can also use one regex per Line
19 |   * **notifier** > Required - Name of notifiers to be used to notify the triggered message (comma separated).
20 | 
21 | **Changes on Configuration File**
22 | ```ini
23 | [FINDER]
24 | enabled=true
25 | 
26 | [FINDER.RULE.MessagesWithURL]
27 | type=regex
28 | regex=/^https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%%_\+.~#?&\/=]*)$/
29 | notifier=NOTIFIER.DISCORD.MY_HOOK_1
30 | 
31 | [FINDER.RULE.FindMessagesWithCreditCard]
32 | type=regex
33 | regex=(^4[0-9]{12}(?:[0-9]{3})?$)|(^(?:5[1-5][0-9]{2}|222[1-9]|22[3-9][0-9]|2[3-6][0-9]{2}|27[01][0-9]|2720)[0-9]{12}$)|(3[47][0-9]{13})|(^3(?:0[0-5]|[68][0-9])[0-9]{11}$)|(^6(?:011|5[0-9]{2})[0-9]{12}$)|(^(?:2131|1800|35\d{3})\d{11}$)
34 | notifier=NOTIFIER.DISCORD.MY_HOOK_1,NOTIFIER.DISCORD.MY_HOOK_2
35 | 
36 | [FINDER.RULE.MultipleRegEx]
37 | type=regex
38 | regex=
39 |     /^https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%%_\+.~#?&\/=]*)$/
40 |     (^4[0-9]{12}(?:[0-9]{3})?$)|(^(?:5[1-5][0-9]{2}|222[1-9]|22[3-9][0-9]|2[3-6][0-9]{2}|27[01][0-9]|2720)[0-9]{12}$)|(3[47][0-9]{13})|(^3(?:0[0-5]|[68][0-9])[0-9]{11}$)|(^6(?:011|5[0-9]{2})[0-9]{12}$)|(^(?:2131|1800|35\d{3})\d{11}$)
41 | notifier=NOTIFIER.DISCORD.MY_HOOK_1,NOTIFIER.DISCORD.MY_HOOK_2
42 | ```


--------------------------------------------------------------------------------
/TEx/database/db_manager.py:
--------------------------------------------------------------------------------
 1 | """Database Manager."""
 2 | 
 3 | import os
 4 | from sqlite3 import Connection
 5 | 
 6 | from sqlalchemy import create_engine
 7 | from sqlalchemy.event import listen
 8 | from sqlalchemy.orm import sessionmaker
 9 | from sqlalchemy.pool import _ConnectionRecord
10 | 
11 | 
class DbManager:
    """Main Database Manager.

    Holds one SQLAlchemy engine and one session per local SQLite database
    ('temp' and 'data'), shared through class attributes.
    """

    SQLALCHEMY_BINDS = {}  # type:ignore
    SESSIONS = {}  # type:ignore

    @staticmethod
    def init_db(data_path: str) -> None:
        """Create the engines and sessions for the SQLite files located in 'data_path'.

        :param data_path: Folder that contains (or will contain) temp_local.db and data_local.db
        :return: None
        """
        engines: dict = {}
        for db_name in ('temp', 'data'):
            db_file = os.path.join(data_path, f'{db_name}_local.db')
            engines[db_name] = create_engine(
                f'sqlite:///{db_file}?nolock=1&check_same_thread=false',
                connect_args={'check_same_thread': False, 'timeout': 120},
                echo=False, logging_name='sqlalchemy',
                )
        DbManager.SQLALCHEMY_BINDS = engines

        # One session per engine, registered under the same key
        DbManager.SESSIONS = {
            db_name: sessionmaker(autocommit=False, autoflush=True, bind=engine)()
            for db_name, engine in engines.items()
            }

        # Only the 'data' engine gets the connect hook
        listen(engines['data'], 'connect', DbManager.do_connect)

    @staticmethod
    def do_connect(dbapi_connection: Connection, connection_record: _ConnectionRecord) -> None:
        """Disable SQLite automatic transaction start on every new connection.

        :param dbapi_connection: Newly created sqlite3 connection
        :param connection_record: Pool record for the connection (unused)
        :return: None
        """
        # disable pysqlite's emitting of the BEGIN statement entirely.
        # also stops it from emitting COMMIT before any DDL.
        dbapi_connection.isolation_level = None
47 | 


--------------------------------------------------------------------------------
/docs/finder/configuration.md:
--------------------------------------------------------------------------------
 1 | # Message Finder System
 2 | 
 3 | **Compatibility:** Message Listener Command
 4 | 
 5 | Telegram Explorer allows you to specify many message finders. Usually, the finder engine looks at messages, but it can also look at downloaded text files (plain, csv, xml, json, etc.).
 6 | 
 7 | It's through the Finder engine that you are able to send notifications or export the chat contents (Check the *Notification System* and *Message Export System* for more information).
 8 | 
 9 | **Configuration Spec:**
10 | 
11 | In order to use the finder engine, you must set a configuration to enable it and to configure whether the engine is allowed to search inside files.
12 | 
13 | **Parameters:**
14 | 
15 |   * **enabled** > Required - Enable(true)/Disable(false) the finder engine.
16 |   * **find_in_text_files_enabled** > Optional - Enable(true)/Disable(false) the behavior that runs the finder engine inside the downloaded files.
17 |     * Default: false
18 |   * **find_in_text_files_max_size_bytes** > Optional - Set the maximum file size (in bytes) that the engine is allowed to load into memory to perform the searches.
19 |     * Default: 10000000
20 |   * **notifier** > Optional - The list of all (comma separated) notifiers that runs when the finder triggers.
21 |   * **exporter** > Optional - The list of all (comma separated) file exporters that runs when the finder triggers.
22 | 
23 | 
24 | **Changes on Configuration File**
25 | ```ini
26 | [FINDER]
27 | enabled=true
28 | find_in_text_files_enabled=true
29 | find_in_text_files_max_size_bytes=20000000
30 | notifier=NOTIFIER.DISCORD.MY_HOOK_1,NOTIFIER.DISCORD.MY_HOOK_2
31 | exporter=EXPORTER.ROLLING_PANDAS.MY_EXPORTER_1,EXPORTER.ROLLING_PANDAS.MY_EXPORTER_2
32 | ```
33 | 
34 | **Files Supported for the Engine:**
35 | 
36 |   * application/atom+xml
37 |   * application/bittorrent
38 |   * application/csv
39 |   * application/html
40 |   * application/json
41 |   * application/ld+json
42 |   * text/csv
43 |   * text/html
44 |   * text/plain
45 |   * text/xml


--------------------------------------------------------------------------------
/docs/notification/notification_elasticsearch.md:
--------------------------------------------------------------------------------
 1 | # Notification System - Elastic Search Connector
 2 | 
 3 | Telegram Explorer allows you to send notifications to Elastic Search through the ingestion API.
 4 | 
 5 | Every Notification is defined in the configuration files.
 6 | 
 7 | !!! info "Elastic Search Compatibility"
 8 |     
 9 |     Tested on Elastic Search 8+
10 | 
11 | !!! warning "Index Template"
12 |     
13 |     We recommend creating a new Index Template before creating your indexes. Please check "Notification System" > "Elastic Search Connector" > "Index Template" and "Signals Template" for more information.
14 | 
15 | **Configuration Spec:**
16 | 
17 | For each connector you must set a configuration using the default name schema *NOTIFIER.ELASTIC_SEARCH.<NAME\>*
18 | 
19 | **Parameters:**
20 | 
21 |   * **address** > Optional - Elastic Search Address. Multiple values comma separated.
22 |   * **api_key** > Required - Elastic Search API Key.
23 |   * **cloud_id** > Optional - Elastic Search Cloud ID.
24 |   * **verify_ssl_cert** > Optional - Configure if the connector checks the SSL cert. Default=True
25 |   * **index_name** > Required - Elastic Search Index Name.
26 |   * **pipeline_name** > Required - Elastic Search Ingestion Pipeline Name.
27 | 
28 | 
29 | **Changes on Configuration File (with Address)**
30 | ```ini
31 | [NOTIFIER.ELASTIC_SEARCH.ELASTIC_INDEX_01]
32 | address=https://elastic_search_url_1:9200,https://elastic_search_url_2:9200
33 | api_key=bHJtVEg0c0JnNkwwTnYtYTFdeadbeefrXzd6NVFSUmEtQ21mQldiUjEwUQ==
34 | verify_ssl_cert=False
35 | index_name=search-telegram_explorer
36 | pipeline_name=ent-search-generic-ingestion
37 | ```
38 | 
39 | **Changes on Configuration File (with Cloud ID)**
40 | ```ini
41 | [NOTIFIER.ELASTIC_SEARCH.ELASTIC_INDEX_02]
42 | cloud_id=deployment-name:dXMtZWFzdDQuZ2Nw
43 | api_key=bHJtVEg0c0JnNkwwTnYtYTFdeadbeefrXzd6NVFSUmEtQ21mQldiUjEwUQ==
44 | verify_ssl_cert=True
45 | index_name=search-telegram_explorer
46 | pipeline_name=ent-search-generic-ingestion
47 | ```
48 | 


--------------------------------------------------------------------------------
/TEx/modules/state_file_handler.py:
--------------------------------------------------------------------------------
 1 | """State File Handler."""
 2 | from __future__ import annotations
 3 | 
 4 | import json
 5 | import logging
 6 | from configparser import ConfigParser
 7 | from typing import Dict
 8 | 
 9 | from TEx.core.base_module import BaseModule
10 | from TEx.core.state_file import StateFileHandler
11 | 
12 | logger = logging.getLogger('TelegramExplorer')
13 | 
14 | 
class LoadStateFileHandler(BaseModule):
    """Module that restores a previously saved state file into the shared data dict."""

    async def can_activate(self, config: ConfigParser, args: Dict, data: Dict) -> bool:
        """
        Report whether the module should run; this module is always active.

        :return: Always True
        """
        return True

    async def run(self, config: ConfigParser, args: Dict, data: Dict) -> None:
        """Load the state stored for the configured phone number, if there is one.

        :param config: Configuration; provides the file name template and the phone number
        :param args: Not used by this module
        :param data: Shared data dict, updated in place with the stored state
        :return: None
        """
        # The '{0}' placeholder of the template is replaced by the phone number
        name_template: str = config['MODULE_LoadStateFileHandler']['file_name']
        phone_number: str = config['CONFIGURATION']['phone_number']
        state_file_name: str = name_template.replace('{0}', phone_number)

        # Nothing stored yet: leave data untouched
        if not StateFileHandler.file_exist(state_file_name):
            return

        stored_state: Dict = json.loads(StateFileHandler.read_file_text(state_file_name))
        data.update(stored_state)
        logger.debug('\t\tState File Loaded.')
35 | 
36 | 
class SaveStateFileHandler(BaseModule):
    """Module that stores the shared data dict as a new state file."""

    async def can_activate(self, config: ConfigParser, args: Dict, data: Dict) -> bool:
        """
        Report whether the module should run; this module is always active.

        :return: Always True
        """
        return True

    async def run(self, config: ConfigParser, args: Dict, data: Dict) -> None:
        """Serialize the shared data dict as JSON and store it for the configured phone number.

        :param config: Configuration; provides the file name template and the phone number
        :param args: Not used by this module
        :param data: Shared data dict; its 'internals' entry is deleted before saving
        :return: None
        """
        # The '{0}' placeholder of the template is replaced by the phone number
        name_template: str = config['MODULE_SaveStateFileHandler']['file_name']
        phone_number: str = config['CONFIGURATION']['phone_number']
        state_file_name: str = name_template.replace('{0}', phone_number)

        # Internal controls are not part of the persisted state
        del data['internals']

        serialized_state: str = json.dumps(data)
        StateFileHandler.write_file_text(state_file_name, serialized_state)
59 | 


--------------------------------------------------------------------------------
/TEx/models/facade/telegram_group_report_facade_entity.py:
--------------------------------------------------------------------------------
 1 | """Facade Entity for Report Generation."""
 2 | from __future__ import annotations
 3 | 
 4 | from typing import Optional
 5 | 
 6 | from TEx.models.database.telegram_db_model import TelegramGroupOrmEntity
 7 | 
 8 | 
class TelegramGroupReportFacadeEntity:
    """Facade Entity for Report Generation.

    Plain attribute holder describing one Telegram group. Only type
    annotations are declared here: no attribute has a default value, so each
    one exists on an instance only after it has been assigned.
    """

    # Group identification
    id: int
    constructor_id: str
    access_hash: str
    group_username: str
    title: str

    # Boolean group flags
    fake: bool
    gigagroup: bool
    has_geo: bool
    restricted: bool
    scam: bool
    verified: bool

    # May be None
    participants_count: Optional[int]

    # Group photo data; every photo field may be None
    photo_id: Optional[int]
    photo_base64: Optional[str]
    photo_name: Optional[str]

    source: str

    # NOTE(review): not assigned by TelegramGroupReportFacadeEntityMapper.create_from_dbentity;
    # presumably filled in later by the report code - confirm against callers.
    meta_message_count: int
34 | 
35 | 
class TelegramGroupReportFacadeEntityMapper:
    """Build TelegramGroupReportFacadeEntity objects from database entities."""

    @staticmethod
    def create_from_dbentity(source: TelegramGroupOrmEntity) -> TelegramGroupReportFacadeEntity:
        """Copy the report-relevant attributes of a TelegramGroupOrmEntity into a new facade entity.

        'meta_message_count' is not part of the copy and is left unset.

        :param source: Database entity to read from
        :return: New facade entity carrying the copied attributes
        """
        # Attributes copied one-to-one, in this order
        copied_fields = (
            'id', 'constructor_id', 'access_hash', 'group_username', 'title',
            'fake', 'gigagroup', 'has_geo', 'restricted', 'scam', 'verified',
            'participants_count',
            'photo_id', 'photo_base64', 'photo_name',
            'source',
            )

        facade: TelegramGroupReportFacadeEntity = TelegramGroupReportFacadeEntity()
        for field_name in copied_fields:
            setattr(facade, field_name, getattr(source, field_name))

        return facade
66 | 


--------------------------------------------------------------------------------
/docs/exporting/pandas_rolling.md:
--------------------------------------------------------------------------------
 1 | # Message Exporting System - Pandas Rolling Exporter
 2 | 
 3 | Telegram Explorer allows you to export messages as CSV, XML, JSON or Pickle Serialized Pandas DataFrame in near real time.
 4 | 
 5 | This way you can configure as many exporters as you want, one for each need or category you like.
 6 | 
 7 | !!! warning "NOTE ABOUT THE EXPORTING PROCESS"
 8 |     
 9 |     This specific exporter only writes the output file when the rolling period terminates, and/or, when Telegram Explorer process stops.
10 | 
11 | Every Exporter is defined in the configuration files.
12 | 
13 | **Configuration Spec:**
14 | 
15 | For each Pandas Rolling Exporter you must set a configuration using the default name schema *EXPORTER.ROLLING_PANDAS.<EXPORTER_NAME\>*
16 | 
17 | **Parameters:**
18 | 
19 |   * **file_root_path** > Required - Root path for the exported files.
20 |   * **rolling_every_minutes** > Optional - Time (in minutes) that the system will roll a new file.
21 |     * Default: 30
22 |   * **fields** > Optional - The list (comma separated) with the fields you want to be exported.
23 |     * Default: date_time,raw_text,group_name,group_id,from_id,to_id,reply_to_msg_id,message_id,is_reply,found_on
24 |   * **use_header** > Optional - Enable/Disable the file header on exported file. 
25 |     * Default: true
26 |   * **output_format** > Optional - Specify the output file format (json, csv, xml and pickle).
27 |     * Default: csv
28 |   * **keep_last_files** > Optional - Specify how many files the engine keeps in the folder before it starts deleting the oldest ones.
29 |     * Default: 20
30 | 
31 | **Changes on Configuration File**
32 | ```ini
33 | [EXPORTER.ROLLING_PANDAS.MY_EXPORTER_1]
34 | file_root_path=/path/to/export/folder/
35 | rolling_every_minutes=5
36 | fields=date_time,raw_text,group_name,group_id,from_id,to_id,reply_to_msg_id,message_id,is_reply,found_on
37 | use_header=true
38 | output_format=json
39 | keep_last_files=20
40 | 
41 | [EXPORTER.ROLLING_PANDAS.MY_EXPORTER_2]
42 | file_root_path=/path/to/export/folder/
43 | rolling_every_minutes=10
44 | fields=date_time,group_id,group_name,raw_text,from_id,to_id,message_id
45 | ```
46 | 


--------------------------------------------------------------------------------
/docs/changelog/v030.md:
--------------------------------------------------------------------------------
 1 | # Changelog - V0.3.0
 2 | 
 3 | !!! warning "Python Version"
 4 |     
 5 |     This is the last version of Telegram Explorer that supports Python 3.8 and 3.9.
 6 |     
 7 |     Please consider upgrading to Python 3.10+ as soon as possible.
 8 | 
 9 | **🚀 Features**
10 | 
11 | - Proxy (HTTP, SOCKS4, SOCKS5) support ([#26](https://github.com/guibacellar/TEx/issues/26))
12 | - Discord Notifications now have a source information with account/phone number
13 | - It is now possible to set the connection timeout for the Telegram servers connectors
14 | - Discord Notifications now allow to send downloaded files as attachments ([#41](https://github.com/guibacellar/TEx/issues/41))
15 | - New Message Finder Rule to Catch All Messages
16 | - New Notification connector for ElasticSearch ([#12](https://github.com/guibacellar/TEx/issues/12))
17 | - Fine Control on Media Download Settings ([#37](https://github.com/guibacellar/TEx/issues/37))
18 | - OCR Support with Tesseract for all Downloaded Images ([#39](https://github.com/guibacellar/TEx/issues/39))
19 | - RegEx Finder now Supports Many RegEx at same Finder Configuration, One per Line ([#49](https://github.com/guibacellar/TEx/issues/49))
20 | - Added The Ability to Configure Signals to be Received ([#48](https://github.com/guibacellar/TEx/issues/48))
21 | - Export Messages as CSV, JSON, XML or Pandas Serialized Dataframe ([#53](https://github.com/guibacellar/TEx/issues/53))
22 | 
23 | **🐛 Bug Fixes**
24 | 
25 | - Fix "export_text" command Regex Handling that causes crash on using invalid regex ([#31](https://github.com/guibacellar/TEx/issues/31)) 
26 | 
27 | **⚙️ Internal Improvements**
28 | 
29 | - Replace Pylint, PyDocStyle and Flake8 code quality tools for Ruff ([#22](https://github.com/guibacellar/TEx/issues/22))
30 | - Fix Invalid TypeHint for Message Object from Telethon 
31 | - Changes in the message finder and notification system to use facade objects with Pydantic, reducing cognitive complexity and making it easier to build new connectors
32 | - Improvements on Database Handling + Removed Isolation Level from SQL Handling ([#45](https://github.com/guibacellar/TEx/issues/45))
33 | 


--------------------------------------------------------------------------------
/TEx/core/ocr/tesseract_ocr_engine.py:
--------------------------------------------------------------------------------
 1 | """Tesseract OCR Engine."""
 2 | from __future__ import annotations
 3 | 
 4 | import logging
 5 | import os
 6 | from configparser import SectionProxy
 7 | from typing import Optional, cast
 8 | 
 9 | from pytesseract import pytesseract as tesseract
10 | 
11 | from TEx.core.ocr.ocr_engine_base import OcrEngineBase
12 | 
13 | logger = logging.getLogger('TelegramExplorer')
14 | 
15 | 
class TesseractOcrEngine(OcrEngineBase):
    """OCR engine that extracts text from image files through Tesseract (pytesseract)."""

    def __init__(self) -> None:
        """Initialize the engine with empty command path and language."""
        super().__init__()
        self.cmd: str = ''
        self.language: str = ''

    def configure(self, config: Optional[SectionProxy]) -> None:
        """Read and validate the engine settings, then point pytesseract at the command.

        :param config: Settings section; 'tesseract_cmd' is required, 'language' falls back to 'eng'
        :return: None
        :raises AttributeError: when the section is missing/empty, 'tesseract_cmd' is empty,
                                or the command path does not exist
        """
        if not config:
            error_msg_config: str = 'No [OCR.TESSERACT] config found, but OCR type is "tesseract"'
            raise AttributeError(error_msg_config)

        self.cmd = config.get('tesseract_cmd', fallback='')
        self.language = config.get('language', fallback='eng')

        # Validate the command: it must be set and must exist on disk
        problem: Optional[str] = None
        if self.cmd == '':
            problem = '"tesseract_cmd" setting are no properly set, but OCR type is "tesseract"'
        elif not os.path.exists(self.cmd):
            problem = f'Tesseract command cannot be found at "{self.cmd}"'

        if problem is not None:
            raise AttributeError(problem)

        # Configure Tesseract Engine
        tesseract.tesseract_cmd = self.cmd

    def run(self, file_path: str) -> Optional[str]:
        """Return the text Tesseract detects in the given file.

        An empty string is returned when the file does not exist or when any
        error happens during processing (the error is logged).

        :param file_path: Path of the file to process
        :return: Detected text, or '' on missing file / failure
        """
        try:
            if os.path.exists(file_path):
                return cast(str, tesseract.image_to_string(file_path, lang=self.language))
        except Exception as ex:
            logger.exception(msg='OCR Fail', exc_info=ex)

        return ''
60 | 


--------------------------------------------------------------------------------
/TEx/notifier/notifier_base.py:
--------------------------------------------------------------------------------
 1 | """Base Class for All Notifiers."""
 2 | from __future__ import annotations
 3 | 
 4 | import abc
 5 | import hashlib
 6 | from configparser import SectionProxy
 7 | from typing import Optional, Tuple, Union
 8 | 
 9 | from cachetools import TTLCache
10 | 
11 | from TEx.models.facade.finder_notification_facade_entity import FinderNotificationMessageEntity
12 | from TEx.models.facade.signal_notification_model import SignalNotificationEntityModel
13 | 
14 | 
class BaseNotifier:
    """Common settings and duplicate-message detection shared by the notifiers."""

    def __init__(self) -> None:
        """Declare the notifier settings; they receive their values in configure_base."""
        # No cache until configure_base runs; check_is_duplicated treats None as "nothing is duplicated"
        self.cache: Optional[TTLCache] = None
        self.timeout_seconds: int
        self.media_attachments_enabled: bool
        self.media_attachments_max_size_bytes: int

    def configure_base(self, config: SectionProxy) -> None:
        """Load the common notifier settings from a configuration section.

        :param config: Notifier configuration section
        :return: None
        """
        # Deduplication window is configured in minutes, the cache TTL is in seconds
        dedup_minutes: int = int(config.get('prevent_duplication_for_minutes', fallback='240'))
        self.cache = TTLCache(maxsize=4096, ttl=dedup_minutes * 60)

        self.timeout_seconds = int(config.get('timeout_seconds', fallback='30'))

        # Only the exact value 'true' enables the attachments
        attachments_flag: str = config.get('media_attachments_enabled', fallback='false')
        self.media_attachments_enabled = attachments_flag == 'true'
        self.media_attachments_max_size_bytes = int(config.get('media_attachments_max_size_bytes', fallback='10000000'))

    def check_is_duplicated(self, message: str) -> Tuple[bool, str]:
        """Tell whether the message was already seen and remember it when it was not.

        :param message: Message text
        :return: Tuple (is duplicated, deduplication tag); (False, '') for an
                 empty message or when no cache is configured
        """
        if self.cache is None or not message:
            return False, ''

        # Deduplication tag: MD5 hex digest of the UTF-8 encoded message
        digest: str = hashlib.md5(message.encode('UTF-8')).hexdigest()

        already_seen: bool = bool(self.cache.get(digest))
        if not already_seen:
            # First occurrence: register it for the next checks
            self.cache[digest] = True

        return already_seen, digest

    @abc.abstractmethod
    async def run(self, entity: Union[FinderNotificationMessageEntity, SignalNotificationEntityModel], rule_id: str, source: str) -> None:
        """Run the Notification Process."""
51 | 


--------------------------------------------------------------------------------
/docs/how_use/usage_list_groups.md:
--------------------------------------------------------------------------------
 1 | # List Groups
 2 | 
 3 | You can list groups directly in the console/tty output for a quick view of all groups already present in the database.
 4 | 
 5 | **Full Command:**
 6 | 
 7 | ```bash
 8 | python3 -m TEx list_groups --config CONFIGURATION_FILE_PATH 
 9 | ```
10 | 
11 | **Parameters**
12 | 
13 |   * **config** > Required - Created Configuration File Path
14 | 
15 | *Output Example:*
16 | ```bash
17 | TEx - Telegram Explorer
18 | Version 0.2.12
19 | By: Th3 0bservator
20 | 
21 | 2023-10-01 20:41:15,142 - INFO - [*] Loading Configurations:
22 | 2023-10-01 20:41:15,142 - INFO - [*] Installed Modules:
23 | 2023-10-01 20:41:15,143 - INFO - 	data_structure_handler.py
24 | 2023-10-01 20:41:15,143 - INFO - 	database_handler.py
25 | 2023-10-01 20:41:15,143 - INFO - 	execution_configuration_handler.py
26 | 2023-10-01 20:41:15,143 - INFO - 	telegram_connection_manager.py
27 | 2023-10-01 20:41:15,143 - INFO - 	telegram_groups_list.py
28 | 2023-10-01 20:41:15,143 - INFO - 	telegram_groups_scrapper.py
29 | 2023-10-01 20:41:15,143 - INFO - 	telegram_maintenance
30 | 2023-10-01 20:41:15,143 - INFO - 	telegram_messages_listener.py
31 | 2023-10-01 20:41:15,143 - INFO - 	telegram_messages_scrapper.py
32 | 2023-10-01 20:41:15,143 - INFO - 	telegram_report_generator
33 | 2023-10-01 20:41:15,143 - INFO - 	telegram_stats_generator.py
34 | 2023-10-01 20:41:15,484 - INFO - [*] Executing Pipeline:
35 | 2023-10-01 20:41:15,823 - INFO - 	[+] telegram_groups_list.TelegramGroupList
36 | 2023-10-01 20:41:16,535 - INFO - 		Found 2 Groups
37 | 2023-10-01 20:41:16,536 - INFO - 		ID       	Username                     	Title                                                                                  
38 | 2023-10-01 20:41:16,536 - INFO - 		1769587896	mygroup1                    	My Group 1                                                                        
39 | 2023-10-01 20:41:16,536 - INFO - 		1259876541	texbetagroup                   	TeX Beta Group                                                                       
40 | 2023-10-01 20:41:16,703 - INFO - [*] Executing Termination:
41 | 2023-10-01 20:41:16,703 - INFO - 	[+] state_file_handler.SaveStateFileHandler
42 | ```


--------------------------------------------------------------------------------
/TEx/exporter/exporter_base.py:
--------------------------------------------------------------------------------
 1 | """Base Class for All Exporters."""
 2 | from __future__ import annotations
 3 | 
 4 | import abc
 5 | import logging
 6 | import os
 7 | from configparser import SectionProxy
 8 | from pathlib import Path
 9 | from typing import List
10 | 
11 | from TEx.models.facade.finder_notification_facade_entity import FinderNotificationMessageEntity
12 | 
13 | logger = logging.getLogger('TelegramExplorer')
14 | 
15 | 
class BaseExporter:
    """Base Exporter.

    Shared foundation for all exporters: stores the configured 'file_root_path'
    and offers a helper that prunes the oldest files of a directory.
    """

    def __init__(self) -> None:
        """Initialize the Base Exporter."""
        self.file_root_path: str = ''

    def configure_base(self, config: SectionProxy) -> None:
        """Configure Base Exporter.

        :param config: Exporter configuration section; its 'file_root_path' option is read.
        """
        self.file_root_path = config.get('file_root_path')

    @abc.abstractmethod
    async def run(self, entity: FinderNotificationMessageEntity, rule_id: str) -> None:
        """Run the Exporting Process."""

    @abc.abstractmethod
    def shutdown(self) -> None:
        """Shutdown and Flush all Data into Disk."""

    def _keep_last_files_only(self, directory_path: str, file_count: int) -> None:
        """Ensure the Directory Contains Only the 'file_count' newest files.

        Note: CHAT GPT-4 Assisted Code.

        :param directory_path: Directory to be pruned. Nothing happens if it is not an existing directory.
        :param file_count: How many of the most recently modified files must be kept.
                           Negative values are handled as 0 (no file is kept).
        """
        if not os.path.isdir(directory_path):
            return

        # List All Files (sub-directories are never considered nor removed)
        files: List[str] = [
            full_path
            for full_path in (os.path.join(directory_path, name) for name in os.listdir(directory_path))
            if Path(full_path).is_file()
            ]

        # Compute File Remove Counter (a negative limit must never exceed the number of files)
        files_to_delete: int = len(files) - max(file_count, 0)
        if files_to_delete <= 0:
            return

        # Sort Files by Date/Time (oldest first)
        files.sort(key=lambda x: Path(x).stat().st_mtime)

        # Remove Old Files (best effort: a failure on one file must not stop the others)
        for old_file in files[:files_to_delete]:
            try:
                os.remove(old_file)
            except OSError as ex:
                logger.exception(msg=f'Unable to Remove {old_file}', exc_info=ex)
62 | 


--------------------------------------------------------------------------------
/docs/notification/notification_elasticsearch_index_template.md:
--------------------------------------------------------------------------------
 1 | # Notification System - Elastic Search Connector - Index Template
 2 | 
 3 | If you want, you can create a new Index Template before creating the Telegram Explorer indexes.
 4 | 
 5 | This helps you get the most out of the data provided and allows you to extract much more value and information from it.
 6 | 
 7 | **Index Mapping JSON**
 8 | ```json
 9 | {
10 |   "numeric_detection": false,
11 |   "dynamic_date_formats": [
12 |     "strict_date_optional_time",
13 |     "yyyy/MM/dd HH:mm:ss Z||yyyy/MM/dd Z"
14 |   ],
15 |   "dynamic": "true",
16 |   "dynamic_templates": [],
17 |   "date_detection": true,
18 |   "properties": {
19 |     "from_id": {
20 |       "type": "long"
21 |     },
22 |     "media_size": {
23 |       "type": "long"
24 |     },
25 |     "group_name": {
26 |       "fielddata_frequency_filter": {
27 |         "min": 0.01,
28 |         "max": 1,
29 |         "min_segment_size": 50
30 |       },
31 |       "fielddata": true,
32 |       "type": "text"
33 |     },
34 |     "reply_to_msg_id": {
35 |       "type": "long"
36 |     },
37 |     "has_media": {
38 |       "type": "boolean"
39 |     },
40 |     "raw": {
41 |       "fielddata_frequency_filter": {
42 |         "min": 0.01,
43 |         "max": 1,
44 |         "min_segment_size": 50
45 |       },
46 |       "fielddata": true,
47 |       "type": "text"
48 |     },
49 |     "rule": {
50 |       "fielddata_frequency_filter": {
51 |         "min": 0.01,
52 |         "max": 1,
53 |         "min_segment_size": 50
54 |       },
55 |       "fielddata": true,
56 |       "type": "text"
57 |     },
58 |     "to_id": {
59 |       "type": "long"
60 |     },
61 |     "message_id": {
62 |       "type": "text"
63 |     },
64 |     "source": {
65 |       "fielddata_frequency_filter": {
66 |         "min": 0.01,
67 |         "max": 1,
68 |         "min_segment_size": 50
69 |       },
70 |       "fielddata": true,
71 |       "type": "text"
72 |     },
73 |     "is_reply": {
74 |       "type": "boolean"
75 |     },
76 |     "found_on": {
77 |       "type": "text"
78 |     },
79 |     "group_id": {
80 |       "type": "long"
81 |     },
82 |     "media_mime_type": {
83 |       "fielddata_frequency_filter": {
84 |         "min": 0.01,
85 |         "max": 1,
86 |         "min_segment_size": 50
87 |       },
88 |       "fielddata": true,
89 |       "type": "text"
90 |     },
91 |     "time": {
92 |       "type": "date"
93 |     }
94 |   }
95 | }
96 | ```
97 | 


--------------------------------------------------------------------------------
/tests/resources/expected_generated_file_content/test_pandas_rolling_exporter_json_expected_15558987453_202311221005.data:
--------------------------------------------------------------------------------
1 | [{"date_time":"2023-11-22T10:22:00.000Z","raw_text":"Mocked Raw Text","group_name":"Channel 1972142108","group_id":1972142108,"from_id":1234,"to_id":9876,"reply_to_msg_id":5544,"message_id":5975883,"is_reply":false,"found_on":"UT FOUND 6"},{"date_time":"2023-11-22T10:22:00.000Z","raw_text":"Mocked Raw Text","group_name":"Channel 1972142108","group_id":1972142108,"from_id":1234,"to_id":9876,"reply_to_msg_id":5544,"message_id":5975883,"is_reply":false,"found_on":"UT FOUND 6"},{"date_time":"2023-11-22T10:22:00.000Z","raw_text":"Mocked Raw Text","group_name":"Channel 1972142108","group_id":1972142108,"from_id":1234,"to_id":9876,"reply_to_msg_id":5544,"message_id":5975883,"is_reply":false,"found_on":"UT FOUND 6"},{"date_time":"2023-11-22T10:22:00.000Z","raw_text":"Mocked Raw Text","group_name":"Channel 1972142108","group_id":1972142108,"from_id":1234,"to_id":9876,"reply_to_msg_id":5544,"message_id":5975883,"is_reply":false,"found_on":"UT FOUND 6"},{"date_time":"2023-11-22T10:22:00.000Z","raw_text":"Mocked Raw Text","group_name":"Channel 1972142108","group_id":1972142108,"from_id":1234,"to_id":9876,"reply_to_msg_id":5544,"message_id":5975883,"is_reply":false,"found_on":"UT FOUND 6"},{"date_time":"2023-11-22T10:22:00.000Z","raw_text":"Mocked Raw Text","group_name":"Channel 1972142108","group_id":1972142108,"from_id":1234,"to_id":9876,"reply_to_msg_id":5544,"message_id":5975883,"is_reply":false,"found_on":"UT FOUND 6"},{"date_time":"2023-11-22T10:22:00.000Z","raw_text":"Mocked Raw Text","group_name":"Channel 1972142108","group_id":1972142108,"from_id":1234,"to_id":9876,"reply_to_msg_id":5544,"message_id":5975883,"is_reply":false,"found_on":"UT FOUND 6"},{"date_time":"2023-11-22T10:22:00.000Z","raw_text":"Mocked Raw Text","group_name":"Channel 1972142108","group_id":1972142108,"from_id":1234,"to_id":9876,"reply_to_msg_id":5544,"message_id":5975883,"is_reply":false,"found_on":"UT FOUND 6"},{"date_time":"2023-11-22T10:22:00.000Z","raw_text":"Mocked Raw 
Text","group_name":"Channel 1972142108","group_id":1972142108,"from_id":1234,"to_id":9876,"reply_to_msg_id":5544,"message_id":5975883,"is_reply":false,"found_on":"UT FOUND 6"},{"date_time":"2023-11-22T10:22:00.000Z","raw_text":"Mocked Raw Text","group_name":"Channel 1972142108","group_id":1972142108,"from_id":1234,"to_id":9876,"reply_to_msg_id":5544,"message_id":5975883,"is_reply":false,"found_on":"UT FOUND 6"}]


--------------------------------------------------------------------------------
/tests/resources/expected_generated_file_content/test_pandas_rolling_exporter_csv_expected_15558987453_202311221007.data:
--------------------------------------------------------------------------------
 1 | date_time,raw_text,group_name,group_id,from_id,to_id,reply_to_msg_id,message_id,is_reply,found_on
 2 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6
 3 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6
 4 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6
 5 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6
 6 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6
 7 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6
 8 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6
 9 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6
10 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6
11 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6
12 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6
13 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6
14 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6
15 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6
16 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6
17 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6
18 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6
19 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6
20 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6
21 | 2023-11-22 10:07:40.000101,Mocked Raw Text,Channel 1972142108,1972142108,1234,9876,5544,5975883,False,UT FOUND 6
22 | 


--------------------------------------------------------------------------------
/TEx/modules/telegram_groups_list.py:
--------------------------------------------------------------------------------
 1 | """Telegram Group List."""
 2 | from __future__ import annotations
 3 | 
 4 | import logging
 5 | from configparser import ConfigParser
 6 | from typing import Dict, List, cast
 7 | 
 8 | from TEx.core.base_module import BaseModule
 9 | from TEx.database.telegram_group_database import TelegramGroupDatabaseManager
10 | from TEx.models.database.telegram_db_model import TelegramGroupOrmEntity
11 | 
12 | logger = logging.getLogger('TelegramExplorer')
13 | 
14 | 
class TelegramGroupList(BaseModule):
    """List all Groups Stored in the Database for the Configured Phone Number."""

    async def can_activate(self, config: ConfigParser, args: Dict, data: Dict) -> bool:
        """
        Tell if the Module must be Executed.

        :param config: Loaded configuration (not used for the activation check).
        :param args: Input arguments; the 'list_groups' entry is the activation flag.
        :param data: Shared pipeline data (not used for the activation check).
        :return: The value of args['list_groups'].
        """
        return cast(bool, args['list_groups'])

    async def run(self, config: ConfigParser, args: Dict, data: Dict) -> None:
        """
        Execute Module.

        Loads every group registered for config['CONFIGURATION']['phone_number'] and logs
        them as a table (ID, Username and Title), padding the columns to the longest value.

        :param config: Loaded configuration; the CONFIGURATION/phone_number option is read.
        :param args: Input arguments, forwarded to 'can_activate'.
        :param data: Shared pipeline data; the 'groups' and 'members' keys are created when missing.
        """
        if not await self.can_activate(config, args, data):
            logger.debug('\t\tModule is Not Enabled...')
            return

        # Check Data Dict
        if 'groups' not in data:
            data['groups'] = {}

        if 'members' not in data:
            data['members'] = {}

        # Get all Groups from DB
        db_groups: List[TelegramGroupOrmEntity] = TelegramGroupDatabaseManager.get_all_by_phone_number(
            config['CONFIGURATION']['phone_number'])
        logger.info(f'\t\tFound {len(db_groups)} Groups')

        # Get the Bigger Username Size ('default' keeps max() from raising when there are no groups)
        max_username_size: int = max(
            (len(item.group_username) for item in db_groups if item.group_username is not None),
            default=0,
            )

        # Get the Bigger Title Size ('default' keeps max() from raising when there are no groups)
        max_title_size: int = max(
            (len(item.title) for item in db_groups if item.title is not None),
            default=0,
            )

        # Print Groups
        logger.info(f'\t\tID       \t{"Username".ljust(max_username_size)}\t{"Title".ljust(max_title_size)}')
        for group in db_groups:
            # Missing values are reported with a fixed placeholder
            username: str = group.group_username if group.group_username is not None else 'UNDEFINED'
            title: str = group.title if group.title is not None else 'UNDEFINED'

            formatted_username: str = username.ljust(max_username_size)
            formatted_title: str = title.ljust(max_title_size)
            logger.info(f'\t\t{group.id}\t{formatted_username}\t{formatted_title}')
60 | 


--------------------------------------------------------------------------------
/docs/configuration/ocr.md:
--------------------------------------------------------------------------------
 1 | # Configuration - OCR
 2 | 
 3 | Using Tesseract, Telegram Explorer performs OCR and extracts all text from any downloaded images.
 4 | 
 5 | By default, Tesseract comes with 2 languages, English and OSD, but you can install additional languages as you wish.
 6 | 
 7 | 
 8 | !!! warning "OCR Results"
 9 |     
10 |     Remember, OCR is not magic and the results may vary, especially in a wild environment such as analyzing uncontrolled, non-standardized images downloaded from multiple sources across any kind of Telegram group.
11 | 
12 | ```ini
13 | [OCR]
14 | enabled=true
15 | type=tesseract
16 | 
17 | [OCR.TESSERACT]
18 | tesseract_cmd=/path/to/tesseract/cmd
19 | language=eng
20 | ```
21 | 
22 | * **enabled** > Required - Enable/Disable OCR Feature (true = enable / false = disable) 
23 | * **type** > Required - Engine Type (fixed=tesseract)
24 | * **tesseract_cmd** > Required - Path to Tesseract CMD
25 | * **language** > Required - Tesseract Language, multiple Languages supported (Ex: eng+por)
26 | 
27 | ## OCR Text
28 | 
29 | All extracted content is combined with the original content of the messages, so Telegram Explorer's search and notification mechanisms work seamlessly.
30 | 
31 | Here's a real message example:
32 | 
33 | ```
34 | Yeah, we got compromised by APT29, but luckily MalwareBytes™ FREE AV 
35 | stopped the infection in their tracks! 
36 | 
37 | To be extra safe, we swung by the local Hotel and used their 
38 | WiFi to install it.
39 | 
40 | ====OCR CONTENT====
41 | 
42 | Malwarebytes 4.0
43 | Premium
44 | Real-Time Protectin
45 | 
46 | My Computer Global
47 | 
48 | 17 total
49 | 
50 | Malicious sites 2
51 | Malware PUPs 3
52 | Ransomware 1
53 | Explits 9
54 | ```
55 | 
56 | 
57 | 
58 | 
59 | 
60 | ## Installing Tesseract
61 | 
62 | * **Linux Users**: Follow the "Installation" procedures at [https://tesseract-ocr.github.io/tessdoc/Installation.html](https://tesseract-ocr.github.io/tessdoc/Installation.html)
63 | * **Windows Users**: Get and Install from [https://github.com/UB-Mannheim/tesseract/wiki](https://github.com/UB-Mannheim/tesseract/wiki)
64 | 
65 | ## Adding New Languages
66 | 
67 | Installing a new language is as simple as downloading the trained data for that language and copying the downloaded file to the **tessdata** folder inside the Tesseract installation folder.
68 | 
69 | To obtain the languages, access [https://github.com/tesseract-ocr/tessdata](https://github.com/tesseract-ocr/tessdata)
70 | 
71 | As an example, this is my *tessdata* directory:
72 | 
73 | ![ocr_tensorflow_tessdata_folder.png](../media/ocr_tensorflow_tessdata_folder.png)


--------------------------------------------------------------------------------
/docs/how_use/usage_load_groups.md:
--------------------------------------------------------------------------------
 1 | # Update Groups List
 2 | 
 3 | Although Telegram Explorer performs automatic group synchronization every time a new group/chat is detected, the automatic system only registers the group in the database.
 4 | 
 5 | The Group Load command performs a full group synchronization, including all information about the group (name, pictures, full members list, members' photos, etc.).
 6 | 
 7 | **Full Command:**
 8 | 
 9 | ```bash
10 | python3 -m TEx load_groups --config CONFIGURATION_FILE_PATH --refresh_profile_photos
11 | ```
12 | 
13 | **Basic Command:**
14 | 
15 | ```bash
16 | python3 -m TEx load_groups --config CONFIGURATION_FILE_PATH
17 | ```
18 | 
19 | **Parameters**
20 | 
21 |   * **config** > Required - Created Configuration File Path
22 |   * **refresh_profile_photos** > Optional - If present, forces the Download and Update all Channels Members Profile Photo
23 | 
24 | *Output Example:*
25 | ```bash
26 | TEx - Telegram Explorer
27 | Version 0.2.12
28 | By: Th3 0bservator
29 | 
30 | 2023-10-01 20:37:14,514 - INFO - [*] Loading Configurations:
31 | 2023-10-01 20:37:14,514 - INFO - [*] Installed Modules:
32 | 2023-10-01 20:37:14,514 - INFO - 	data_structure_handler.py
33 | 2023-10-01 20:37:14,514 - INFO - 	database_handler.py
34 | 2023-10-01 20:37:14,515 - INFO - 	execution_configuration_handler.py
35 | 2023-10-01 20:37:14,515 - INFO - 	telegram_connection_manager.py
36 | 2023-10-01 20:37:14,515 - INFO - 	telegram_groups_list.py
37 | 2023-10-01 20:37:14,515 - INFO - 	telegram_groups_scrapper.py
38 | 2023-10-01 20:37:14,515 - INFO - 	telegram_maintenance
39 | 2023-10-01 20:37:14,515 - INFO - 	telegram_messages_listener.py
40 | 2023-10-01 20:37:14,515 - INFO - 	telegram_messages_scrapper.py
41 | 2023-10-01 20:37:14,515 - INFO - 	telegram_report_generator
42 | 2023-10-01 20:37:14,515 - INFO - 	telegram_stats_generator.py
43 | 2023-10-01 20:37:14,525 - INFO - [*] Loading Execution Configurations:
44 | 2023-10-01 20:37:14,525 - INFO - 	[+] data_structure_handler.DataStructureHandler
45 | 2023-10-01 20:37:14,813 - INFO - [*] Executing Pipeline:
46 | 2023-10-01 20:37:21,361 - INFO - 	[+] telegram_groups_scrapper.TelegramGroupScrapper
47 | 2023-10-01 20:37:21,364 - INFO - 		Enumerating Groups
48 | 2023-10-01 20:37:22,169 - INFO - 		Processing "My Group 1 (1769587896)" Members and Group Profile Picture
49 | 2023-10-01 20:37:27,782 - INFO - 		Processing "TeX Beta Group (1259876541)" Members and Group Profile Picture
50 | 2023-10-01 20:37:27,859 - INFO - [*] Executing Termination:
51 | 2023-10-01 20:37:27,958 - INFO - 	[+] state_file_handler.SaveStateFileHandler
52 | ```


--------------------------------------------------------------------------------
/docs/how_use/usage_download_messages.md:
--------------------------------------------------------------------------------
 1 | # Download Messages (Download since first message for each group)
 2 | 
 3 | Unlike the process of listening to messages, this command downloads messages from Telegram groups starting from the first message. Essentially, it downloads every message and every media file (unless 'ignore_media' is present).
 4 | 
 5 | This command can be compared to any scraper.
 6 | 
 7 | > 🚨🚨🚨🚨🚨 **CRITICAL INFORMATION**🚨🚨🚨🚨🚨 <br><br> Downloading all messages from all groups can get your account banned. Use it carefully, and only if necessary.<br><br>**Note:** Using the groups filter is strongly recommended.
 8 | 
 9 | **Full Command:**
10 | ```bash
11 | python3 -m TEx download_messages --config CONFIGURATION_FILE_PATH --ignore_media --group_id 1234,5678
12 | ```
13 | 
14 | **Basic Command:**
15 | ```bash
16 | python3 -m TEx download_messages --config CONFIGURATION_FILE_PATH
17 | ```
18 | 
19 | **Parameters**
20 | 
21 |   * **config** > Required - Created Configuration File Path
22 |   * **ignore_media** > Optional - If present, don't Download any Media
23 |   * **group_id** > Optional - If present, Download the Messages only from Specified Groups ID's
24 | 
25 | 
26 | *Output Example:*
27 | ```bash
28 | 2023-10-01 21:01:35,543 - INFO - [*] Loading Configurations:
29 | 2023-10-01 21:01:35,543 - INFO - [*] Installed Modules:
30 | 2023-10-01 21:01:35,543 - INFO - 	data_structure_handler.py
31 | 2023-10-01 21:01:35,543 - INFO - 	database_handler.py
32 | 2023-10-01 21:01:35,543 - INFO - 	execution_configuration_handler.py
33 | 2023-10-01 21:01:35,543 - INFO - 	telegram_connection_manager.py
34 | 2023-10-01 21:01:35,544 - INFO - 	telegram_groups_list.py
35 | 2023-10-01 21:01:35,544 - INFO - 	telegram_groups_scrapper.py
36 | 2023-10-01 21:01:35,544 - INFO - 	telegram_maintenance
37 | 2023-10-01 21:01:35,544 - INFO - 	telegram_messages_listener.py
38 | 2023-10-01 21:01:35,544 - INFO - 	telegram_messages_scrapper.py
39 | 2023-10-01 21:01:35,544 - INFO - 	telegram_report_generator
40 | 2023-10-01 21:01:35,544 - INFO - 	telegram_stats_generator.py
41 | 2023-10-01 21:01:35,894 - INFO - [*] Executing Pipeline:
42 | 2023-10-01 21:01:42,659 - INFO - 	[+] telegram_messages_scrapper.TelegramGroupMessageScrapper
43 | 2023-10-01 21:01:42,706 - INFO - 		Found 2 Groups
44 | 2023-10-01 21:01:43,468 - INFO - 		Download Messages from "My Group 1" > Last Offset: 3936
45 | 2023-10-01 21:01:54,468 - INFO - 		Download Messages from "TeX Beta Group" > Last Offset: 158742
46 | 2023-10-01 20:37:27,859 - INFO - [*] Executing Termination:
47 | 2023-10-01 20:07:27,958 - INFO - 	[+] state_file_handler.SaveStateFileHandler
48 | ```


--------------------------------------------------------------------------------
/TEx/exporter/exporter_engine.py:
--------------------------------------------------------------------------------
 1 | """Exporter Engine."""
 2 | from __future__ import annotations
 3 | 
 4 | import logging
 5 | from configparser import ConfigParser
 6 | from typing import Dict, List
 7 | 
 8 | from TEx.exporter.exporter_base import BaseExporter
 9 | from TEx.exporter.pandas_rolling_exporter import PandasRollingExporter
10 | from TEx.models.facade.finder_notification_facade_entity import FinderNotificationMessageEntity
11 | 
12 | logger = logging.getLogger('TelegramExplorer')
13 | 
14 | 
class ExporterEngine:
    """Primary Export Engine.

    Keeps one exporter instance per registered 'EXPORTER.*' configuration section
    and dispatches export and shutdown requests to them.
    """

    def __init__(self) -> None:
        """Initialize Exporter Engine."""
        # Maps the configuration section name to {'instance': <exporter>}
        self.exporters: Dict = {}

    def __load_exporters(self, config: ConfigParser) -> None:
        """Load all Registered Exporters.

        :param config: Loaded configuration; every section containing 'EXPORTER.' is inspected and
                       the ones containing 'ROLLING_PANDAS' are loaded as PandasRollingExporter.
        """
        registered_exporters: List[str] = [item for item in config.sections() if 'EXPORTER.' in item]

        for register in registered_exporters:
            if 'ROLLING_PANDAS' in register:

                exporter: PandasRollingExporter = PandasRollingExporter()
                exporter.configure(config=config[register], source=config['CONFIGURATION']['phone_number'])

                self.exporters[register] = {'instance': exporter}

    def configure(self, config: ConfigParser) -> None:
        """Configure the Exporter Engine.

        :param config: Loaded configuration used to discover and configure the exporters.
        """
        self.__load_exporters(config)

    async def run(self, exporters: List[str], entity: FinderNotificationMessageEntity, rule_id: str) -> None:
        """Dispatch all Exporting Processes.

        :param exporters: Names (configuration sections) of the exporters that must receive the entity.
        :param entity: Message to be exported.
        :param rule_id: Identifier of the rule that triggered the export.
        :raises KeyError: If a requested exporter name was not loaded by 'configure'.
        """
        if not exporters:
            return

        for dispatcher_name in exporters:

            target_exporter: BaseExporter = self.exporters[dispatcher_name]['instance']

            try:
                await target_exporter.run(entity=entity, rule_id=rule_id)

            except Exception:  # Yes, Catch All - One failing exporter must not stop the others
                logger.exception('Unable to Export Data')

    async def shutdown(self) -> None:
        """Shutdown all Exporters and Flush all to Disk."""
        for dispatcher_name, registered in self.exporters.items():

            target_exporter: BaseExporter = registered['instance']

            try:
                target_exporter.shutdown()

            except Exception:  # Yes, Catch All - Every exporter must get the chance to flush
                logger.exception(f'Unable to Shutdown the "{dispatcher_name}" Exporter Gracefully. Data may be lost.')
66 | 


--------------------------------------------------------------------------------
/docs/configuration/scenario_based_examples.md:
--------------------------------------------------------------------------------
 1 | # Scenario-Based Configuration File Examples
 2 | 
 3 | ### Send All Messages to Elasticsearch
 4 | ```ini
 5 | [CONFIGURATION]
 6 | api_id=12555896
 7 | api_hash=dead1f29db5d1fa56cc42757acbabeef
 8 | phone_number=15552809753
 9 | data_path=/usr/home/tex_data/
10 | device_model=AMD64
11 | timeout=30
12 | 
13 | [FINDER]
14 | enabled=true
15 | 
16 | [FINDER.RULE.CatchAll]
17 | type=all
18 | notifier=NOTIFIER.ELASTIC_SEARCH.GENERAL
19 | 
20 | [NOTIFIER.ELASTIC_SEARCH.GENERAL]
21 | address=https://localhost:9200
22 | api_key=bHJtVEg0c0JnNkwwTnYtFFDEADlo6NS1rXzd6NVFSUmEtQ21mQldiUjEwUQ==
23 | verify_ssl_cert=False
24 | index_name=index-name
25 | pipeline_name=ent-search-generic-ingestion
26 | ```
27 | 
28 | 
29 | ### Export All Messages as CSV File
30 | ```ini
31 | [CONFIGURATION]
32 | api_id=12555896
33 | api_hash=dead1f29db5d1fa56cc42757acbabeef
34 | phone_number=15552809753
35 | data_path=/usr/home/tex_data/
36 | device_model=AMD64
37 | timeout=30
38 | 
39 | [FINDER]
40 | enabled=true
41 | 
42 | [FINDER.RULE.CatchAll]
43 | type=all
44 | exporter=EXPORTER.ROLLING_PANDAS.EXPORT_ALL_MESSAGES
45 | 
46 | [EXPORTER.ROLLING_PANDAS.EXPORT_ALL_MESSAGES]
47 | file_root_path=/path/to/export/folder/
48 | rolling_every_minutes=5
49 | fields=date_time,raw_text,group_name,group_id,from_id,to_id,reply_to_msg_id,message_id,is_reply,found_on
50 | use_header=true
51 | output_format=csv
52 | keep_last_files=20
53 | ```
54 | 
55 | ### Send Signals to Elasticsearch and Discord
56 | ```ini
57 | [CONFIGURATION]
58 | api_id=12555896
59 | api_hash=dead1f29db5d1fa56cc42757acbabeef
60 | phone_number=15552809753
61 | data_path=/usr/home/tex_data/
62 | device_model=AMD64
63 | timeout=30
64 | 
65 | [FINDER]
66 | enabled=true
67 | 
68 | [NOTIFIER.DISCORD.SIGNALS_HOOK]
69 | webhook=https://discord.com/api/webhooks/1128765187657681875/foobarqOMFp_457EDs2mbeefNPPeqJnBZZdfaubQvOKIUHYzfdeadZd5aqGX6FmCmbNjv
70 | prevent_duplication_for_minutes=0
71 | media_attachments_enabled=true
72 | media_attachments_max_size_bytes=10000000
73 | 
74 | [NOTIFIER.ELASTIC_SEARCH.SIGNALS]
75 | address=https://localhost:9200
76 | api_key=bHJtVEg0c0JnNkwwTnYtFFDEADlo6NS1rXzd6NVFSUmEtQ21mQldiUjEwUQ==
77 | verify_ssl_cert=False
78 | index_name=index-name-for-signals
79 | pipeline_name=ent-search-generic-ingestion
80 | 
81 | [SIGNALS]
82 | enabled=true
83 | keep_alive_interval=300
84 | 
85 | keep_alive_notifer=NOTIFIER.ELASTIC_SEARCH.SIGNALS
86 | initialization_notifer=NOTIFIER.ELASTIC_SEARCH.SIGNALS
87 | shutdown_notifer=NOTIFIER.ELASTIC_SEARCH.SIGNALS
88 | new_group_notifer=NOTIFIER.DISCORD.SIGNALS_HOOK,NOTIFIER.ELASTIC_SEARCH.SIGNALS
89 | ```
90 | 


--------------------------------------------------------------------------------
/docs/notification/signals.md:
--------------------------------------------------------------------------------
 1 | # Notification System - Signals
 2 | 
 3 | Signals are the way Telegram Explorer reports some internal behaviors and events.
 4 | 
 5 | Currently, there are 4 unique signals:
 6 | 
 7 |   - **Initialization** - Happens every time Telegram Explorer starts the 'listen' command
 8 |   - **Keep Alive** - Sent every (keep_alive_interval) seconds while Telegram Explorer is running the 'listen' command
 9 |   - **New Group** - Happens every time the 'listen' command receives a new group for the first time
10 |   - **Shutdown** - Happens every time Telegram Explorer finishes the 'listen' command
11 | 
12 | **Configuration Spec:**
13 | 
14 | You can fully enable/disable the signal system and have fine-grained control over each signal.
15 | 
16 | Also, Signals work like any other notification from Telegram Explorer, and you can configure each signal individually to be sent to any supported Notification Engine.
17 | 
18 | !!! info "Use Separated Notifiers"
19 |     
20 |     Although you can use the same notifiers that you use for the finder mechanisms, we strongly recommend creating a dedicated configuration for the signals, especially if you are going to use Elastic Search, because Telegram Explorer has a new, dedicated Index Template for this.
21 | 
22 | **Elastic Search Signals Index Template:** [Check the Template Here](notification_elasticsearch_signals_template.md)
23 | 
24 | **Parameters:**
25 | 
26 |   * **enabled** > Required - Enable/Disable the Signals System
27 |   * **keep_alive_interval** > Required - Interval (in seconds) at which the system sends the KEEP-ALIVE signal
28 |   * **keep_alive_notifer** > Optional - Names of the notifiers that receive the KEEP-ALIVE signal (comma separated). Omit to Disable this Signal
29 |   * **initialization_notifer** > Optional - Names of the notifiers that receive the INITIALIZATION signal (comma separated). Omit to Disable this Signal
30 |   * **shutdown_notifer** > Optional - Names of the notifiers that receive the SHUTDOWN signal (comma separated). Omit to Disable this Signal
31 |   * **new_group_notifer** > Optional - Names of the notifiers that receive the NEW-GROUP signal (comma separated). Omit to Disable this Signal
32 | 
33 | 
34 | **Changes on Configuration File**
35 | ```ini
36 | [SIGNALS]
37 | enabled=true
38 | keep_alive_interval=300
39 | 
40 | keep_alive_notifer=NOTIFIER.ELASTIC_SEARCH.ELASTIC_INDEX_01
41 | initialization_notifer=NOTIFIER.ELASTIC_SEARCH.ELASTIC_INDEX_01,NOTIFIER.DISCORD.MY_HOOK_2
42 | shutdown_notifer=NOTIFIER.ELASTIC_SEARCH.ELASTIC_INDEX_01,NOTIFIER.DISCORD.MY_HOOK_2
43 | new_group_notifer=NOTIFIER.DISCORD.MY_HOOK_2 
44 | ```
45 | 


--------------------------------------------------------------------------------
/docs/notification/notification_discord.md:
--------------------------------------------------------------------------------
 1 | # Notification System - Discord Hook
 2 | 
 3 | Telegram Explorer allows you to send notifications through Discord WebHooks. Each WebHook is linked to a specific channel.
 4 | 
 5 | This way you can configure many notification hooks, one for each need or category you like.
 6 | 
 7 | Every Notification is defined in the configuration files.
 8 | 
 9 | **Configuration Spec:**
10 | 
11 | For each notification hook you must set a configuration using the default name schema *NOTIFIER.DISCORD.<HOOK_NAME\>*
12 | 
13 | **Parameters:**
14 | 
15 |   * **webhook** > Required - Discord Webhook URI
16 |   * **prevent_duplication_for_minutes** > Required - Time (in minutes) that the system keeps track of messages sent to Discord servers, to prevent other messages with the same content from being sent to the webhook. If you don't want to use this feature, just set the parameter to 0.
17 |   * **timeout_seconds** > Optional - Timeout (in seconds) to wait for the message to be sent. If sending the message takes longer than that, the message will be ignored.
18 |     * Default: 30
19 |   * **media_attachments_enabled** > Optional - Enable/Disable the behavior for sending downloaded medias on messages that have been reported. 
20 |     * Default: false
21 |   * **media_attachments_max_size_bytes** > Optional - Maximum size (in bytes) of the media that can be sent with the notifications.
22 |     * Default: 10000000
23 | 
24 | 
25 | 
26 | **Changes on Configuration File**
27 | ```ini
28 | [NOTIFIER.DISCORD.MY_HOOK_1]
29 | webhook=https://discord.com/api/webhooks/1157896186751897357/o7foobar4txvAvKSdeadHiI-9XYeXaGlQtd-5PtrrX_eCE0XElWktpPqjrZ0KbeefPtQC
30 | prevent_duplication_for_minutes=240
31 | timeout_seconds=30
32 | media_attachments_enabled=true
33 | media_attachments_max_size_bytes=10000000
34 | 
35 | [NOTIFIER.DISCORD.MY_HOOK_2]
36 | webhook=https://discord.com/api/webhooks/1128765187657681875/foobarqOMFp_4tM2ic2mbeefNPOZqJnBZZdfaubQv2vJgbYzfdeadZd5aqGX6FmCmbNjX
37 | prevent_duplication_for_minutes=240
38 | media_attachments_enabled=false
39 | media_attachments_max_size_bytes=10000000
40 | 
41 | [NOTIFIER.DISCORD.MY_HOOK_3]
42 | webhook=https://discord.com/api/webhooks/1256789875462124045/bQ9TZqOzgA05PLVu8E2LU3N5foobarFU8-0nQbeefP5oIgAUOlydeadf7Uc19Hs00OJQ
43 | prevent_duplication_for_minutes=60
44 | timeout_seconds=30
45 | media_attachments_enabled=true
46 | media_attachments_max_size_bytes=25000000
47 | 
48 | [NOTIFIER.DISCORD.MY_HOOK_4]
49 | webhook=https://discord.com/api/webhooks/1487651987651004895/mR0v3zOywH3Z5HvdeadrGEqqndkcYepgCM-Q6foobardjAMXAEbeefuA_F7-h5JcBM4RT
50 | prevent_duplication_for_minutes=240
51 | media_attachments_enabled=true
52 | ```
53 | 


--------------------------------------------------------------------------------
/tests/modules/test_input_args_handler.py:
--------------------------------------------------------------------------------
 1 | """Input Args Handler Tests."""
 2 | 
 3 | import asyncio
 4 | import sys
 5 | import unittest
 6 | from configparser import ConfigParser
 7 | from typing import Dict
 8 | 
 9 | from TEx.modules.input_args_handler import InputArgsHandler
10 | 
11 | 
class InputArgsHandlerTest(unittest.TestCase):
    """Check how InputArgsHandler turns the 'report' command line into the args dict."""

    def setUp(self) -> None:
        """Load the configuration and guarantee sys.argv is restored after each test."""
        # Each test replaces sys.argv; restore the original afterwards so the
        # fake command line cannot leak into other tests in the same process.
        self.addCleanup(setattr, sys, 'argv', sys.argv)

        self.config = ConfigParser()
        # NOTE: ConfigParser.read() silently skips files it cannot find, so a
        # different working directory leaves this config empty instead of failing.
        self.config.read('../../config.ini')

    def _run_handler(self, argv: list) -> Dict:
        """Run InputArgsHandler with *argv* as the command line.

        :param argv: Value installed as sys.argv for the duration of the test
        :return: The args dict populated by the handler
        """
        sys.argv = argv

        target: InputArgsHandler = InputArgsHandler()
        args: Dict = {}
        data: Dict = {}

        loop = asyncio.get_event_loop()
        loop.run_until_complete(
            target.run(
                config=self.config,
                args=args,
                data=data
            )
        )

        return args

    def test_report_commands_complete(self):
        """Every 'report' option given explicitly must be reflected in args."""
        args: Dict = self._run_handler([
            '__main__.py',
            'report',
            '--config', '/usr/home/config_file.config',
            '--order_desc',
            '--limit_days', '8',
            '--filter', 'filter1, "Filter 2", Filter3',
            '--report_folder', 'reports/ut01',
            '--around_messages', '7',
            '--group_id', '99,5,78,56987'
        ])

        self.assertEqual('/usr/home/config_file.config', args['config'])
        self.assertTrue(args['order_desc'])
        self.assertEqual('filter1, "Filter 2", Filter3', args['filter'])
        self.assertEqual(8, int(args['limit_days']))
        self.assertEqual('reports/ut01', args['report_folder'])
        self.assertEqual(7, int(args['around_messages']))
        self.assertEqual('99,5,78,56987', args['group_id'])

    def test_report_commands_default(self):
        """With only --config given, the remaining 'report' options take their defaults."""
        args: Dict = self._run_handler([
            '__main__.py',
            'report',
            '--config', '/usr/home/config_file2.config',
        ])

        self.assertEqual('/usr/home/config_file2.config', args['config'])
        self.assertFalse(args['order_desc'])
        self.assertIsNone(args['filter'])
        self.assertEqual(3650, int(args['limit_days']))
        self.assertEqual('reports', args['report_folder'])
        self.assertEqual(1, int(args['around_messages']))
        self.assertEqual('*', args['group_id'])
82 | 


--------------------------------------------------------------------------------
/TEx/notifier/notifier_engine.py:
--------------------------------------------------------------------------------
 1 | """Notifier Modules."""
 2 | from __future__ import annotations
 3 | 
 4 | import logging
 5 | from configparser import ConfigParser
 6 | from typing import Dict, List, Union
 7 | 
 8 | from TEx.models.facade.finder_notification_facade_entity import FinderNotificationMessageEntity
 9 | from TEx.models.facade.signal_notification_model import SignalNotificationEntityModel
10 | from TEx.notifier.discord_notifier import DiscordNotifier
11 | from TEx.notifier.elastic_search_notifier import ElasticSearchNotifier
12 | from TEx.notifier.notifier_base import BaseNotifier
13 | 
logger = logging.getLogger('TelegramExplorer')


class NotifierEngine:
    """Primary Notification Engine.

    Keeps one configured notifier instance per ``NOTIFIER.*`` configuration
    section and dispatches entities to the notifiers selected by name.
    """

    def __init__(self) -> None:
        """Initialize the engine with an empty notifier registry."""
        # Maps configuration section name -> {'instance': <configured notifier>}
        self.notifiers: Dict = {}

    def __load_notifiers(self, config: ConfigParser) -> None:
        """Instantiate and configure a notifier for every registered section.

        :param config: Parsed configuration file
        :return:
        """
        registered_notifiers: List[str] = [item for item in config.sections() if 'NOTIFIER.' in item]

        for register in registered_notifiers:
            # NOTE: the notifier type is detected by substring, so a section
            # whose hook name contains 'DISCORD' or 'ELASTIC_SEARCH' matches too.
            if 'DISCORD' in register:

                notifier: DiscordNotifier = DiscordNotifier()
                notifier.configure(url=config[register]['webhook'], config=config[register])

                self.notifiers.update({
                    register: {'instance': notifier},
                    })

            if 'ELASTIC_SEARCH' in register:
                notifier_es: ElasticSearchNotifier = ElasticSearchNotifier()
                notifier_es.configure(config=config[register])

                self.notifiers.update({
                    register: {'instance': notifier_es},
                    })

    def configure(self, config: ConfigParser) -> None:
        """Configure the engine by loading all notifiers declared in the configuration.

        :param config: Parsed configuration file
        :return:
        """
        self.__load_notifiers(config)

    async def run(self, notifiers: List[str], entity: Union[FinderNotificationMessageEntity, SignalNotificationEntityModel], rule_id: str, source: str) -> None:
        """Dispatch the entity to every requested notifier.

        A failure raised by one notifier is logged and does not prevent the
        remaining notifiers from running.

        :param notifiers: Names (configuration section names) of the notifiers to trigger
        :param entity: Message or Signal object to be delivered
        :param rule_id: Triggered Rule ID
        :param source: Source Account/Phone Number
        :return:
        """
        if not notifiers:
            return

        for dispatcher_name in notifiers:

            # The lookup is outside the try block: an unknown notifier name
            # raises KeyError to the caller instead of being swallowed.
            target_notifier: BaseNotifier = self.notifiers[dispatcher_name]['instance']

            try:
                await target_notifier.run(entity=entity, rule_id=rule_id, source=source)

            except Exception:  # Yes, Catch All
                # Use the module logger (not the root logger) so the record
                # goes through the 'TelegramExplorer' logging configuration.
                logger.exception('Unable to Send Notification')


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | 
  2 | # Other
  3 | data/
  4 | tests/data/
  5 | tests/_data/
  6 | reports/
  7 | .idea
  8 | .tox
  9 | session.session
 10 | session.session-journal
 11 | assets/chrome_driver.zip
 12 | assets/chromedriver.exe
 13 | assets/chromedriver.so
 14 | assets/chromedriver
 15 | poetry.lock
 16 | compiled_docs/
 17 | 
 18 | # Byte-compiled / optimized / DLL files
 19 | __pycache__/
 20 | *.py[cod]
 21 | *$py.class
 22 | 
 23 | # C extensions
 24 | *.so
 25 | 
 26 | # Distribution / packaging
 27 | .Python
 28 | build/
 29 | develop-eggs/
 30 | dist/
 31 | downloads/
 32 | eggs/
 33 | .eggs/
 34 | lib/
 35 | lib64/
 36 | parts/
 37 | sdist/
 38 | var/
 39 | wheels/
 40 | pip-wheel-metadata/
 41 | share/python-wheels/
 42 | *.egg-info/
 43 | .installed.cfg
 44 | *.egg
 45 | MANIFEST
 46 | 
 47 | # PyInstaller
 48 | #  Usually these files are written by a python script from a template
 49 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 50 | *.manifest
 51 | *.spec
 52 | 
 53 | # Installer logs
 54 | pip-log.txt
 55 | pip-delete-this-directory.txt
 56 | 
 57 | # Unit test / coverage reports
 58 | htmlcov/
 59 | .tox/
 60 | .nox/
 61 | .coverage
 62 | .coverage.*
 63 | .cache
 64 | nosetests.xml
 65 | coverage.xml
 66 | *.cover
 67 | *.py,cover
 68 | .hypothesis/
 69 | .pytest_cache/
 70 | 
 71 | # Translations
 72 | *.mo
 73 | *.pot
 74 | 
 75 | # Django stuff:
 76 | *.log
 77 | local_settings.py
 78 | db.sqlite3
 79 | db.sqlite3-journal
 80 | 
 81 | # Flask stuff:
 82 | instance/
 83 | .webassets-cache
 84 | 
 85 | # Scrapy stuff:
 86 | .scrapy
 87 | 
 88 | # Sphinx documentation
 89 | docs/_build/
 90 | 
 91 | # PyBuilder
 92 | target/
 93 | 
 94 | # Jupyter Notebook
 95 | .ipynb_checkpoints
 96 | 
 97 | # IPython
 98 | profile_default/
 99 | ipython_config.py
100 | 
101 | # pyenv
102 | .python-version
103 | 
104 | # pipenv
105 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
106 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
107 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
108 | #   install all needed dependencies.
109 | #Pipfile.lock
110 | 
111 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
112 | __pypackages__/
113 | 
114 | # Celery stuff
115 | celerybeat-schedule
116 | celerybeat.pid
117 | 
118 | # SageMath parsed files
119 | *.sage.py
120 | 
121 | # Environments
122 | .env
123 | .venv
124 | env/
125 | venv/
126 | ENV/
127 | env.bak/
128 | venv.bak/
129 | 
130 | # Spyder project settings
131 | .spyderproject
132 | .spyproject
133 | 
134 | # Rope project settings
135 | .ropeproject
136 | 
137 | # mkdocs documentation
138 | /site
139 | 
140 | # mypy
141 | .mypy_cache/
142 | .dmypy.json
143 | dmypy.json
144 | 
145 | # Pyre type checker
146 | .pyre/
147 | /tests/_report/
148 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
  1 | [tox]
  2 | envlist=py38,quality,coverage,test,build,deploy,docs
  3 | skipsdist=True
  4 | 
  5 | 
  6 | [testenv:quality]
  7 | allowlist_externals =   poetry
  8 |                         mypy
  9 |                         ruff
 10 | changedir = .
 11 | deps =
 12 |     -rrequirements.txt
 13 | 
 14 | commands =
 15 |     poetry lock --no-update
 16 |     poetry install -v --sync
 17 | 
 18 |     ruff check ./TEx
 19 | 
 20 |     mypy --config-file mypy.ini
 21 | 
 22 | [testenv:coverage]
 23 | allowlist_externals = poetry
 24 | changedir = tests
 25 | deps =
 26 |     -rrequirements.txt
 27 | 
 28 | commands =
 29 |     poetry lock --no-update
 30 |     poetry install -v --sync
 31 |     poetry run coverage erase
 32 |     poetry run coverage run --source='../TEx' -m pytest . {posargs} --color=yes
 33 |     poetry run coverage report --rcfile=../coverage.rc
 34 |     poetry run coverage html --rcfile=../coverage.rc --fail-under=85
 35 | 
 36 | 
 37 | [testenv]
 38 | allowlist_externals = poetry
 39 | changedir = tests
 40 | deps =
 41 |     -rrequirements.txt
 42 | 
 43 | commands =
 44 |     poetry lock --no-update
 45 |     poetry install -v --sync
 46 |     poetry run pytest . {posargs} --verbose --color=yes
 47 | 
 48 | 
 49 | [testenv:build]
 50 | allowlist_externals =   cp
 51 |                         rm
 52 | skip_install = True
 53 | changedir = .
 54 | deps =
 55 |     -rrequirements.txt
 56 | 
 57 | commands =
 58 |     cp README.md TEx
 59 |     cp pyproject.toml TEx
 60 | 
 61 |     poetry lock --no-update
 62 |     poetry install --without dev -v --sync
 63 | 	poetry build -v
 64 | 
 65 |     rm TEx/README.md
 66 |     rm TEx/pyproject.toml
 67 | 
 68 | 
 69 | [testenv:deploy]
 70 | allowlist_externals =   cp
 71 |                         rm
 72 | skip_install = True
 73 | changedir = .
 74 | 
 75 | deps =
 76 |     poetry==1.5.1
 77 | 
 78 | passenv =
 79 |     PYPI_DEPLOY_TOKEN
 80 | 
 81 | commands =
 82 |     cp README.md TEx
 83 |     cp pyproject.toml TEx
 84 | 
 85 |     poetry lock --no-update
 86 |     poetry install --without dev -v --sync
 87 | 	poetry config pypi-token.pypi {env:PYPI_DEPLOY_TOKEN}
 88 | 	poetry publish --build
 89 | 
 90 |     rm TEx/README.md
 91 |     rm TEx/pyproject.toml
 92 | 
 93 | [testenv:docs]
 94 | allowlist_externals = mkdocs
 95 | 
 96 | skip_install = True
 97 | changedir = .
 98 | 
 99 | deps =
100 |     poetry==1.5.1
101 | 
102 | commands =
103 |     poetry lock --no-update
104 |     poetry install -v --sync
105 | 	mkdocs build --clean --site-dir compiled_docs -v
106 | 
107 | [flake8]
108 | ignore=E501,D202,D401,D902,I100,I201,I202
109 | exclude=coverage,codequality,.git,__pycache__,build,dist,venv,.tox,data,assets,htmlcov,.idea,tests
110 | 
111 | max-complexity=15
112 | verbose=2
113 | count=True
114 | hang_closing=True
115 | hang-closing=True
116 | show_source=True
117 | show-source=True
118 | statistics=True
119 | jobs=6
120 | 


--------------------------------------------------------------------------------
/TEx/report_templates/default_index.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html lang="en">
 3 |     <head>
 4 |         <meta charset="utf-8">
 5 |         <meta name="viewport" content="width=device-width, initial-scale=1">
 6 | 
 7 |         <!-- Bootstrap Styles -->
 8 |         <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap.min.css" integrity="sha384-HSMxcRTRxnN+Bdg0JdbxYKrThecOKuH5zCYotlSAcp1+c8xmyTe9GYg1l9a69psu" crossorigin="anonymous">
 9 |         <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap-theme.min.css" integrity="sha384-6pzBo3FDv/PJ8r2KRkGHifhEocL+1X2rVCTTkUfGk7/0pbek5mMa1upzvWbrUbOZ" crossorigin="anonymous">
10 | 
11 |         <style>
12 |             .img_col {
13 |                 width: 51px;
14 |             }
15 | 
16 |             .b64_logo{
17 |                 display:block;
18 |                 width:50px;
19 |                 height:50px;
20 |             }
21 | 
22 |         </style>
23 |     </head>
24 | 
25 |     <body>
26 |         <div class="container" style="width: 99%">
27 |             <div class="row">
28 |                 <h1>TEx - Telegram Explorer ({{target_phone}})</h1>
29 |             </div>
30 |             <div class="row">
31 |                 <b>Report generated at</b> {{now}}
32 |                 <br><b>From</b> {{start}} <b>To</b> {{end}}
33 |                 <br><b>Groups:</b> {{groups_filter}}
34 |                 <br><b>Filtering:</b> {{words_filter}}
35 |             </div>
36 |             <div class="row">
37 |                 <table class="table table-striped table-bordered">
38 |                     <tr>
39 |                         <td colspan="2">Groups</td>
40 |                         <td>N. Messages</td>
41 |                     </tr>
42 |                     {% for group in groups %}
43 |                     <tr>
44 |                         <td class="img_col">
45 |                             <a href="result_{{group.group_username}}_{{group.id}}.html" target="_report">
46 |                                 <img class="b64_logo" id="base64image" src="data:image/jpeg;base64, {{group.photo_base64}}" />
47 |                             </a>
48 |                         </td>
49 |                         <td style="vertical-align: middle">
50 |                             <a href="result_{{group.group_username}}_{{group.id}}.html" style="color:black;" target="_report">
51 |                                 <b>{{group.title}}</b> - {{group.group_username}} ({{group.id}})
52 |                             </a>
53 |                         </td>
54 |                         <td style="vertical-align: middle">
55 |                             {{group.meta_message_count}}
56 |                         </td>
57 |                     </tr>
58 |                     {% endfor %}
59 |                 </table>
60 |             </div>
61 |         </div>
62 | 
63 |     </body>
64 | 
65 | </html>


--------------------------------------------------------------------------------
/tests/report_templates/default_index.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html lang="en">
 3 |     <head>
 4 |         <meta charset="utf-8">
 5 |         <meta name="viewport" content="width=device-width, initial-scale=1">
 6 | 
 7 |         <!-- Bootstrap Styles -->
 8 |         <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap.min.css" integrity="sha384-HSMxcRTRxnN+Bdg0JdbxYKrThecOKuH5zCYotlSAcp1+c8xmyTe9GYg1l9a69psu" crossorigin="anonymous">
 9 |         <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap-theme.min.css" integrity="sha384-6pzBo3FDv/PJ8r2KRkGHifhEocL+1X2rVCTTkUfGk7/0pbek5mMa1upzvWbrUbOZ" crossorigin="anonymous">
10 | 
11 |         <style>
12 |             .img_col {
13 |                 width: 51px;
14 |             }
15 | 
16 |             .b64_logo{
17 |                 display:block;
18 |                 width:50px;
19 |                 height:50px;
20 |             }
21 | 
22 |         </style>
23 |     </head>
24 | 
25 |     <body>
26 |         <div class="container" style="width: 99%">
27 |             <div class="row">
28 |                 <h1>TEx - Telegram Explorer ({{target_phone}})</h1>
29 |             </div>
30 |             <div class="row">
31 |                 <b>Report generated at</b> {{now}}
32 |                 <br><b>From</b> {{start}} <b>To</b> {{end}}
33 |                 <br><b>Groups:</b> {{groups_filter}}
34 |                 <br><b>Filtering:</b> {{words_filter}}
35 |             </div>
36 |             <div class="row">
37 |                 <table class="table table-striped table-bordered">
38 |                     <tr>
39 |                         <td colspan="2">Groups</td>
40 |                         <td>N. Messages</td>
41 |                     </tr>
42 |                     {% for group in groups %}
43 |                     <tr>
44 |                         <td class="img_col">
45 |                             <a href="result_{{group.group_username}}_{{group.id}}.html" target="_report">
46 |                                 <img class="b64_logo" id='base64image' src='data:image/jpeg;base64, {{group.photo_base64}}' />
47 |                             </a>
48 |                         </td>
49 |                         <td style="vertical-align: middle">
50 |                             <a href="result_{{group.group_username}}_{{group.id}}.html" style="color:black;" target="_report">
51 |                                 <b>{{group.title}}</b> - {{group.group_username}} ({{group.id}})
52 |                             </a>
53 |                         </td>
54 |                         <td style="vertical-align: middle">
55 |                             {{group.meta_message_count}}
56 |                         </td>
57 |                     </tr>
58 |                     {% endfor %}
59 |                 </table>
60 |             </div>
61 |         </div>
62 | 
63 |     </body>
64 | 
65 | </html>


--------------------------------------------------------------------------------
/TEx/core/temp_file.py:
--------------------------------------------------------------------------------
 1 | """Temp File Handle."""
 2 | 
 3 | from datetime import datetime
 4 | from typing import cast
 5 | 
 6 | import pytz
 7 | 
 8 | from TEx.database.db_manager import DbManager
 9 | from TEx.models.database.temp_db_models import TempDataOrmEntity
10 | 
11 | 
class TempFileHandler:
    """Temporary File Handler.

    The "files" handled here are TempDataOrmEntity rows, addressed by their
    ``path`` and accessed through the 'temp' session of DbManager.
    """

    @staticmethod
    def file_exist(path: str) -> bool:
        """Return if a File Exists.

        :param path: File Path
        :return: True when at least one entry is stored for the path
        """
        return bool(DbManager.SESSIONS['temp'].query(TempDataOrmEntity).filter_by(path=path).count() > 0)

    @staticmethod
    def read_file_text(path: str) -> str:
        """Read All File Content.

        :param path: File Path
        :return: File Content
        """
        # NOTE(review): no guard for a missing entry here - the first match is
        # used directly, so callers are presumably expected to check file_exist() first.
        entity: TempDataOrmEntity = cast(TempDataOrmEntity, DbManager.SESSIONS['temp'].query(TempDataOrmEntity).filter_by(path=path).first())
        return str(entity.data)

    @staticmethod
    def remove_expired_entries() -> int:
        """Remove all Expired Entries.

        :return: Number of deleted entries
        """
        # An entry is expired when its valid_at is not after the current UTC timestamp
        total: int = DbManager.SESSIONS['temp'].execute(
            TempDataOrmEntity.__table__.delete().where(  # type: ignore
                TempDataOrmEntity.valid_at <= int(datetime.now(tz=pytz.UTC).timestamp()),
                ),
            ).rowcount

        DbManager.SESSIONS['temp'].flush()
        DbManager.SESSIONS['temp'].commit()
        return total

    @staticmethod
    def purge() -> int:
        """Remove all Entries.

        :return: Number of deleted entries
        """
        total: int = DbManager.SESSIONS['temp'].execute(TempDataOrmEntity.__table__.delete()).rowcount  # type: ignore
        DbManager.SESSIONS['temp'].flush()
        DbManager.SESSIONS['temp'].commit()
        return total

    @staticmethod
    def write_file_text(path: str, content: str, validate_seconds: int = 3600) -> None:
        """
        Write Text Content into File.

        Any entry already stored for the path is replaced.

        :param path: File Path
        :param content: File Content
        :param validate_seconds: File Validation in Seconds
        :return: None
        """
        # Delete if Exists
        DbManager.SESSIONS['temp'].execute(
            TempDataOrmEntity.__table__.delete().where(TempDataOrmEntity.path == path),  # type: ignore
            )

        # Read the clock once so that valid_at is exactly created_at + validate_seconds
        # (two separate reads could straddle a second boundary).
        created_at: int = int(datetime.now(tz=pytz.UTC).timestamp())

        entity: TempDataOrmEntity = TempDataOrmEntity(
            path=path,
            data=content,
            created_at=created_at,
            valid_at=created_at + validate_seconds,
            )
        DbManager.SESSIONS['temp'].add(entity)

        # Execute
        DbManager.SESSIONS['temp'].flush()
        DbManager.SESSIONS['temp'].commit()
81 | 


--------------------------------------------------------------------------------
/TEx/database/db_migration.py:
--------------------------------------------------------------------------------
 1 | """DB Migrator."""
 2 | from __future__ import annotations
 3 | 
 4 | import logging
 5 | 
 6 | import sqlalchemy
 7 | from sqlalchemy import Index, MetaData, Table
 8 | 
 9 | from TEx.database.db_manager import DbManager
10 | from TEx.models.database.telegram_db_model import TelegramMediaOrmEntity, TelegramMessageOrmEntity
11 | 
12 | logger = logging.getLogger('TelegramExplorer')
13 | 
14 | 
class DatabaseMigrator:
    """Apply schema migrations (currently index creation) to existing databases."""

    @staticmethod
    def apply_migrations() -> None:
        """Apply all Migrations."""
        # Only the 'data' bind is migrated
        for db_name in ['data']:
            DatabaseMigrator.__apply_migration_for_bind(db_name=db_name)

    @staticmethod
    def __apply_migration_for_bind(db_name: str) -> None:
        """Apply all known migrations to a single database bind.

        :param db_name: Key of the bind in DbManager.SQLALCHEMY_BINDS
        :return:
        """
        # Reflect the current schema so existing indexes can be inspected
        meta: MetaData = sqlalchemy.MetaData()
        meta.reflect(bind=DbManager.SQLALCHEMY_BINDS[db_name])

        # ix_telegram_message_group_id_date - V0.3.0
        DatabaseMigrator.__create_index(
            metadata=meta,
            table_name='telegram_message',
            index_name='ix_telegram_message_group_id_date',
            version='V0.3.0',
            field_spec=(TelegramMessageOrmEntity.group_id, TelegramMessageOrmEntity.date_time.desc()),
            db_name=db_name,
        )

        # ix_telegram_media_group_id_date - V0.3.0
        DatabaseMigrator.__create_index(
            metadata=meta,
            table_name='telegram_media',
            index_name='ix_telegram_media_group_id_date',
            version='V0.3.0',
            field_spec=(TelegramMediaOrmEntity.group_id, TelegramMediaOrmEntity.date_time.desc()),
            db_name=db_name,
        )

    @staticmethod
    def __create_index(metadata: MetaData, table_name: str, index_name: str, version: str, field_spec: tuple,
                       db_name: str) -> None:
        """Create an index on a table unless the reflected schema already has it.

        :param metadata: Reflected metadata of the target database
        :param table_name: Name of the table that receives the index
        :param index_name: Name of the index to create
        :param version: Version that introduced the index (used only in the log message)
        :param field_spec: Column expressions that compose the index
        :param db_name: Key of the bind in DbManager.SQLALCHEMY_BINDS
        :return:
        """
        # Target table, as reflected from the database
        table: Table = metadata.tables[table_name]

        if DatabaseMigrator.__check_index_exists(table=table, index_name=index_name):
            return

        logger.info('\t[*] APPLYING DB (%s) MIGRATION (%s) - %s', db_name, version, index_name)

        new_index: Index = sqlalchemy.Index(
            index_name,
            *field_spec,
        )
        new_index.create(bind=DbManager.SQLALCHEMY_BINDS[db_name])

    @staticmethod
    def __check_index_exists(table: Table, index_name: str) -> bool:
        """Check if Index Exists on Table.

        :param table: Table whose ``indexes`` collection is inspected
        :param index_name: Index name to look for
        :return: True when the table has an index with that name
        """
        return any(item.name == index_name for item in table.indexes)
77 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Welcome to Telegram Explorer
 2 | 
 3 | [![](https://img.shields.io/github/last-commit/guibacellar/TEx)](https://github.com/guibacellar/TEx/tree/main)
 4 | [![](https://img.shields.io/github/languages/code-size/guibacellar/TEx)](https://github.com/guibacellar/TEx/tree/main)
 5 | [![](https://img.shields.io/badge/Python-3.8+-green.svg)](https://www.python.org/downloads/) 
 6 | [![](https://github.com/guibacellar/TEx/actions/workflows/cy.yml/badge.svg?branch=main)](https://github.com/guibacellar/TEx/actions/workflows/cy.yml)
 7 | [![](https://telegramexplorer.readthedocs.io/en/latest/?badge=latest)](https://telegramexplorer.readthedocs.io/en/latest/)
 8 | [![](https://img.shields.io/badge/maintainer-Th3%200bservator-blue)](https://theobservator.net/)
 9 | ![](https://img.shields.io/github/v/release/guibacellar/TeX)
10 | 
11 | <!-- ABOUT THE PROJECT -->
12 | ## About The Project
13 | 
14 | TEx is a Telegram Explorer tool created to help Researchers, Investigators and Law Enforcement Agents to Collect and Process the Huge Amount of Data Generated from Criminal, Fraud, Security and Other Telegram Groups.
15 | 
16 | > ⚠️ **BETA VERSION** ⚠️
17 | > <br/> Please note that V0.3.0 is the latest beta version of this project, so it is possible that you may encounter bugs that have not yet been mapped out.
18 | > <br/> I kindly ask you to report the bugs at: [https://github.com/guibacellar/TEx/issues](https://github.com/guibacellar/TEx/issues)
19 | 
20 | <!-- REQUIREMENTS -->
21 | ## Requirements
22 | - Python 3.8.1+ (⚠️ Deprecated. Consider using version 3.10+ ⚠️)
23 | - Windows x64 or Linux x64
24 | 
25 | <!-- FEATURES -->
26 | ## Features
27 | - Connection Manager (Handle Telegram Connection)
28 | - Group Information Scraper
29 | - List Groups (Scrape info for all groups, including members, members info and profile pic)
30 | - Automatic Group Information Sync
31 | - Automatic Users Information Sync
32 | - Messages Listener (Listen all Incoming Messages)
33 | - Messages Scraper (Scrape all Group Messages, since the first one)
34 | - Download Media (Including fine media settings like size, groups and/or media type)
35 | - HTML Report Generation
36 | - Export Downloaded Files
37 | - Export Messages
38 | - Message Finder System (Allows finding patterns in messages using terms or RegEx)
39 | - Message Notification System (Send alerts, finds, or all messages to Discord)
40 | - Elastic Search 8+ Native Integration
41 | - Image OCR using Tesseract
42 | - Signals for Helping Monitoring
43 | 
44 | 
45 | <!-- INSTALLING -->
46 | ## Installing
47 | Telegram Explorer is available through *pip*, so, just use pip install in order to fully install TeX.
48 | 
49 | ```bash
50 | pip install TelegramExplorer
51 | ```
52 | 
53 | <!-- Upgrading -->
54 | ## Upgrading
55 | To upgrade TeX to the latest version, just use the *pip install --upgrade* command.
56 | 
57 | ```bash
58 | pip install --upgrade TelegramExplorer
59 | ```
60 | 
61 | ## Documentation
62 | [https://telegramexplorer.readthedocs.io/en/latest/](https://telegramexplorer.readthedocs.io/en/latest/)
63 | 


--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
 1 | site_name: Telegram Explorer
 2 | repo_url: https://github.com/guibacellar/TEx/
 3 | copyright: Copyright &copy; 2023 - Th3 0bservator
 4 | 
 5 | theme:
 6 |   name: material
 7 |   highlightjs: true
 8 |   features:
 9 |     - navigation.footer
10 | 
11 | markdown_extensions:
12 |   - admonition
13 |   - pymdownx.details
14 |   - pymdownx.superfences
15 |   - footnotes
16 | 
17 | 
18 | nav:
19 |   - Home: 'index.md'
20 |   - 'Authentication': 'authentication.md'
21 |   - 'Contact': 'contact.md'
22 |   - 'Secret Chats': 'secret_chats.md'
23 |   - 'Configuration':
24 |     - 'Basic Configuration': 'configuration/basic.md'
25 |     - 'Proxy': 'configuration/proxy.md'
26 |     - 'Media Download':
27 |       - 'Configuration': 'configuration/media_download_configuration.md'
28 |       - 'Examples': 'configuration/media_download_examples.md'
29 |       - 'Content-Types': 'configuration/media_download_content_types.md'
30 |     - 'OCR': 'configuration/ocr.md'
31 |     - 'Examples':
32 |       - 'Scenario-Based Examples': 'configuration/scenario_based_examples.md'
33 |       - 'Complete Configuration File Example': 'configuration/complete_configuration_file_example.md'
34 |   - 'How to Use':
35 |       - 'Basic Usage': 'how_use/how_to_use_basic.md'
36 |       - 'Connecting to Telegram Servers': 'how_use/usage_connection.md'
37 |       - 'Download/Update Groups': 'how_use/usage_load_groups.md'
38 |       - 'List Groups': 'how_use/usage_list_groups.md'
39 |       - 'Listen Messages': 'how_use/usage_message_listener.md'
40 |       - 'Download Messages': 'how_use/usage_download_messages.md'
41 |   - 'Message Finder System':
42 |       - 'Configuration': 'finder/configuration.md'
43 |       - 'Catch All': 'finder/finder_catchall.md'
44 |       - 'RegEx Finder': 'finder/finder_regex.md'
45 |   - 'Notification System':
46 |       - 'Discord Notification Hook': 'notification/notification_discord.md'
47 |       - 'Elastic Search Connector':
48 |           - 'Configuration': 'notification/notification_elasticsearch.md'
49 |           - 'Index Template': 'notification/notification_elasticsearch_index_template.md'
50 |           - 'Signals Template': 'notification/notification_elasticsearch_signals_template.md'
51 |       - 'Signals': 'notification/signals.md'
52 |   - 'Message Exporter System':
53 |       - 'Pandas Rolling Exporter': 'exporting/pandas_rolling.md'
54 |   - 'Reports':
55 |       - 'Export Files': 'report/report_export_files.md'
56 |       - 'HTML Report': 'report/report_html.md'
57 |       - 'Status Report': 'report/report_status.md'
58 |       - 'Text Report': 'report/report_text.md'
59 |   - 'Maintenance':
60 |       - 'Purging Old Data': 'maintenance/purge_old_data.md'
61 |   - 'Changelog':
62 |       - 'V0.3.0': 'changelog/v030.md'
63 | 
64 | site_author: Th3 0bservator
65 | 
66 | extra:
67 |   social:
68 |     - icon: fontawesome/brands/twitter
69 |       link: https://twitter.com/th3_0bservator
70 |     - icon: fontawesome/brands/github
71 |       link: https://github.com/guibacellar/
72 |     - icon: fontawesome/brands/linkedin
73 |       link: https://www.linkedin.com/in/guilherme-bacellar/


--------------------------------------------------------------------------------
/TEx/modules/telegram_report_generator/telegram_report_sent_telegram.py:
--------------------------------------------------------------------------------
 1 | """Telegram Report Generator."""
 2 | from __future__ import annotations
 3 | 
 4 | import asyncio
 5 | import datetime
 6 | import logging
 7 | import os
 8 | import zipfile
 9 | from configparser import ConfigParser
10 | from os.path import basename
11 | from typing import Dict, cast
12 | 
13 | import pytz
14 | from telethon import TelegramClient
15 | 
16 | from TEx.core.base_module import BaseModule
17 | 
18 | logger = logging.getLogger('TelegramExplorer')
19 | 
20 | 
class TelegramReportSentViaTelegram(BaseModule):
    """Send the generated report, packed as a ZIP file, to a Telegram user."""

    __USERS_RESOLUTION_CACHE: Dict = {}

    async def can_activate(self, config: ConfigParser, args: Dict, data: Dict) -> bool:
        """
        Tell whether this module is enabled for the current execution.

        :param config: Application configuration (not used here).
        :param args: Execution arguments; 'sent_report_telegram' holds the activation flag.
        :param data: Shared pipeline data (not used here).
        :return: The value of args['sent_report_telegram'].
        """
        return cast(bool, args['sent_report_telegram'])

    async def run(self, config: ConfigParser, args: Dict, data: Dict) -> None:
        """
        Zip the report folder, send a title message plus the archive, then delete the archive.

        Reads 'report_folder', 'attachment_name', 'title' and 'destination_username'
        from args and the connected client from data['telegram_client']. Every
        '@@now@@' marker in the attachment name and in the title is replaced by the
        current UTC timestamp.

        The temporary ZIP file is removed even when zipping or sending fails, so a
        failed execution does not leave an archive behind in the report folder.

        :param config: Application configuration (only forwarded to can_activate).
        :param args: Execution arguments.
        :param data: Shared pipeline data.
        :return: None
        """
        if not await self.can_activate(config, args, data):
            logger.debug('\t\tModule is Not Enabled...')
            return

        # Check Report and Assets Folder
        report_root_folder: str = args['report_folder']

        # Create Report File Name
        attach_name: str = args['attachment_name'].replace('@@now@@', datetime.datetime.strftime(datetime.datetime.now(tz=pytz.UTC), '%y%m%d_%H%M%S')) + '.zip'
        report_filename: str = os.path.join(report_root_folder, attach_name)
        logger.info(f'\t\t\tTarget Report Filename: {report_filename}')

        try:
            # Create a Zip File
            logger.info('\t\t\tGenerating Report ZIP File')
            with zipfile.ZipFile(report_filename, 'w', compresslevel=9, compression=zipfile.ZIP_DEFLATED) as zip_obj:
                # Iterate over all the files in directory
                for folder_name, _subfolders, filenames in os.walk(report_root_folder):
                    for filename in filenames:
                        file_path = os.path.join(folder_name, filename)

                        # The archive lives inside the folder being walked: never add it to itself
                        if file_path == report_filename:
                            continue

                        # Entries are stored under the name of their immediate parent folder
                        zip_obj.write(file_path, os.path.join(basename(folder_name), filename))

            # Sent via Telegram
            client: TelegramClient = data['telegram_client']
            receiver = await client.get_input_entity(args['destination_username'])

            # Sent Message
            logger.info('\t\t\tSending Message')
            await client.send_message(
                receiver,
                args['title'].replace(
                    '@@now@@',
                    datetime.datetime.strftime(datetime.datetime.now(tz=pytz.UTC), '%y-%m-%d %H:%M:%S'),
                    ).replace('\\n', '\n'),
                )
            await asyncio.sleep(1)

            # Sent the Report
            await client.send_file(receiver, f'{report_root_folder}/{attach_name}')

        finally:
            # Remove Report File (it may not exist if the ZIP could not be created)
            if os.path.exists(report_filename):
                os.remove(report_filename)
80 | 


--------------------------------------------------------------------------------
/tests/unittest_configfile.config:
--------------------------------------------------------------------------------
  1 | [CONFIGURATION]
  2 | api_id=12345678
  3 | api_hash=deff1f2587358746548deadbeef58ddd
  4 | phone_number=5526986587745
  5 | data_path=_data
  6 | device_model=UT_DEVICE_01
  7 | timeout=20
  8 | 
  9 | [OCR]
 10 | enabled=true
 11 | type=tesseract
 12 | 
 13 | [OCR.TESSERACT]
 14 | tesseract_cmd=/path/to/folder
 15 | language=eng
 16 | 
 17 | [PROXY]
 18 | type=HTTP
 19 | address=1.2.3.4
 20 | port=4444
 21 | username=ut_username
 22 | password=ut_password
 23 | rdns=true
 24 | 
 25 | [MEDIA.DOWNLOAD]
 26 | default=ALLOW
 27 | max_download_size_bytes=256000000
 28 | 
 29 | [MEDIA.DOWNLOAD.application/json]
 30 | enabled=ALLOW
 31 | max_download_size_bytes=256000000
 32 | groups=*
 33 | 
 34 | [MEDIA.DOWNLOAD.image/jpeg]
 35 | enabled=ALLOW
 36 | max_download_size_bytes=25600000
 37 | groups=*
 38 | 
 39 | [MEDIA.DOWNLOAD.text/plain]
 40 | enabled=ALLOW
 41 | max_download_size_bytes=256000000
 42 | groups=5586,12099,1
 43 | 
 44 | [FINDER]
 45 | enabled=true
 46 | find_in_text_files_enabled=true
 47 | find_in_text_files_max_size_bytes=20000000
 48 | 
 49 | [FINDER.RULE.UT_Finder_Demo]
 50 | type=regex
 51 | regex=term1|term2|term3
 52 | notifier=NOTIFIER.DISCORD.NOT_002
 53 | exporter=EXPORTER.ROLLING_PANDAS.TEST_EXPORTER_001
 54 | 
 55 | [FINDER.RULE.UT_Finder_Demo_MultiLine]
 56 | type=regex
 57 | regex=term1
 58 |     term2
 59 |         term3
 60 | notifier=NOTIFIER.DISCORD.NOT_002
 61 | 
 62 | [FINDER.RULE.UT_Finder_Demo_MultiLine_WithLineBreak]
 63 | type=regex
 64 | regex=
 65 |     term1
 66 |     term2
 67 |     term3
 68 | notifier=NOTIFIER.DISCORD.NOT_002
 69 | 
 70 | [FINDER.RULE.UT_Finder_Demo_MultiLine_UrlAndCreditCard_WithLineBreak]
 71 | type=regex
 72 | regex=
 73 |     /^https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%%_\+.~#?&\/=]*)$/
 74 |     (^4[0-9]{12}(?:[0-9]{3})?$)|(^(?:5[1-5][0-9]{2}|222[1-9]|22[3-9][0-9]|2[3-6][0-9]{2}|27[01][0-9]|2720)[0-9]{12}$)|(3[47][0-9]{13})|(^3(?:0[0-5]|[68][0-9])[0-9]{11}$)|(^6(?:011|5[0-9]{2})[0-9]{12}$)|(^(?:2131|1800|35\d{3})\d{11}$)
 75 | notifier=NOTIFIER.DISCORD.NOT_002
 76 | 
 77 | [NOTIFIER.DISCORD.NOT_001]
 78 | webhook=https://uri.domain.com/webhook/001
 79 | prevent_duplication_for_minutes=240
 80 | media_attachments_enabled=true
 81 | media_attachments_max_size_bytes=10000000
 82 | 
 83 | [NOTIFIER.DISCORD.NOT_002]
 84 | webhook=https://uri.domain.com/webhook/002
 85 | prevent_duplication_for_minutes=240
 86 | media_attachments_enabled=true
 87 | media_attachments_max_size_bytes=10000000
 88 | 
 89 | [NOTIFIER.ELASTIC_SEARCH.UT_01]
 90 | address=https://localhost:666
 91 | api_key=test_api_key
 92 | verify_ssl_cert=False
 93 | index_name=test_index_name
 94 | pipeline_name=test_pipeline_name
 95 | 
 96 | [EXPORTER.ROLLING_PANDAS.TEST_EXPORTER_001]
 97 | file_root_path=_data/export
 98 | rolling_every_minutes=1
 99 | fields=date_time,raw_text,group_name,group_id,from_id,to_id,reply_to_msg_id,message_id,is_reply,found_on
100 | use_header=true
101 | output_format=csv
102 | keep_last_files=30
103 | 
104 | [SIGNALS]
105 | enabled=true
106 | keep_alive_interval=2
107 | 
108 | keep_alive_notifer=NOTIFIER.DISCORD.NOT_001
109 | initialization_notifer=NOTIFIER.ELASTIC_SEARCH.UT_01
110 | shutdown_notifer=NOTIFIER.DISCORD.NOT_001,NOTIFIER.ELASTIC_SEARCH.UT_01
111 | new_group_notifer=NOTIFIER.ELASTIC_SEARCH.UT_01,NOTIFIER.DISCORD.NOT_001


--------------------------------------------------------------------------------
/TEx/core/mapper/telethon_message_mapper.py:
--------------------------------------------------------------------------------
 1 | """Telethon Event Entity Mapper."""
 2 | from __future__ import annotations
 3 | 
 4 | import logging
 5 | from typing import Optional, Union
 6 | 
 7 | from pydantic import BaseModel
 8 | from telethon.errors import ChannelPrivateError
 9 | from telethon.tl.patched import Message
10 | from telethon.tl.types import Channel, Chat, PeerUser, User
11 | 
12 | from TEx.models.facade.finder_notification_facade_entity import FinderNotificationMessageEntity
13 | from TEx.models.facade.media_handler_facade_entity import MediaHandlingEntity
14 | 
15 | logger = logging.getLogger('TelegramExplorer')
16 | 
17 | 
class TelethonMessageEntityMapper:
    """Map Telethon messages to the facade entity used by the finder and notifiers."""

    class ChatPropsModel(BaseModel):
        """Chat id and title resolved by the __map_chat_props method."""

        chat_id: int
        chat_title: str

    @staticmethod
    async def to_finder_notification_facade_entity(message: Message, downloaded_media_info: Optional[MediaHandlingEntity], ocr_content: Optional[str]) -> \
    Optional[FinderNotificationMessageEntity]:
        """
        Map a Telethon message to a FinderNotificationMessageEntity.

        :param message: Telethon message to be mapped.
        :param downloaded_media_info: Media handling result, stored as-is on the entity.
        :param ocr_content: Optional OCR text, appended to the message text after a blank line.
        :return: The mapped entity, or None when the message is falsy or its chat
                 cannot be loaded because the channel is private.
        """
        if not message:
            return None

        try:
            mapped_chat_props: TelethonMessageEntityMapper.ChatPropsModel = TelethonMessageEntityMapper.__map_chat_props(
                entity=await message.get_chat(),
            )
        except ChannelPrivateError:
            return None

        raw_text: Optional[str] = message.raw_text
        if ocr_content:
            # A message without text (None or '') must not break the concatenation:
            # in that case the OCR content alone becomes the text.
            raw_text = f'{raw_text}\n\n{ocr_content}' if raw_text else ocr_content

        h_result: FinderNotificationMessageEntity = FinderNotificationMessageEntity(
            date_time=message.date,
            raw_text=raw_text,
            group_name=mapped_chat_props.chat_title,
            group_id=mapped_chat_props.chat_id,
            from_id=message.from_id.user_id if isinstance(message.from_id, PeerUser) else None,
            to_id=message.to_id.channel_id if message.to_id is not None and hasattr(message.to_id, 'channel_id') else None,
            reply_to_msg_id=message.reply_to.reply_to_msg_id if message.is_reply and message.reply_to else None,
            message_id=message.id,
            is_reply=message.is_reply,
            downloaded_media_info=downloaded_media_info,
            found_on='UNDEFINED',
        )

        return h_result

    @staticmethod
    def __map_chat_props(entity: Union[Channel, User, Chat]) -> TelethonMessageEntityMapper.ChatPropsModel:
        """
        Resolve the id and display title of the chat a message belongs to.

        Channels and chats use their title; users use their username, falling back
        to the phone number and finally to an empty string.

        :param entity: Chat entity returned by the message.
        :return: ChatPropsModel with the chat id and title.
        :raises AttributeError: When the entity is not a Channel, Chat or User.
        """
        if isinstance(entity, (Channel, Chat)):
            return TelethonMessageEntityMapper.ChatPropsModel(
                chat_id=entity.id,
                chat_title=entity.title if entity.title else '',
            )

        if isinstance(entity, User):
            return TelethonMessageEntityMapper.ChatPropsModel(
                chat_id=entity.id,
                chat_title=entity.username if entity.username else (entity.phone if entity.phone else ''),
            )

        raise AttributeError(entity, 'Invalid entity type: ' + str(type(entity)))
80 | 


--------------------------------------------------------------------------------
/tests/notifier/test_notifier_engine.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | import unittest
 3 | from configparser import ConfigParser
 4 | from datetime import datetime
 5 | from typing import Dict
 6 | from unittest import mock
 7 | from unittest.mock import call
 8 | 
 9 | from TEx.models.facade.finder_notification_facade_entity import FinderNotificationMessageEntity
10 | from TEx.notifier.notifier_engine import NotifierEngine
11 | from tests.modules.common import TestsCommon
12 | from tests.modules.mockups_groups_mockup_data import base_messages_mockup_data
13 | 
14 | 
class NotifierEngineTest(unittest.TestCase):
    """Tests for the NotifierEngine dispatching."""

    def setUp(self) -> None:
        """Create the configuration object shared by the tests."""
        self.config = ConfigParser()
        self.config.read('../../config.ini')

    def test_run(self):
        """Run the engine for three notifiers and check that every mocked notifier is awaited."""

        # Notifier doubles returned in place of the real notifier classes
        discord_double = mock.AsyncMock()
        discord_double.run = mock.AsyncMock()

        elastic_double = mock.AsyncMock()
        elastic_double.run = mock.AsyncMock()

        target: NotifierEngine = NotifierEngine()
        args: Dict = {
            'export_text': True,
            'config': 'unittest_configfile.config',
            'report_folder': '_report',
            'group_id': '2',
            'order_desc': True,
            'filter': 'Message',
            'limit_days': 30,
            'regex': '(.*http://.*),(.*https://.*)',
        }
        data: Dict = {}
        TestsCommon.execute_basic_pipeline_steps_for_initialization(config=self.config, args=args, data=data)

        # Entity handed over to every notifier
        message_entity: FinderNotificationMessageEntity = FinderNotificationMessageEntity(
            date_time=datetime(2023, 10, 1, 9, 58, 22),
            raw_text='Mocked Raw Text',
            group_name='Channel 1972142108',
            group_id=1972142108,
            from_id='1234',
            to_id=9876,
            reply_to_msg_id=5544,
            message_id=55,
            is_reply=False,
            downloaded_media_info=None,
            found_on='UT FOUND 7',
        )

        # Every notifier is expected to receive exactly this call
        expected_await = call(entity=message_entity, rule_id='RULE_UT_01', source='+15558987453')

        discord_patch = mock.patch('TEx.notifier.notifier_engine.DiscordNotifier', return_value=discord_double)
        elastic_patch = mock.patch('TEx.notifier.notifier_engine.ElasticSearchNotifier', return_value=elastic_double)

        with discord_patch, elastic_patch:
            target.configure(config=self.config)

            run_coroutine = target.run(
                notifiers=['NOTIFIER.DISCORD.NOT_001', 'NOTIFIER.DISCORD.NOT_002', 'NOTIFIER.ELASTIC_SEARCH.UT_01'],
                entity=message_entity,
                rule_id='RULE_UT_01',
                source='+15558987453',
            )
            asyncio.get_event_loop().run_until_complete(run_coroutine)

            # Two Discord notifiers were requested, one Elastic Search notifier
            discord_double.run.assert_has_awaits([expected_await, expected_await])
            elastic_double.run.assert_has_awaits([expected_await])
81 | 


--------------------------------------------------------------------------------
/TEx/core/mapper/telethon_channel_mapper.py:
--------------------------------------------------------------------------------
 1 | """Telethon Channel Entity Mapper."""
 2 | from __future__ import annotations
 3 | 
 4 | from typing import Dict, Union
 5 | 
 6 | from telethon.tl.types import Channel, Chat, User
 7 | 
 8 | 
 9 | class TelethonChannelEntityMapper:
10 |     """Telethon Channel Entity Mapper."""
11 | 
12 |     @staticmethod
13 |     def to_database_dict(entity: Union[Chat, Channel, User], target_phone_numer: str) -> Dict:
14 |         """Map Telethon Entity to TEx Dict to Insert into DB."""
15 |         # Build Model
16 | 
17 |         # Common Props
18 |         values: Dict = {
19 |             'id': entity.id,
20 |             'constructor_id': entity.CONSTRUCTOR_ID,
21 |             'source': target_phone_numer,
22 |             }
23 | 
24 |         # Apply Specific Mappers
25 |         if isinstance(entity, Channel):
26 |             values.update(TelethonChannelEntityMapper.__map_channel(entity))
27 | 
28 |         elif isinstance(entity, Chat):
29 |             values.update(TelethonChannelEntityMapper.__map_chat(entity))
30 | 
31 |         elif isinstance(entity, User):
32 |             values.update(TelethonChannelEntityMapper.__map_user(entity))
33 | 
34 |         return values
35 | 
36 |     @staticmethod
37 |     def __map_channel(entity: Channel) -> Dict:
38 |         """Map Telethon Channel to TEx Dict to Insert into DB."""
39 |         return {
40 |             'gigagroup': entity.gigagroup if entity.gigagroup else False,
41 |             'has_geo': entity.has_geo if entity.has_geo else False,
42 |             'participants_count': entity.participants_count if entity.participants_count else 0,
43 |             'title': entity.title if entity.title else '',
44 |             'access_hash': str(entity.access_hash),
45 |             'fake': entity.fake if entity.fake else False,
46 |             'restricted': entity.restricted if entity.restricted else False,
47 |             'scam': entity.scam if entity.scam else False,
48 |             'group_username': entity.username if entity.username else '',
49 |             'verified': entity.verified if entity.verified else False,
50 |             }
51 | 
52 |     @staticmethod
53 |     def __map_chat(entity: Chat) -> Dict:
54 |         """Map Telethon Chat to TEx Dict to Insert into DB."""
55 |         return {
56 |             'gigagroup': False,
57 |             'has_geo': False,
58 |             'participants_count': entity.participants_count if entity.participants_count else 0,
59 |             'title': entity.title if entity.title else '',
60 |             'access_hash': '',
61 |             'fake': False,
62 |             'restricted': False,
63 |             'scam': False,
64 |             'group_username': '',
65 |             'verified': False,
66 |             }
67 | 
68 |     @staticmethod
69 |     def __map_user(entity: User) -> Dict:
70 |         """Map Telethon User to TEx Dict to Insert into DB."""
71 |         return {
72 |             'gigagroup': False,
73 |             'has_geo': False,
74 |             'participants_count': 0,
75 |             'title': entity.username if entity.username else (entity.phone if entity.phone else ''),
76 |             'access_hash': str(entity.access_hash),
77 |             'fake': entity.fake if entity.fake else False,
78 |             'restricted': entity.restricted if entity.restricted else False,
79 |             'scam': entity.scam if entity.scam else False,
80 |             'group_username': entity.username if entity.username else '',
81 |             'verified': entity.verified if entity.verified else False,
82 |             }
83 | 


--------------------------------------------------------------------------------
/TEx/modules/telegram_maintenance/telegram_purge_old_data.py:
--------------------------------------------------------------------------------
 1 | """Telegram Maintenance - Purge old Data Manager."""
 2 | from __future__ import annotations
 3 | 
 4 | import logging
 5 | import os.path
 6 | from configparser import ConfigParser
 7 | from typing import Dict, List, cast
 8 | 
 9 | from TEx.core.base_module import BaseModule
10 | from TEx.database.telegram_group_database import TelegramGroupDatabaseManager, TelegramMediaDatabaseManager, TelegramMessageDatabaseManager
11 | from TEx.models.database.telegram_db_model import TelegramGroupOrmEntity, TelegramMediaOrmEntity
12 | 
13 | logger = logging.getLogger('TelegramExplorer')
14 | 
15 | 
class TelegramMaintenancePurgeOldData(BaseModule):
    """Maintenance module that removes old medias and messages from every group."""

    async def can_activate(self, config: ConfigParser, args: Dict, data: Dict) -> bool:
        """
        Tell whether this module is enabled for the current execution.

        :return: The value of args['purge_old_data'].
        """
        return cast(bool, args['purge_old_data'])

    async def run(self, config: ConfigParser, args: Dict, data: Dict) -> None:
        """Purge every group of the configured phone number, then run the DB maintenance."""
        enabled: bool = await self.can_activate(config, args, data)
        if not enabled:
            logger.debug('\t\tModule is Not Enabled...')
            return

        # Load Groups from DB
        phone_number: str = config['CONFIGURATION']['phone_number']
        groups: List[TelegramGroupOrmEntity] = TelegramGroupDatabaseManager.get_all_by_phone_number(phone_number)
        logger.info(f'\t\tFound {len(groups)} Groups')

        for current_group in groups:
            try:
                await self.__process_group(
                    group_id=current_group.id,
                    group_name=current_group.title,
                    max_age=int(args['limit_days']),
                    media_root_path=config['CONFIGURATION']['data_path'],
                    )
            except ValueError as ex:
                # A failing group is reported and the remaining groups are still processed
                logger.info('\t\t\tUnable to Purge Old Messages...')
                logger.error(ex)

        # Compress DB
        TelegramMediaDatabaseManager.apply_db_maintenance()
        logger.info('\t\t\tDB Optimized Successfully')

    async def __process_group(self, group_id: int, group_name: str, max_age: int, media_root_path: str) -> None:
        """
        Remove the old medias (disk and DB) and the old messages of a single group.

        :param group_id: Group to be purged.
        :param group_name: Group title, used only for logging.
        :param max_age: Age limit, in days, passed to the database managers.
        :param media_root_path: Root folder that contains the 'media' folder.
        """
        logger.info(f'\t\tPurging ({group_id}) "{group_name}"')

        # Get all Old Medias
        old_medias: List[TelegramMediaOrmEntity] = TelegramMediaDatabaseManager.get_all_medias_by_age(
            group_id=group_id,
            media_limit_days=max_age,
            )
        logger.info(f'\t\t\t{len(old_medias)} Medias to be Removed')

        for old_media in old_medias:

            # Remove from Disk
            disk_path: str = os.path.join(media_root_path, 'media', str(old_media.group_id), old_media.file_name)
            logger.info(f'\t\t\t\t{disk_path}')

            if os.path.exists(disk_path):
                os.remove(disk_path)

            # Remove from DB
            TelegramMediaDatabaseManager.delete_media_by_id(media_id=old_media.id)

        # Delete all Old Messages
        removed_messages: int = TelegramMessageDatabaseManager.remove_all_messages_by_age(
            group_id=group_id,
            limit_days=max_age,
            )
        logger.info(f'\t\t\t{removed_messages} Messages Removed')
86 | 


--------------------------------------------------------------------------------
/tests/modules/test_telegram_groups_list.py:
--------------------------------------------------------------------------------
 1 | """Telegram Groups List Tests."""
 2 | 
 3 | import asyncio
 4 | import logging
 5 | import unittest
 6 | from configparser import ConfigParser
 7 | from typing import Dict
 8 | 
 9 | from TEx.database.telegram_group_database import TelegramGroupDatabaseManager
10 | from TEx.modules.telegram_groups_list import TelegramGroupList
11 | from TEx.modules.telegram_groups_scrapper import TelegramGroupScrapper
12 | from tests.modules.common import TestsCommon
13 | 
14 | 
class TelegramGroupListTest(unittest.TestCase):
    """Tests for the TelegramGroupList module."""

    def setUp(self) -> None:
        """Reset the test environment and insert the two groups listed by the tests."""

        self.config = ConfigParser()
        self.config.read('../../config.ini')

        TestsCommon.basic_test_setup()

        # (id, constructor_id, access_hash, participants_count, group_username, title)
        # Group 1 - Without Any Message / Group 2 - With Previous Messages
        group_rows = (
            (1, 'A', 'AAAAAA', 1, 'UN-A', 'UT-01'),
            (2, 'B', 'BBBBBB', 2, 'UN-b', 'UT-02'),
        )
        for group_id, constructor_id, access_hash, participants, username, title in group_rows:
            TelegramGroupDatabaseManager.insert_or_update({
                'id': group_id, 'constructor_id': constructor_id, 'access_hash': access_hash,
                'fake': False, 'gigagroup': False, 'has_geo': False,
                'participants_count': participants, 'restricted': False,
                'scam': False, 'group_username': username,
                'verified': False, 'title': title, 'source': '5526986587745'
            })

    def _run_module(self, target, args: Dict, data: Dict) -> None:
        """Run the module until completion on the current event loop."""
        asyncio.get_event_loop().run_until_complete(
            target.run(config=self.config, args=args, data=data),
        )

    def test_run(self):
        """List the groups and check the four emitted log lines."""

        target: TelegramGroupScrapper = TelegramGroupList()
        args: Dict = {
            'list_groups': True,
            'config': 'unittest_configfile.config',
        }
        data: Dict = {}

        TestsCommon.execute_basic_pipeline_steps_for_initialization(config=self.config, args=args, data=data)

        expected_messages = (
            '		Found 2 Groups',
            '		ID       	Username	Title',
            '		1	UN-A	UT-01',
            '		2	UN-b	UT-02',
        )

        with self.assertLogs() as captured:
            self._run_module(target, args, data)

            # Check Logs
            self.assertEqual(4, len(captured.records))
            for position, expected_message in enumerate(expected_messages):
                self.assertEqual(expected_message, captured.records[position].message)

    def test_run_disabled(self):
        """Run with the module disabled and check that only the disabled notice is logged."""

        target: TelegramGroupScrapper = TelegramGroupList()
        args: Dict = {
            'list_groups': False,
            'config': 'unittest_configfile.config',
        }
        data: Dict = {}

        TestsCommon.execute_basic_pipeline_steps_for_initialization(config=self.config, args=args, data=data)

        with self.assertLogs('TelegramExplorer', level=logging.DEBUG) as captured:
            self._run_module(target, args, data)

            # Check Logs
            self.assertEqual(1, len(captured.records))
            self.assertEqual('		Module is Not Enabled...', captured.records[0].message)
96 | 


--------------------------------------------------------------------------------
/docs/configuration/complete_configuration_file_example.md:
--------------------------------------------------------------------------------
  1 | # Complete Configuration File Example
  2 | 
  3 | This is an example of a complete configuration file with four finder rules, three Discord hooks, two Elasticsearch connectors, a message exporter, OCR settings, and the signals configuration.
  4 | 
  5 | ```ini
  6 | [CONFIGURATION]
  7 | api_id=12555896
  8 | api_hash=dead1f29db5d1fa56cc42757acbabeef
  9 | phone_number=15552809753
 10 | data_path=/usr/home/tex_data/
 11 | device_model=AMD64
 12 | timeout=30
 13 | 
 14 | [PROXY]
 15 | type=HTTP
 16 | address=127.0.0.1
 17 | port=3128
 18 | username=proxy username
 19 | password=proxy password
 20 | rdns=true
 21 | 
 22 | [MEDIA.DOWNLOAD]
 23 | default=ALLOW
 24 | max_download_size_bytes=256000000
 25 | 
 26 | [FINDER]
 27 | enabled=true
 28 | 
 29 | [FINDER.RULE.MessagesWithURL]
 30 | type=regex
 31 | regex=/^https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%%_\+.~#?&\/=]*)$/
 32 | notifier=NOTIFIER.DISCORD.MY_HOOK_1
 33 | 
 34 | [FINDER.RULE.FindMessagesWithCreditCard]
 35 | type=regex
 36 | regex=(^4[0-9]{12}(?:[0-9]{3})?$)|(^(?:5[1-5][0-9]{2}|222[1-9]|22[3-9][0-9]|2[3-6][0-9]{2}|27[01][0-9]|2720)[0-9]{12}$)|(3[47][0-9]{13})|(^3(?:0[0-5]|[68][0-9])[0-9]{11}$)|(^6(?:011|5[0-9]{2})[0-9]{12}$)|(^(?:2131|1800|35\d{3})\d{11}$)
 37 | notifier=NOTIFIER.DISCORD.MY_HOOK_2,NOTIFIER.ELASTIC_SEARCH.GENERAL
 38 | 
 39 | [FINDER.RULE.FindMessagesWithEmail]
 40 | type=regex
 41 | regex=^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$
 42 | notifier=NOTIFIER.DISCORD.MY_HOOK_1,NOTIFIER.DISCORD.MY_HOOK_2
 43 | 
 44 | [FINDER.RULE.CatchAll]
 45 | type=all
 46 | notifier=NOTIFIER.ELASTIC_SEARCH.GENERAL
 47 | exporter=EXPORTER.ROLLING_PANDAS.EXPORT_ALL_MESSAGES
 48 | 
 49 | [NOTIFIER.DISCORD.MY_HOOK_1]
 50 | webhook=https://discord.com/api/webhooks/1157896186751897357/o7foobar4txvAvKSdeadHiI-9XYeXaGlQtd-5PtrrX_eCE0XElWktpPqjrZ0KbeefPtQC
 51 | prevent_duplication_for_minutes=240
 52 | timeout_seconds=30
 53 | media_attachments_enabled=false
 54 | 
 55 | [NOTIFIER.DISCORD.MY_HOOK_2]
 56 | webhook=https://discord.com/api/webhooks/1128765187657681875/foobarqOMFp_4tM2ic2mbeefNPOZqJnBZZdfaubQv2vJgbYzfdeadZd5aqGX6FmCmbNjX
 57 | prevent_duplication_for_minutes=240
 58 | media_attachments_enabled=false
 59 | 
 60 | [NOTIFIER.DISCORD.SIGNALS_HOOK]
 61 | webhook=https://discord.com/api/webhooks/1128765187657681875/foobarqOMFp_457EDs2mbeefNPPeqJnBZZdfaubQvOKIUHYzfdeadZd5aqGX6FmCmbNjv
 62 | prevent_duplication_for_minutes=0
 63 | media_attachments_enabled=true
 64 | media_attachments_max_size_bytes=10000000
 65 | 
 66 | [NOTIFIER.ELASTIC_SEARCH.GENERAL]
 67 | address=https://localhost:9200
 68 | api_key=bHJtVEg0c0JnNkwwTnYtFFDEADlo6NS1rXzd6NVFSUmEtQ21mQldiUjEwUQ==
 69 | verify_ssl_cert=False
 70 | index_name=index-name
 71 | pipeline_name=ent-search-generic-ingestion
 72 | 
 73 | [NOTIFIER.ELASTIC_SEARCH.SIGNALS]
 74 | address=https://localhost:9200
 75 | api_key=bHJtVEg0c0JnNkwwTnYtFFDEADlo6NS1rXzd6NVFSUmEtQ21mQldiUjEwUQ==
 76 | verify_ssl_cert=False
 77 | index_name=index-name-for-signals
 78 | pipeline_name=ent-search-generic-ingestion
 79 | 
 80 | [EXPORTER.ROLLING_PANDAS.EXPORT_ALL_MESSAGES]
 81 | file_root_path=/path/to/export/folder/
 82 | rolling_every_minutes=5
 83 | fields=date_time,raw_text,group_name,group_id,from_id,to_id,reply_to_msg_id,message_id,is_reply,found_on
 84 | use_header=true
 85 | output_format=json
 86 | keep_last_files=20
 87 | 
 88 | [OCR]
 89 | enabled=true
 90 | type=tesseract
 91 | 
 92 | [OCR.TESSERACT]
 93 | tesseract_cmd=/path/to/tesseract/cmd
 94 | language=eng
 95 | 
 96 | [SIGNALS]
 97 | enabled=true
 98 | keep_alive_interval=300
 99 | 
100 | keep_alive_notifer=NOTIFIER.ELASTIC_SEARCH.SIGNALS
101 | initialization_notifer=NOTIFIER.ELASTIC_SEARCH.SIGNALS
102 | shutdown_notifer=NOTIFIER.ELASTIC_SEARCH.SIGNALS
103 | new_group_notifer=NOTIFIER.DISCORD.SIGNALS_HOOK,NOTIFIER.ELASTIC_SEARCH.SIGNALS
104 | ```
105 | 


--------------------------------------------------------------------------------
/TEx/report_templates/default_report.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html lang="en">
 3 |     <head>
 4 |         <meta charset="utf-8">
 5 |         <meta name="viewport" content="width=device-width, initial-scale=1">
 6 | 
 7 |         <!-- Bootstrap Styles -->
 8 |         <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap.min.css" integrity="sha384-HSMxcRTRxnN+Bdg0JdbxYKrThecOKuH5zCYotlSAcp1+c8xmyTe9GYg1l9a69psu" crossorigin="anonymous">
 9 |         <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap-theme.min.css" integrity="sha384-6pzBo3FDv/PJ8r2KRkGHifhEocL+1X2rVCTTkUfGk7/0pbek5mMa1upzvWbrUbOZ" crossorigin="anonymous">
10 | 
11 |         <style>
12 |             .marker {
13 |                 background-color: yellow;
14 |             }
15 | 
16 |             .previous {
17 |                 background-color: lightcyan;
18 |             }
19 | 
20 |            .next {
21 |                 background-color: lightyellow;
22 |             }
23 |         </style>
24 |     </head>
25 | 
26 |     <body>
27 |         <div class="container" style="width: 99%">
28 |             <div class="row">
29 |                 <h1>TEx - Telegram Explorer - {{groupname}} ({{groupusername}})</h1>
30 |             </div>
31 | 
32 |             <div class="row">
33 |                 <table class="table table-striped table-bordered">
34 |                     <tr>
35 |                         <td>Messages</td>
36 |                     </tr>
37 |                     {% for item in messages %}
38 |                     <tr>
39 |                         <td class="{{ 'next' if item.meta_next else '' }}{{ 'previous' if item.meta_previous else '' }}">
40 |                             <!-- MimeType: {{item.media_mime_type}} -->
41 |                             <!-- Is Next: {{item.meta_next}} -->
42 |                             <!-- Is Previous: {{item.meta_previous}} -->
43 | 
44 |                             {% autoescape false %}
45 |                             <b>{{item.date_time}} UTC {{item.to_from_information}}</b>
46 |                             <br/>{{item.message|replace("\r\n", "<br/>")|replace("\n", "<br/>")}}
47 |                             {% endautoescape %}
48 | 
49 |                             {% if item.media_is_image +%}
50 |                                 {% autoescape false %}
51 |                                     <br/><b><a href="{{item.media_filename}}" target="_media"><img src="{{item.media_filename}}" width="25%" height="25%"></a></b>
52 |                                 {% endautoescape %}
53 |                             {% elif item.media_mime_type == 'application/vnd.geo' +%}
54 |                                 <br/><b><a href="https://www.google.com/maps/@{{item.media_geo}},15z" target="_geo">GeoLocation: {{item.media_geo}}</a></b>
55 |                             {% elif item.media_mime_type == 'video/mp4' +%}
56 |                                 <br/>
57 |                                 <video controls width="250">
58 |                                     <source src="{{item.media_filename}}" type="video/mp4">
59 |                                     Sorry, your browser doesn't support embedded videos.
60 |                                 </video>
61 |                                 <br/><b><a href="{{item.media_filename}}" target="_media_download">Download: {{item.media_filename.split('/')[1]}}</a></b>
62 |                             {% elif item.media_mime_type +%}
63 |                                 <br/><b><a href="{{item.media_filename}}" target="_media_download">Download: {{item.media_filename.split('/')[1]}}</a></b>
64 |                             {% endif %}
65 |                         </td>
66 |                     </tr>
67 |                     {% endfor %}
68 |                 </table>
69 |             </div>
70 |         </div>
71 | 
72 |     </body>
73 | 
74 | </html>


--------------------------------------------------------------------------------
/tests/core/ocr/test_ocr_engine_factory.py:
--------------------------------------------------------------------------------
 1 | """OcrEngineFactory Tests."""
 2 | 
 3 | import unittest
 4 | from unittest import mock
 5 | from configparser import ConfigParser
 6 | from typing import Dict
 7 | 
 8 | from TEx.core.ocr.dummy_ocr_engine import DummyOcrEngine
 9 | from TEx.core.ocr.ocr_engine_base import OcrEngineBase
10 | from TEx.core.ocr.ocr_engine_factory import OcrEngineFactory
11 | from TEx.core.ocr.tesseract_ocr_engine import TesseractOcrEngine
12 | from tests.modules.common import TestsCommon
13 | 
14 | 
15 | class OcrEngineFactoryTest(unittest.TestCase):
16 | 
17 |     def setUp(self) -> None:
18 |         self.config = ConfigParser()
19 |         self.config.read('../../config.ini')
20 | 
21 |     @mock.patch('TEx.core.ocr.tesseract_ocr_engine.os')
22 |     def test_get_instance_tesseract(self, mocked_os_lib):
23 |         """Test get_instance_method returning Tesseract Engine."""
24 | 
25 |         # Call Test Target Method
26 |         args: Dict = {
27 |             'config': 'unittest_configfile.config'
28 |         }
29 |         data: Dict = {}
30 | 
31 |         # Configure Mock
32 |         mocked_os_lib.path = mock.MagicMock()
33 |         mocked_os_lib.path.exists = mock.MagicMock(return_value=True)
34 | 
35 |         TestsCommon.execute_basic_pipeline_steps_for_initialization(config=self.config, args=args, data=data)
36 | 
37 |         self.config['OCR']['enabled'] = 'true'
38 |         self.config['OCR']['type'] = 'tesseract'
39 | 
40 |         self.config['OCR.TESSERACT']['tesseract_cmd'] = '/folder/file'
41 |         self.config['OCR.TESSERACT']['language'] = 'eng+osd'
42 | 
43 |         h_result: OcrEngineBase = OcrEngineFactory.get_instance(self.config)
44 |         self.assertTrue(isinstance(h_result, TesseractOcrEngine))
45 | 
46 |     def test_get_instance_no_ocr_config(self):
47 |         """Test get_instance_method without OCR Setting on config file."""
48 | 
49 |         # Call Test Target Method
50 |         args: Dict = {
51 |             'config': 'unittest_configfile.config'
52 |         }
53 |         data: Dict = {}
54 | 
55 |         TestsCommon.execute_basic_pipeline_steps_for_initialization(config=self.config, args=args, data=data)
56 | 
57 |         self.config.remove_section('OCR')
58 |         self.config.remove_section('TESSERACT')
59 | 
60 |         h_result: OcrEngineBase = OcrEngineFactory.get_instance(self.config)
61 |         self.assertTrue(isinstance(h_result, DummyOcrEngine))
62 | 
63 |     def test_get_instance_disabled_ocr_engine(self):
64 |         """Test get_instance_method with OCR engine disabled on config file."""
65 | 
66 |         # Call Test Target Method
67 |         args: Dict = {
68 |             'config': 'unittest_configfile.config'
69 |         }
70 |         data: Dict = {}
71 | 
72 |         TestsCommon.execute_basic_pipeline_steps_for_initialization(config=self.config, args=args, data=data)
73 | 
74 |         self.config['OCR']['enabled'] = 'false'
75 |         self.config.remove_section('TESSERACT')
76 | 
77 |         h_result: OcrEngineBase = OcrEngineFactory.get_instance(self.config)
78 |         self.assertTrue(isinstance(h_result, DummyOcrEngine))
79 | 
80 |     def test_get_instance_without_engine_ocr_engine(self):
81 |         """Test get_instance_method with OCR engine enabled but without engine settings on config file."""
82 | 
83 |         # Call Test Target Method
84 |         args: Dict = {
85 |             'config': 'unittest_configfile.config'
86 |         }
87 |         data: Dict = {}
88 | 
89 |         TestsCommon.execute_basic_pipeline_steps_for_initialization(config=self.config, args=args, data=data)
90 | 
91 |         self.config['OCR']['enabled'] = 'true'
92 |         del self.config['OCR']['type']
93 |         self.config.remove_section('TESSERACT')
94 | 
95 |         with self.assertRaises(AttributeError) as context:
96 |             OcrEngineFactory.get_instance(self.config)
97 | 


--------------------------------------------------------------------------------
/tests/report_templates/default_report.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html lang="en">
 3 |     <head>
 4 |         <meta charset="utf-8">
 5 |         <meta name="viewport" content="width=device-width, initial-scale=1">
 6 | 
 7 |         <!-- Bootstrap Styles -->
 8 |         <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap.min.css" integrity="sha384-HSMxcRTRxnN+Bdg0JdbxYKrThecOKuH5zCYotlSAcp1+c8xmyTe9GYg1l9a69psu" crossorigin="anonymous">
 9 |         <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap-theme.min.css" integrity="sha384-6pzBo3FDv/PJ8r2KRkGHifhEocL+1X2rVCTTkUfGk7/0pbek5mMa1upzvWbrUbOZ" crossorigin="anonymous">
10 | 
11 |         <style>
12 |             .marker {
13 |                 background-color: yellow;
14 |             }
15 | 
16 |             .previous {
17 |                 background-color: lightcyan;
18 |             }
19 | 
20 |            .next {
21 |                 background-color: lightyellow;
22 |             }
23 |         </style>
24 |     </head>
25 | 
26 |     <body>
27 |         <div class="container" style="width: 99%">
28 |             <div class="row">
29 |                 <h1>TEx - Telegram Explorer - {{groupname}} ({{groupusername}})</h1>
30 |             </div>
31 | 
32 |             <div class="row">
33 |                 <table class="table table-striped table-bordered">
34 |                     <tr>
35 |                         <td>Messages</td>
36 |                     </tr>
37 |                     {% for item in messages %}
38 |                     <tr>
39 |                         <td class="{{ 'next' if item.meta_next else '' }}{{ 'previous' if item.meta_previous else '' }}">
40 |                             <!-- MimeType: {{item.media_mime_type}} -->
41 |                             <!-- Is Next: {{item.meta_next}} -->
42 |                             <!-- Is Previous: {{item.meta_previous}} -->
43 | 
44 |                             {% autoescape false %}
45 |                             <b>{{item.date_time}} UTC {{item.to_from_information}}</b>
46 |                             <br/>{{item.message|replace("\r\n", "<br/>")|replace("\n", "<br/>")}}
47 |                             {% endautoescape %}
48 | 
49 |                             {% if item.media_is_image +%}
50 |                                 {% autoescape false %}
51 |                                     <br/><b><a href="{{item.media_filename}}" target="_media"><img src="{{item.media_filename}}" width="25%" height="25%"></a></b>
52 |                                 {% endautoescape %}
53 |                             {% elif item.media_mime_type == 'application/vnd.geo' +%}
54 |                                 <br/><b><a href="https://www.google.com/maps/@{{item.media_geo}},15z" target="_geo">GeoLocation: {{item.media_geo}}</a></b>
55 |                             {% elif item.media_mime_type == 'video/mp4' +%}
56 |                                 <br/>
57 |                                 <video controls width="250">
58 |                                     <source src="{{item.media_filename}}" type="video/mp4">
59 |                                     Sorry, your browser doesn't support embedded videos.
60 |                                 </video>
61 |                                 <br/><b><a href="{{item.media_filename}}" target="_media_download">Download: {{item.media_filename.split('/')[1]}}</a></b>
62 |                             {% elif item.media_mime_type +%}
63 |                                 <br/><b><a href="{{item.media_filename}}" target="_media_download">Download: {{item.media_filename.split('/')[1]}}</a></b>
64 |                             {% endif %}
65 |                         </td>
66 |                     </tr>
67 |                     {% endfor %}
68 |                 </table>
69 |             </div>
70 |         </div>
71 | 
72 |     </body>
73 | 
74 | </html>


--------------------------------------------------------------------------------
/TEx/notifier/signals_engine.py:
--------------------------------------------------------------------------------
  1 | """Signals Notification Engine."""
  2 | from __future__ import annotations
  3 | 
  4 | from configparser import ConfigParser
  5 | from datetime import datetime
  6 | from typing import List
  7 | 
  8 | import pytz
  9 | 
 10 | from TEx.core.mapper.keep_alive_entity_mapper import SignalEntityMapper
 11 | from TEx.models.facade.signal_entity_model import SignalEntity
 12 | from TEx.models.facade.signal_notification_model import SignalNotificationEntityModel
 13 | from TEx.notifier.notifier_engine import NotifierEngine
 14 | 
 15 | 
 16 | class SignalsEngineFactory:
 17 |     """Signals Notification Engine Factory."""
 18 | 
 19 |     @staticmethod
 20 |     def get_instance(config: ConfigParser, notification_engine: NotifierEngine, source: str) -> SignalsEngine:
 21 |         """Get the Signals Engine Instance."""
 22 |         return SignalsEngine(
 23 |             entity=SignalEntityMapper.to_entity(section_proxy=config['SIGNALS'] if config.has_section('SIGNALS') else None),
 24 |             notification_engine=notification_engine,
 25 |             source=source,
 26 |         )
 27 | 
 28 | 
 29 | class SignalsEngine:
 30 |     """Signals Notification Engine."""
 31 | 
 32 |     def __init__(self, entity: SignalEntity, notification_engine: NotifierEngine, source: str) -> None:
 33 |         """Initialize the Signals Engine."""
 34 |         self.signal_entity: SignalEntity = entity
 35 |         self.messages_sent: int = 0
 36 |         self.notification_engine: NotifierEngine = notification_engine
 37 |         self.source: str = source
 38 | 
 39 |     @property
 40 |     def keep_alive_interval(self) -> int:
 41 |         """Return the Keep Alive Engine Interval."""
 42 |         return self.signal_entity.keep_alive_interval
 43 | 
 44 |     def inc_messages_sent(self) -> None:
 45 |         """Increment the Messages Sent Counter."""
 46 |         self.messages_sent += 1
 47 | 
 48 |     async def keep_alive(self) -> None:
 49 |         """Send the Keep Alive."""
 50 |         await self.__send_signal(
 51 |             entity=SignalNotificationEntityModel(
 52 |                 date_time=datetime.now(tz=pytz.UTC),
 53 |                 content=f'Messages Processed in Period: {self.messages_sent}',
 54 |                 signal='KEEP-ALIVE',
 55 |             ),
 56 |         )
 57 | 
 58 |         # Reset Messages Sent Counter
 59 |         self.messages_sent = 0
 60 | 
 61 |     async def shutdown(self) -> None:
 62 |         """Send the Shutdown."""
 63 |         await self.__send_signal(
 64 |             entity=SignalNotificationEntityModel(
 65 |                 date_time=datetime.now(tz=pytz.UTC),
 66 |                 content=f'Last Messages Processed in Period: {self.messages_sent}',
 67 |                 signal='SHUTDOWN',
 68 |             ),
 69 |         )
 70 | 
 71 |     async def init(self) -> None:
 72 |         """Send the Shutdown."""
 73 |         await self.__send_signal(
 74 |             entity=SignalNotificationEntityModel(
 75 |                 date_time=datetime.now(tz=pytz.UTC),
 76 |                 content='',
 77 |                 signal='INITIALIZATION',
 78 |             ),
 79 |         )
 80 | 
 81 |     async def new_group(self, group_id: str, group_title: str) -> None:
 82 |         """Send the New Group Event."""
 83 |         await self.__send_signal(
 84 |             entity=SignalNotificationEntityModel(
 85 |                 date_time=datetime.now(tz=pytz.UTC),
 86 |                 content=f'ID: {group_id} | Title: "{group_title}"',
 87 |                 signal='NEW-GROUP',
 88 |             ),
 89 |         )
 90 | 
 91 |     async def __send_signal(self, entity: SignalNotificationEntityModel) -> None:
 92 |         """Send the Signal."""
 93 |         signal_notifiers: List[str] = self.signal_entity.notifiers[entity.signal]
 94 | 
 95 |         if len(signal_notifiers) == 0:
 96 |             return
 97 | 
 98 |         await self.notification_engine.run(
 99 |             notifiers=signal_notifiers,
100 |             entity=entity,
101 |             rule_id='SIGNALS',
102 |             source=self.source,
103 |         )
104 | 


--------------------------------------------------------------------------------
/docs/how_use/usage_message_listener.md:
--------------------------------------------------------------------------------
 1 | # Listen Messages
 2 | 
 3 | The Message Listener is the core of Telegram Explorer. This command starts a process that listens to all messages provided by the Telegram servers.
 4 | 
 5 | > The Message Listener automatically synchronizes Groups and Users.
 6 | 
 7 | Once started, the Telegram Explorer runner does not stop or terminate until the Telegram servers disconnect the client or the running process receives a SIGTERM.
 8 | 
 9 | **Full Command:**
10 | 
11 | ```bash
12 | python3 -m TEx listen --config CONFIGURATION_FILE_PATH --ignore_media --group_id 1234,5678
13 | ```
14 | 
15 | **Basic Command:**
16 | ```bash
17 | python3 -m TEx listen --config CONFIGURATION_FILE_PATH
18 | ```
19 | 
20 | **Parameters**
21 | 
22 |   * **config** > Required - Created Configuration File Path
23 |   * **ignore_media** > Optional - If present, don't Download any Media
24 |   * **group_id** > Optional - If present, Download the Messages only from the Specified Group IDs. Comma Separated
25 | 
26 | 
27 | *Output Example:*
28 | ```bash
29 | TEx - Telegram Explorer
30 | Version 0.2.12
31 | By: Th3 0bservator
32 | 
33 | 2023-10-01 20:46:53,880 - INFO - [*] Loading Configurations:
34 | 2023-10-01 20:46:53,880 - INFO - [*] Installed Modules:
35 | 2023-10-01 20:46:53,880 - INFO - 	data_structure_handler.py
36 | 2023-10-01 20:46:53,880 - INFO - 	database_handler.py
37 | 2023-10-01 20:46:53,880 - INFO - 	execution_configuration_handler.py
38 | 2023-10-01 20:46:53,880 - INFO - 	telegram_connection_manager.py
39 | 2023-10-01 20:46:53,880 - INFO - 	telegram_groups_list.py
40 | 2023-10-01 20:46:53,880 - INFO - 	telegram_groups_scrapper.py
41 | 2023-10-01 20:46:53,880 - INFO - 	telegram_maintenance
42 | 2023-10-01 20:46:53,880 - INFO - 	telegram_messages_listener.py
43 | 2023-10-01 20:46:53,880 - INFO - 	telegram_messages_scrapper.py
44 | 2023-10-01 20:46:53,881 - INFO - 	telegram_report_generator
45 | 2023-10-01 20:46:53,881 - INFO - 	telegram_stats_generator.py
46 | 2023-10-01 20:46:53,891 - INFO - [*] Loading Execution Configurations:
47 | 2023-10-01 20:46:54,179 - INFO - [*] Executing Pipeline:
48 | 2023-10-01 20:46:54,179 - INFO - 	[+] telegram_connection_manager.TelegramConnector
49 | 2023-10-01 20:46:55,763 - INFO - 		User Authorized on Telegram: True
50 | 2023-10-01 20:46:55,775 - INFO - 	[+] telegram_messages_listener.TelegramGroupMessageListener
51 | 2023-10-01 20:46:55,912 - INFO - 		Listening Past Messages...
52 | 2023-10-01 20:46:55,912 - INFO - 		Listening New Messages...
53 | 2023-10-01 20:46:55,923 - INFO - 			Downloading Photo from Message 20436 at 2023-09-30 00:58:35
54 | 2023-10-01 20:46:56,774 - INFO - 			Downloading Photo from Message 788 at 2023-09-30 09:48:51
55 | 2023-10-01 20:46:56,805 - INFO - 			Downloading Photo from Message 20438 at 2023-09-30 11:18:12
56 | 2023-10-01 20:46:56,807 - INFO - 			Downloading Photo from Message 37345 at 2023-09-30 04:39:54
57 | 2023-10-01 20:46:56,823 - INFO - 			Downloading Photo from Message 37346 at 2023-09-30 13:12:39
58 | 2023-10-01 20:46:58,053 - INFO - 			Downloading Photo from Message 725 at 2023-09-30 15:07:38
59 | 2023-10-01 20:46:58,105 - INFO - 			Downloading Photo from Message 727 at 2023-09-30 15:16:05
60 | 2023-10-01 20:46:58,148 - INFO - 			Downloading Photo from Message 20440 at 2023-09-30 14:52:21
61 | 2023-10-01 20:46:58,149 - INFO - 			Downloading Photo from Message 37347 at 2023-09-30 15:23:33
62 | 2023-10-01 20:46:58,743 - WARNING - 		Group "1246578969" not found on DB. Performing automatic synchronization. Consider execute "load_groups" command to perform a full group synchronization (Members and Group Cover Photo).
63 | 2023-10-01 20:46:58,751 - INFO - 			Downloading Photo from Message 13855 at 2023-09-30 21:00:09
64 | 2023-10-01 20:46:58,752 - INFO - 			Downloading Media from Message 12587 (9739.13 Kbytes) as video/mp4 at 2023-09-30 21:37:30
65 | 2023-10-01 20:46:58,779 - INFO - 			Downloading Photo from Message 37348 at 2023-09-30 22:10:03
66 | 2023-10-01 20:46:59,062 - WARNING - 		User "1254788963" was not found on DB. Performing automatic synchronization.
67 | 2023-10-01 20:46:59,110 - INFO - 			Downloading Photo from Message 13856 at 2023-10-01 02:08:19
68 | 2023-10-01 20:46:59,111 - INFO - 			Downloading Photo from Message 13857 at 2023-10-01 02:08:19
69 | ```


--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
 1 | # Welcome to Telegram Explorer
 2 | 
 3 | [![](https://img.shields.io/github/last-commit/guibacellar/TEx)](https://github.com/guibacellar/TEx/tree/main)
 4 | [![](https://img.shields.io/github/languages/code-size/guibacellar/TEx)](https://github.com/guibacellar/TEx/tree/main)
 5 | [![](https://img.shields.io/badge/Python-3.8+-green.svg)](https://www.python.org/downloads/) 
 6 | [![](https://github.com/guibacellar/TEx/actions/workflows/cy.yml/badge.svg?branch=main)](https://github.com/guibacellar/TEx/actions/workflows/cy.yml)
 7 | [![](https://telegramexplorer.readthedocs.io/en/latest/?badge=latest)](https://telegramexplorer.readthedocs.io/en/latest/)
 8 | [![](https://img.shields.io/badge/maintainer-Th3%200bservator-blue)](https://theobservator.net/)
 9 | ![](https://img.shields.io/github/v/release/guibacellar/TeX)
10 | 
11 | <!-- ABOUT THE PROJECT -->
12 | ## About The Project
13 | 
14 | TEx is a Telegram Explorer tool created to help Researchers, Investigators and Law Enforcement Agents to Collect and Process the Huge Amount of Data Generated from Criminal, Fraud, Security and Others Telegram Groups.
15 | 
16 | Repository: [https://github.com/guibacellar/TEx](https://github.com/guibacellar/TEx)
17 | 
18 | !!! warning "BETA VERSION"
19 |     
20 |     Please note that V0.3.0 is the latest beta version of this project, so you may encounter bugs that have not yet been mapped out.
21 |     I kindly ask you to report the bugs at: [https://github.com/guibacellar/TEx/issues](https://github.com/guibacellar/TEx/issues)
22 | 
23 | <!-- REQUIREMENTS -->
24 | ## Requirements
25 | - Python 3.8.1+ (⚠️ Deprecated. Consider using version 3.10+ ⚠️)
26 | - Windows x64 or Linux x64
27 | 
28 | <!-- FEATURES -->
29 | ## Features
30 | - Connection Manager (Handle Telegram Connection)
31 | - Group Information Scrapper
32 | - List Groups (Scrap info for all groups, including members, members info and profile pic)
33 | - Automatic Group Information Sync
34 | - Automatic Users Information Sync
35 | - Messages Listener (Listen all Incoming Messages)
36 | - Messages Scrapper (Scrap all Group Messages, since the first one)
37 | - Download Media (Including fine media settings like size, groups and/or media type)
38 | - HTML Report Generation
39 | - Export Downloaded Files
40 | - Export Messages
41 | - Message Finder System (Allows finding patterns in messages using terms or RegEx)
42 | - Message Notification System (Sends alerts, finds, or all messages to Discord)
43 | - Elastic Search 8+ Native Integration
44 | - Image OCR using Tesseract
45 | - Signals for Helping Monitoring
46 | 
47 | <!-- LIMITATIONS -->
48 | ## Known Limitations
49 | 
50 | Although we do not currently know the limitations of using the tool, it is important to state the limits up to which the platform has been tested.
51 | 
52 | Currently, **one TEx process can support at least** (per configuration file/per phone number):
53 | 
54 | **Per Group**
55 | 
56 | - 50,000 messages
57 | - 7,000 users per group
58 | - 8 GB of downloaded files
59 | 
60 | **Total**
61 | 
62 | - 400 groups
63 | - 800,000 messages
64 | - 50,000 unique users
65 | - 150 GB of total downloaded files
66 | 
67 | <!-- HOW WORKS -->
68 | ## How Telegram Explorer Works
69 | Telegram Explorer works using one configuration file per target phone number to be used. 
70 | 
71 | ![how_text_works.png](media/how_text_works.png)
72 | 
73 | So, you can deploy one or several Telegram Explorer runners on one machine, using one configuration file for each instance. You can also deploy the runner using Linux Containers or Docker containers.
74 | 
75 | !!! info "IMPORTANT"
76 |     
77 |     Depending on the security level and your account settings, you may be asked to enter a security code that will be sent to your Telegram, or some authentication information. <br/><br/>In that case, the application will ask you (only at the time of the first connection) to enter this value in the terminal (TTY).
78 | 
79 | <!-- INSTALLING -->
80 | ## Installing
81 | Telegram Explorer is available through *pip*, so just use pip install to fully install TEx.
82 | 
83 | ```bash
84 | pip install TelegramExplorer
85 | ```
86 | 
87 | <!-- Upgrading -->
88 | ## Upgrading
89 | To upgrade TEx to the latest version, just use the *pip install --upgrade* command.
90 | 
91 | ```bash
92 | pip install --upgrade TelegramExplorer
93 | ```
94 | 


--------------------------------------------------------------------------------
/TEx/notifier/elastic_search_notifier.py:
--------------------------------------------------------------------------------
  1 | """Elastic Search Notifier."""
  2 | from __future__ import annotations
  3 | 
  4 | from configparser import SectionProxy
  5 | from typing import Dict, Optional, Union
  6 | 
  7 | import pytz
  8 | from elasticsearch import AsyncElasticsearch
  9 | 
 10 | from TEx.models.facade.finder_notification_facade_entity import FinderNotificationMessageEntity
 11 | from TEx.models.facade.signal_notification_model import SignalNotificationEntityModel
 12 | from TEx.notifier.notifier_base import BaseNotifier
 13 | 
 14 | 
 15 | class ElasticSearchNotifier(BaseNotifier):
 16 |     """Basic Elastic Search Notifier."""
 17 | 
 18 |     def __init__(self) -> None:
 19 |         """Initialize Elastic Search Notifier."""
 20 |         super().__init__()
 21 |         self.url: str = ''
 22 |         self.client: Optional[AsyncElasticsearch] = None
 23 |         self.index: str = ''
 24 |         self.pipeline: str = ''
 25 | 
 26 |     def configure(self, config: SectionProxy) -> None:
 27 |         """Configure the Notifier."""
 28 |         hosts_list: Optional[str] = config.get('address', fallback=None)
 29 | 
 30 |         self.client = AsyncElasticsearch(
 31 |             hosts=hosts_list.split(',') if hosts_list else None,  # type: ignore
 32 |             api_key=config.get('api_key', fallback=None),
 33 |             verify_certs=config.get('verify_ssl_cert', fallback='True') == 'True',
 34 |             cloud_id=config.get('cloud_id', fallback=None),
 35 |             request_timeout=30,
 36 |             max_retries=10,
 37 |             ssl_show_warn=False,
 38 |         )
 39 |         self.index = config['index_name']
 40 |         self.pipeline = config['pipeline_name']
 41 | 
 42 |     async def run(self, entity: Union[FinderNotificationMessageEntity, SignalNotificationEntityModel], rule_id: str, source: str) -> None:
 43 |         """Run Elastic Search Notifier."""
 44 |         if not self.client:
 45 |             return
 46 | 
 47 |         content: Dict
 48 | 
 49 |         if isinstance(entity, FinderNotificationMessageEntity):
 50 |             content = await self.__get_dict_for_finder_notification(
 51 |                 entity=entity,
 52 |                 rule_id=rule_id,
 53 |                 source=source,
 54 |             )
 55 |         else:
 56 |             content = await self.__get_dict_for_signal_notification(
 57 |                 entity=entity,
 58 |                 source=source,
 59 |             )
 60 | 
 61 |         await self.client.index(
 62 |             index=self.index,
 63 |             pipeline=self.pipeline,
 64 |             document=content,
 65 |         )
 66 | 
 67 |     async def __get_dict_for_finder_notification(self, entity: FinderNotificationMessageEntity, rule_id: str, source: str) -> Dict:
 68 |         """Return the Dict for Finder Notifications."""
 69 |         content: Dict = {
 70 |                 'time': entity.date_time.astimezone(tz=pytz.utc),
 71 |                 'source': source,
 72 |                 'rule': rule_id,
 73 |                 'raw': entity.raw_text,
 74 |                 'group_name': entity.group_name,
 75 |                 'group_id': entity.group_id,
 76 |                 'from_id': entity.from_id,
 77 |                 'to_id': entity.to_id,
 78 |                 'reply_to_msg_id': entity.reply_to_msg_id,
 79 |                 'message_id': entity.message_id,
 80 |                 'is_reply': entity.is_reply,
 81 |                 'found_on': entity.found_on,
 82 |             }
 83 | 
 84 |         if entity.downloaded_media_info:
 85 |             content['has_media'] = True
 86 |             content['media_mime_type'] = entity.downloaded_media_info.content_type
 87 |             content['media_size'] = entity.downloaded_media_info.size_bytes
 88 |         else:
 89 |             content['has_media'] = False
 90 |             content['media_mime_type'] = None
 91 |             content['media_size'] = None
 92 | 
 93 |         return content
 94 | 
 95 |     async def __get_dict_for_signal_notification(self, entity: SignalNotificationEntityModel, source: str) -> Dict:
 96 |         """Return the Dict for Signal Notifications."""
 97 |         content: Dict = {
 98 |             'time': entity.date_time.astimezone(tz=pytz.utc),
 99 |             'source': source,
100 |             'signal': entity.signal,
101 |             'content': entity.content,
102 |         }
103 | 
104 |         return content
105 | 
106 | 


--------------------------------------------------------------------------------