├── .dockerignore ├── .editorconfig ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .python-version ├── Dockerfile ├── Makefile ├── README.md ├── bot.py ├── dev-requirements.txt ├── docker-compose.yml ├── filters.py ├── helpers.py ├── models.py ├── mypy.ini ├── pyproject.toml ├── requirements.txt ├── tests ├── __init__.py ├── conftest.py └── tests_filters │ ├── __init__.py │ ├── conftest.py │ ├── test_chat_message_only.py │ ├── test_cointains_three_or_more_emojis.py │ ├── test_contains_link.py │ ├── test_contains_tg_contact.py │ ├── test_has_no_valid_previous_messages.py │ ├── test_is_media.py │ └── test_is_message_behalf_of_chat.py └── text.py /.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | .env 3 | venv 4 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | [*] 2 | indent_size = 2 3 | indent_style = space 4 | max_line_length = off 5 | 6 | [*.py] 7 | indent_size = 4 8 | 9 | [{Makefile,**.mk}] 10 | indent_style = tab 11 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: CI 3 | on: push 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v4 10 | 11 | - uses: actions/setup-python@v5 12 | id: setup-python 13 | with: 14 | python-version-file: ".python-version" 15 | 16 | - uses: actions/cache@v4 17 | with: 18 | path: | 19 | venv 20 | key: ${{ runner.os }}-venv-v1-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/*requirements.txt') }} 21 | 22 | - name: Install dependencies 23 | if: steps.cache-primes.outputs.cache-hit != 'true' 24 | run: | 25 | python -m venv venv 26 | . venv/bin/activate 27 | pip install --upgrade pip pip-tools 28 | pip-sync requirements.txt dev-requirements.txt 29 | 30 | - name: Run the linter 31 | run: | 32 | . venv/bin/activate 33 | make lint 34 | 35 | test: 36 | needs: build 37 | runs-on: ubuntu-latest 38 | services: 39 | postgres: 40 | image: postgres:13.9-alpine 41 | env: 42 | POSTGRES_HOST_AUTH_METHOD: trust 43 | options: >- 44 | --health-cmd pg_isready 45 | --health-interval 10s 46 | --health-timeout 5s 47 | --health-retries 5 48 | ports: 49 | - 5432:5432 50 | 51 | steps: 52 | - uses: actions/checkout@v4 53 | 54 | - uses: actions/setup-python@v5 55 | id: setup-python 56 | with: 57 | python-version-file: '.python-version' 58 | 59 | - uses: actions/cache@v4 60 | with: 61 | path: | 62 | venv 63 | key: ${{ runner.os }}-venv-v1-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/*requirements.txt') }} 64 | 65 | - name: Run the tests 66 | env: 67 | DATABASE_URL: postgres://postgres@localhost:5432/postgres 68 | run: | 69 | . venv/bin/activate 70 | make test 71 | 72 | build-docker-image: 73 | needs: test 74 | runs-on: ubuntu-latest 75 | steps: 76 | - name: Checkout 77 | uses: actions/checkout@v4 78 | 79 | - uses: actions/setup-python@v5 80 | id: setup-python 81 | with: 82 | python-version-file: '.python-version' 83 | 84 | - name: Set up QEMU 85 | uses: docker/setup-qemu-action@v3 86 | 87 | - name: Set up Docker Buildx 88 | uses: docker/setup-buildx-action@v3 89 | 90 | - name: Generate image identifier 91 | id: image 92 | uses: ASzc/change-string-case-action@v6 93 | with: 94 | string: ${{ github.repository }} 95 | 96 | - name: Login to GitHub Container Registry 97 | uses: docker/login-action@v3 98 | if: ${{ github.ref == 'refs/heads/master' }} 99 | with: 100 | registry: ghcr.io 101 | username: ${{ github.repository_owner }} 102 | password: ${{ secrets.GITHUB_TOKEN }} 103 | 104 | - name: Build the image 105 | uses: docker/build-push-action@v6 106 | with: 107 | context: . 108 | push: ${{ github.ref == 'refs/heads/master' }} 109 | tags: | 110 | ghcr.io/${{ steps.image.outputs.lowercase }}:latest 111 | ghcr.io/${{ steps.image.outputs.lowercase }}:${{ github.sha }} 112 | build-args: | 113 | PYTHON_VERSION=${{ steps.setup-python.outputs.python-version }} 114 | 115 | deploy: 116 | needs: build-docker-image 117 | if: ${{ github.ref == 'refs/heads/master' }} 118 | runs-on: ubuntu-latest 119 | steps: 120 | - name: Read image identifiers 121 | id: image 122 | uses: ASzc/change-string-case-action@v6 123 | with: 124 | string: ${{ github.repository }} 125 | 126 | - name: Update the image 127 | uses: appleboy/ssh-action@v1.0.3 128 | with: 129 | host: ${{ secrets.DEPLOY_HOST }} 130 | username: circle 131 | key: ${{ secrets.DEPLOY_KEY }} 132 | script: docker service update telegram_antispam --image ghcr.io/${{ steps.image.outputs.lowercase }}:${{ github.sha }} --with-registry-auth 133 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/python 3 | 4 | ### Python ### 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | env/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *,cover 49 | .hypothesis/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | 59 | # Local 60 | .DS_Store 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # IPython Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule* 83 | 84 | # dotenv 85 | .env 86 | 87 | # virtualenv 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # deployed static files folder 98 | src/vangus/static/* 99 | static/* 100 | media/* 101 | 102 | *.sublime-workspace 103 | *~ 104 | .dir-locals.el 105 | .vscode 106 | 107 | node_modules/ 108 | 109 | # vue frontend 110 | src/frontend/templates/frontend/index.html 111 | src/frontend/static/build 112 | src/frontend/static/js/* 113 | 114 | .tern-port 115 | 116 | # IDEA Folder 117 | .idea 118 | 119 | .envrc 120 | .pytest_cache 121 | .postgres 122 | *.sqlite 123 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.10.10 2 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG PYTHON_VERSION 2 | FROM python:${PYTHON_VERSION}-slim-bullseye 3 | 4 | RUN apt-get update && apt-get --no-install-recommends -y install wget && rm -Rf rm -rf /var/lib/apt/lists/* 5 | 6 | WORKDIR / 7 | COPY requirements.txt / 8 | RUN pip install --upgrade --no-cache-dir pip && \ 9 | pip install --no-cache-dir -r /requirements.txt 10 | 11 | WORKDIR /srv 12 | COPY . /srv/ 13 | 14 | HEALTHCHECK CMD wget -q -O - --content-on-error http://localhost:8000|grep -qi "not found" 15 | 16 | USER nobody 17 | CMD python bot.py 18 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | install-dev-deps: dev-deps 2 | pip-sync requirements.txt dev-requirements.txt 3 | 4 | install-deps: deps 5 | pip-sync requirements.txt 6 | 7 | deps: 8 | pip-compile --resolver=backtracking --output-file=requirements.txt pyproject.toml 9 | 10 | dev-deps: deps 11 | pip-compile --resolver=backtracking --extra=dev --output-file=dev-requirements.txt pyproject.toml 12 | 13 | lint: 14 | flake8 *.py 15 | mypy 16 | 17 | fmt: 18 | isort . 19 | 20 | test: 21 | pytest --dead-fixtures 22 | pytest -x 23 | 24 | dev: 25 | watchmedo auto-restart --patterns '*.py' python bot.py 26 | 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Channel discussion antispam bot 2 | 3 | Removes spam messasges from the telegram channel discussion. Ignores old-time members allowing them to talk as usual 4 | * Messages, containing links 5 | * Messages on behalf of another channels or groups 6 | * Photos and voice messages 7 | 8 | ## Usage 9 | 10 | Add our [hosted version](https://t.me/discussion_sentinel_bot) as an admin to your channel discussion group. That is it! 11 | -------------------------------------------------------------------------------- /bot.py: -------------------------------------------------------------------------------- 1 | import os 2 | from telegram import Message, Update 3 | from telegram.error import TelegramError 4 | from telegram.ext import Application, CommandHandler, ContextTypes, MessageHandler 5 | from telegram.ext.filters import TEXT, BaseFilter 6 | 7 | import text 8 | from filters import ContainsLink, ContainsTelegramContact, ContainsThreeOrMoreEmojies, IsMedia, IsMessageOnBehalfOfChat, with_default_filters 9 | from helpers import enable_logging, in_production, init_sentry 10 | 11 | 12 | def get_previous_non_deleted_message_count(chat_id: int) -> int: 13 | from models import LogEntry 14 | 15 | return LogEntry.select().where( 16 | (LogEntry.chat_id == chat_id), 17 | (LogEntry.action == 'deletion_error'), 18 | ).count() 19 | 20 | 21 | async def log_message(message: Message | None, action: str | None = ''): 22 | """Create a log entry for telegram message""" 23 | 24 | if message is None or message.from_user is None: 25 | return 26 | 27 | if get_previous_non_deleted_message_count(message.chat_id) > 10: 28 | return 29 | 30 | from models import LogEntry 31 | 32 | LogEntry.create( 33 | user_id=message.from_user.id, 34 | chat_id=message.chat_id, 35 | message_id=message.message_id, 36 | text=message.text or '', 37 | meta={ 38 | 'tags': text.Labels(message.text)(), 39 | }, 40 | raw=message.to_dict(), 41 | action=action, 42 | ) 43 | 44 | 45 | async def delete(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: 46 | """Delete a message""" 47 | message = update.message or update.edited_message 48 | 49 | if message is not None: 50 | try: 51 | await message.delete() 52 | except TelegramError: 53 | await log_message(message, action='deletion_error') 54 | else: 55 | await log_message(message, action='delete') 56 | 57 | 58 | async def introduce_myself(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: 59 | if update.effective_chat is not None: 60 | await context.bot.send_message( 61 | chat_id=update.effective_chat.id, 62 | text=""" 63 | Это бот, который чистит спам из телеграм-комментов. Чтобы он заработал — добавьте его как админа в дискуссионную группу канала. Не забудьте разрешить удалять сообщения, без этого бот не будет работать. 64 | """, 65 | ) 66 | 67 | 68 | async def ping(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: 69 | if update.effective_chat is not None: 70 | await context.bot.send_message( 71 | chat_id=update.effective_chat.id, 72 | text='pong!', 73 | ) 74 | 75 | 76 | def delete_messages_that_match(*filters: BaseFilter) -> MessageHandler: 77 | """Sugar for quick adding delete message callbacks""" 78 | return MessageHandler(callback=delete, filters=with_default_filters(*filters)) 79 | 80 | 81 | if __name__ == '__main__': 82 | from dotenv import load_dotenv 83 | 84 | load_dotenv() 85 | 86 | bot_token = os.getenv('BOT_TOKEN') 87 | if not bot_token: 88 | raise RuntimeError('Please set BOT_TOKEN environment variable') 89 | app_name = os.getenv('BOT_NAME') 90 | 91 | bot = Application.builder().token(bot_token).build() 92 | 93 | bot.add_handler(CommandHandler('start', introduce_myself)) 94 | bot.add_handler(CommandHandler('ping', ping)) 95 | 96 | bot.add_handler(delete_messages_that_match(IsMessageOnBehalfOfChat())) 97 | bot.add_handler(delete_messages_that_match(ContainsTelegramContact())) 98 | bot.add_handler(delete_messages_that_match(ContainsLink())) 99 | bot.add_handler(delete_messages_that_match(ContainsThreeOrMoreEmojies())) 100 | bot.add_handler(delete_messages_that_match(IsMedia())) 101 | 102 | from models import create_tables 103 | 104 | create_tables() # type: ignore 105 | bot.add_handler( 106 | MessageHandler( 107 | filters=TEXT, 108 | callback=lambda update, context: log_message(update.message or update.edited_message), 109 | ), 110 | ) 111 | 112 | if in_production(): 113 | init_sentry() 114 | bot.run_webhook( 115 | listen='0.0.0.0', 116 | port=8000, 117 | url_path=bot_token, 118 | webhook_url=f'https://{app_name}.tough-dev.school/' + bot_token, 119 | ) 120 | else: # bot is running on the dev machine 121 | enable_logging() 122 | bot.run_polling() 123 | -------------------------------------------------------------------------------- /dev-requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.10 3 | # by the following command: 4 | # 5 | # pip-compile --extra=dev --output-file=dev-requirements.txt --resolver=backtracking pyproject.toml 6 | # 7 | anyio==3.7.1 8 | # via httpcore 9 | appnope==0.1.3 10 | # via ipython 11 | astor==0.8.1 12 | # via flake8-simplify 13 | asttokens==2.2.1 14 | # via stack-data 15 | attrs==23.1.0 16 | # via 17 | # flake8-bugbear 18 | # flake8-eradicate 19 | # flake8-multiline-containers 20 | autopep8==1.5.7 21 | # via channel-discussion-antispam-bot (pyproject.toml) 22 | backcall==0.2.0 23 | # via ipython 24 | boto3-stubs==1.27.1 25 | # via types-boto3 26 | botocore-stubs==1.29.165 27 | # via boto3-stubs 28 | certifi==2023.5.7 29 | # via 30 | # httpcore 31 | # httpx 32 | # sentry-sdk 33 | cognitive-complexity==1.3.0 34 | # via flake8-cognitive-complexity 35 | decorator==5.1.1 36 | # via ipython 37 | emoji==1.7.0 38 | # via channel-discussion-antispam-bot (pyproject.toml) 39 | eradicate==2.3.0 40 | # via flake8-eradicate 41 | exceptiongroup==1.1.2 42 | # via 43 | # anyio 44 | # pytest 45 | executing==1.2.0 46 | # via stack-data 47 | flake8==6.0.0 48 | # via 49 | # flake8-bugbear 50 | # flake8-commas 51 | # flake8-eradicate 52 | # flake8-isort 53 | # flake8-multiline-containers 54 | # flake8-mutable 55 | # flake8-pep3101 56 | # flake8-print 57 | # flake8-printf-formatting 58 | # flake8-pyproject 59 | # flake8-quotes 60 | # flake8-simplify 61 | # flake8-use-fstring 62 | # flake8-walrus 63 | flake8-bugbear==23.6.5 64 | # via channel-discussion-antispam-bot (pyproject.toml) 65 | flake8-cognitive-complexity==0.1.0 66 | # via channel-discussion-antispam-bot (pyproject.toml) 67 | flake8-commas==2.1.0 68 | # via channel-discussion-antispam-bot (pyproject.toml) 69 | flake8-eradicate==1.5.0 70 | # via channel-discussion-antispam-bot (pyproject.toml) 71 | flake8-fixme==1.1.1 72 | # via channel-discussion-antispam-bot (pyproject.toml) 73 | flake8-isort==6.0.0 74 | # via channel-discussion-antispam-bot (pyproject.toml) 75 | flake8-multiline-containers==0.0.19 76 | # via channel-discussion-antispam-bot (pyproject.toml) 77 | flake8-mutable==1.2.0 78 | # via channel-discussion-antispam-bot (pyproject.toml) 79 | flake8-pep3101==2.0.0 80 | # via channel-discussion-antispam-bot (pyproject.toml) 81 | flake8-pie==0.16.0 82 | # via channel-discussion-antispam-bot (pyproject.toml) 83 | flake8-print==5.0.0 84 | # via channel-discussion-antispam-bot (pyproject.toml) 85 | flake8-printf-formatting==1.1.2 86 | # via channel-discussion-antispam-bot (pyproject.toml) 87 | flake8-pyproject==1.2.3 88 | # via channel-discussion-antispam-bot (pyproject.toml) 89 | flake8-quotes==3.3.2 90 | # via channel-discussion-antispam-bot (pyproject.toml) 91 | flake8-simplify==0.20.0 92 | # via channel-discussion-antispam-bot (pyproject.toml) 93 | flake8-todo==0.7 94 | # via channel-discussion-antispam-bot (pyproject.toml) 95 | flake8-use-fstring==1.4 96 | # via channel-discussion-antispam-bot (pyproject.toml) 97 | flake8-variables-names==0.0.6 98 | # via channel-discussion-antispam-bot (pyproject.toml) 99 | flake8-walrus==1.2.0 100 | # via channel-discussion-antispam-bot (pyproject.toml) 101 | h11==0.14.0 102 | # via httpcore 103 | httpcore==0.17.3 104 | # via httpx 105 | httpx==0.24.1 106 | # via python-telegram-bot 107 | idna==3.4 108 | # via 109 | # anyio 110 | # httpx 111 | iniconfig==2.0.0 112 | # via pytest 113 | ipython==8.14.0 114 | # via channel-discussion-antispam-bot (pyproject.toml) 115 | isort==5.12.0 116 | # via 117 | # channel-discussion-antispam-bot (pyproject.toml) 118 | # flake8-isort 119 | jedi==0.18.2 120 | # via ipython 121 | matplotlib-inline==0.1.6 122 | # via ipython 123 | mccabe==0.7.0 124 | # via flake8 125 | mypy==1.4.1 126 | # via channel-discussion-antispam-bot (pyproject.toml) 127 | mypy-extensions==1.0.0 128 | # via mypy 129 | packaging==23.1 130 | # via pytest 131 | parso==0.8.3 132 | # via jedi 133 | peewee==3.16.2 134 | # via channel-discussion-antispam-bot (pyproject.toml) 135 | pexpect==4.8.0 136 | # via ipython 137 | pickleshare==0.7.5 138 | # via ipython 139 | pluggy==1.2.0 140 | # via pytest 141 | prompt-toolkit==3.0.39 142 | # via ipython 143 | psycopg2-binary==2.9.6 144 | # via channel-discussion-antispam-bot (pyproject.toml) 145 | ptyprocess==0.7.0 146 | # via pexpect 147 | pure-eval==0.2.2 148 | # via stack-data 149 | pycodestyle==2.10.0 150 | # via 151 | # autopep8 152 | # flake8 153 | # flake8-print 154 | # flake8-todo 155 | pyflakes==3.0.1 156 | # via flake8 157 | pygments==2.15.1 158 | # via ipython 159 | pytest==7.4.0 160 | # via 161 | # pytest-deadfixtures 162 | # pytest-env 163 | # pytest-mock 164 | # pytest-randomly 165 | pytest-deadfixtures==2.2.1 166 | # via channel-discussion-antispam-bot (pyproject.toml) 167 | pytest-env==0.8.2 168 | # via channel-discussion-antispam-bot (pyproject.toml) 169 | pytest-mock==3.11.1 170 | # via channel-discussion-antispam-bot (pyproject.toml) 171 | pytest-randomly==3.12.0 172 | # via channel-discussion-antispam-bot (pyproject.toml) 173 | python-dotenv==1.0.0 174 | # via channel-discussion-antispam-bot (pyproject.toml) 175 | python-telegram-bot[webhooks]==20.3 176 | # via channel-discussion-antispam-bot (pyproject.toml) 177 | pyyaml==6.0 178 | # via watchdog 179 | sentry-sdk==1.27.0 180 | # via channel-discussion-antispam-bot (pyproject.toml) 181 | six==1.16.0 182 | # via asttokens 183 | sniffio==1.3.0 184 | # via 185 | # anyio 186 | # httpcore 187 | # httpx 188 | stack-data==0.6.2 189 | # via ipython 190 | toml==0.10.2 191 | # via autopep8 192 | tomli==2.0.1 193 | # via 194 | # flake8-pyproject 195 | # mypy 196 | # pytest 197 | tornado==6.3.2 198 | # via python-telegram-bot 199 | traitlets==5.9.0 200 | # via 201 | # ipython 202 | # matplotlib-inline 203 | types-awscrt==0.16.21 204 | # via 205 | # botocore-stubs 206 | # types-s3transfer 207 | types-boto3==1.0.2 208 | # via channel-discussion-antispam-bot (pyproject.toml) 209 | types-emoji==2.1.0.3 210 | # via channel-discussion-antispam-bot (pyproject.toml) 211 | types-s3transfer==0.6.1 212 | # via boto3-stubs 213 | typing-extensions==4.7.1 214 | # via 215 | # flake8-pie 216 | # mypy 217 | urllib3==1.26.16 218 | # via sentry-sdk 219 | watchdog[watchmedo]==3.0.0 220 | # via channel-discussion-antispam-bot (pyproject.toml) 221 | wcwidth==0.2.6 222 | # via prompt-toolkit 223 | 224 | # The following packages are considered to be unsafe in a requirements file: 225 | # setuptools 226 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | 3 | services: 4 | postgres: 5 | image: postgres:13.3-alpine 6 | environment: 7 | - POSTGRES_HOST_AUTH_METHOD=trust 8 | ports: 9 | - 5432:5432 10 | command: --autovacuum=off --fsync=off --synchronous_commit=off --full_page_writes=off --work_mem=12MB --max-connections=10 --max_wal_senders=0 11 | 12 | -------------------------------------------------------------------------------- /filters.py: -------------------------------------------------------------------------------- 1 | import operator 2 | from functools import reduce 3 | from telegram import Message 4 | from telegram.ext.filters import BaseFilter, MessageFilter 5 | 6 | import text 7 | 8 | 9 | class HasNoValidPreviousMessages(MessageFilter): 10 | MIN_PREVIOUS_MESSAGES_COUNT = 3 11 | 12 | def filter(self, message: Message) -> bool: 13 | if message.from_user is None: 14 | return True 15 | return self.has_no_valid_previous_messages(user_id=message.from_user.id, chat_id=message.chat_id) 16 | 17 | @classmethod 18 | def has_no_valid_previous_messages(cls, user_id: int, chat_id: int) -> bool: 19 | from models import LogEntry 20 | 21 | messages_count = LogEntry.select().where( 22 | (LogEntry.user_id == user_id), 23 | (LogEntry.chat_id == chat_id), 24 | (LogEntry.action != 'delete'), 25 | ).count() 26 | return messages_count < cls.MIN_PREVIOUS_MESSAGES_COUNT 27 | 28 | 29 | class ChatMessageOnly(MessageFilter): 30 | def filter(self, message: Message) -> bool: 31 | return message.forward_from_message_id is None 32 | 33 | 34 | class NotRootChatMessage(MessageFilter): 35 | def filter(self, message: Message) -> bool: 36 | if message.sender_chat is None: 37 | return True 38 | 39 | return message.sender_chat.id != message.chat.id 40 | 41 | 42 | def with_default_filters(*filters: BaseFilter) -> BaseFilter: 43 | """Apply default filters to the given filter classes""" 44 | default_filters = [ 45 | ChatMessageOnly(), 46 | NotRootChatMessage(), 47 | HasNoValidPreviousMessages(), 48 | ] 49 | return reduce(operator.and_, [*default_filters, *filters]) # МАМА Я УМЕЮ ФУНКЦИОНАЛЬНО ПРОГРАММИРОВАТЬ 50 | 51 | 52 | class IsMessageOnBehalfOfChat(MessageFilter): 53 | def filter(self, message: Message) -> bool: 54 | return message.sender_chat is not None and message.sender_chat.id != message.chat.id 55 | 56 | 57 | class ContainsTelegramContact(MessageFilter): 58 | def filter(self, message: Message) -> bool: 59 | if message.text is None: 60 | return False 61 | 62 | return ' @' in message.text or message.text.startswith('@') 63 | 64 | 65 | class ContainsLink(MessageFilter): 66 | def filter(self, message: Message) -> bool: 67 | if message.text is None: 68 | return False 69 | 70 | return any(entity.type in ('url', 'text_link') for entity in message.entities) 71 | 72 | 73 | class ContainsThreeOrMoreEmojies(MessageFilter): 74 | def filter(self, message: Message) -> bool: 75 | return 'three_or_more_emojies' in text.Labels(message.text)() 76 | 77 | 78 | class IsMedia(MessageFilter): 79 | def filter(self, message: Message) -> bool: 80 | if any([message.document, message.audio, message.voice, message.video_note]): 81 | return True 82 | 83 | if len(message.photo) > 0: 84 | return True 85 | 86 | return False 87 | 88 | 89 | __all__ = [ 90 | 'ChatMessageOnly', 91 | 'ContainsLink', 92 | 'ContainsTelegramContact', 93 | 'ContainsThreeOrMoreEmojies', 94 | 'IsMessageOnBehalfOfChat', 95 | 'with_default_filters', 96 | ] 97 | -------------------------------------------------------------------------------- /helpers.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sentry_sdk 4 | from sentry_sdk.integrations.asyncio import AsyncioIntegration 5 | 6 | 7 | def in_production() -> bool: 8 | return os.getenv('BOT_NAME', None) is not None 9 | 10 | 11 | def enable_logging() -> None: 12 | logging.basicConfig( 13 | level=logging.DEBUG, 14 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', 15 | ) 16 | 17 | 18 | def init_sentry() -> None: 19 | sentry_dsn = os.getenv('SENTRY_DSN', None) 20 | 21 | if sentry_dsn: 22 | sentry_sdk.init( 23 | dsn=sentry_dsn, 24 | integrations=[ 25 | AsyncioIntegration(), 26 | ], 27 | ) 28 | 29 | 30 | __all__ = [ 31 | 'enable_logging', 32 | 'in_production', 33 | 'init_sentry', 34 | ] 35 | -------------------------------------------------------------------------------- /models.py: -------------------------------------------------------------------------------- 1 | import os 2 | import peewee as pw 3 | from playhouse.db_url import connect 4 | from playhouse.postgres_ext import JSONField 5 | 6 | database_url = os.getenv('DATABASE_URL') 7 | if not database_url: 8 | raise RuntimeError('Please set DATABASE_URL environment variable') 9 | db = connect(database_url) 10 | 11 | 12 | class LogEntry(pw.Model): 13 | user_id = pw.BigIntegerField() 14 | chat_id = pw.BigIntegerField() 15 | message_id = pw.BigIntegerField() 16 | action = pw.CharField(default='') 17 | text = pw.TextField() 18 | meta = JSONField() 19 | raw = JSONField() 20 | 21 | class Meta: 22 | database = db 23 | indexes = ( 24 | ( 25 | ('chat_id', 'message_id'), 26 | True, 27 | ), 28 | ) 29 | 30 | 31 | def create_tables(): 32 | db.create_tables([LogEntry]) 33 | 34 | 35 | def drop_tables(): 36 | db.drop_tables([LogEntry]) 37 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | python_version = 3.10 3 | files = *.py 4 | warn_no_return = off 5 | warn_unused_configs = on 6 | warn_unused_ignores = on 7 | warn_redundant_casts = on 8 | no_implicit_optional = on 9 | no_implicit_reexport = on 10 | strict_equality = on 11 | warn_unreachable = on 12 | disallow_untyped_calls = on 13 | implicit_reexport = off 14 | 15 | [mypy-peewee.*] 16 | ignore_missing_imports = on 17 | 18 | [mypy-playhouse.*] 19 | ignore_missing_imports = on 20 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "channel-discussion-antispam-bot" 3 | version = "0.0.1" 4 | dependencies = [ 5 | "python-telegram-bot[webhooks]>20", 6 | "python-dotenv", 7 | "sentry-sdk", 8 | "peewee", 9 | "psycopg2-binary", 10 | "emoji", 11 | ] 12 | 13 | 14 | [project.optional-dependencies] 15 | dev = [ 16 | "ipython", 17 | "watchdog[watchmedo]", 18 | "mypy", 19 | "types-emoji", 20 | "types-boto3", 21 | "isort", 22 | "autopep8<1.6.0", 23 | "flake8-bugbear", 24 | "flake8-cognitive-complexity", 25 | "flake8-commas", 26 | "flake8-eradicate", 27 | "flake8-isort>=4.0.0", 28 | "flake8-fixme", 29 | "flake8-multiline-containers", 30 | "flake8-mutable", 31 | "flake8-pep3101", 32 | "flake8-pie", 33 | "flake8-print", 34 | "flake8-printf-formatting", 35 | "flake8-quotes", 36 | "flake8-simplify", 37 | "flake8-todo", 38 | "flake8-use-fstring", 39 | "flake8-variables-names", 40 | "flake8-walrus", 41 | "flake8-pyproject", 42 | 43 | "pytest-deadfixtures", 44 | "pytest-mock", 45 | "pytest-randomly", 46 | "pytest-env", 47 | ] 48 | 49 | 50 | 51 | [tool.setuptools] 52 | packages = [] 53 | 54 | 55 | [tool.flake8] 56 | max-line-length = 160 57 | ignore = [ 58 | "E501", 59 | "E265", 60 | "F811", 61 | "B010", 62 | "PT001", 63 | "VNE003", 64 | "PIE783", 65 | "PIE785", 66 | "SIM113", 67 | "SIM102", 68 | "FS003", 69 | "W504", 70 | "PIE801", 71 | ] 72 | exclude = [ 73 | "venv", 74 | ".git", 75 | "__pycache__", 76 | ] 77 | 78 | 79 | [tool.isort] 80 | line_length = 160 81 | known_standard_library = ["typing"] 82 | multi_line_output = 4 83 | 84 | [tool.pytest.ini_options] 85 | python_files = ["test*.py"] 86 | env = [ 87 | "DATABASE_URL=postgres://postgres@localhost:5432/postgres" 88 | ] 89 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.10 3 | # by the following command: 4 | # 5 | # pip-compile --output-file=requirements.txt --resolver=backtracking pyproject.toml 6 | # 7 | anyio==3.7.1 8 | # via httpcore 9 | certifi==2023.5.7 10 | # via 11 | # httpcore 12 | # httpx 13 | # sentry-sdk 14 | emoji==1.7.0 15 | # via channel-discussion-antispam-bot (pyproject.toml) 16 | exceptiongroup==1.1.2 17 | # via anyio 18 | h11==0.14.0 19 | # via httpcore 20 | httpcore==0.17.3 21 | # via httpx 22 | httpx==0.24.1 23 | # via python-telegram-bot 24 | idna==3.4 25 | # via 26 | # anyio 27 | # httpx 28 | peewee==3.16.2 29 | # via channel-discussion-antispam-bot (pyproject.toml) 30 | psycopg2-binary==2.9.6 31 | # via channel-discussion-antispam-bot (pyproject.toml) 32 | python-dotenv==1.0.0 33 | # via channel-discussion-antispam-bot (pyproject.toml) 34 | python-telegram-bot[webhooks]==20.3 35 | # via channel-discussion-antispam-bot (pyproject.toml) 36 | sentry-sdk==1.27.0 37 | # via channel-discussion-antispam-bot (pyproject.toml) 38 | sniffio==1.3.0 39 | # via 40 | # anyio 41 | # httpcore 42 | # httpx 43 | tornado==6.3.2 44 | # via python-telegram-bot 45 | urllib3==1.26.16 46 | # via sentry-sdk 47 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/f213/discussion-sentinel-bot/0de9a6a9306dd0924a7911a7f8f30a4e49ca00c8/tests/__init__.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from models import create_tables, db, drop_tables 4 | 5 | 6 | @pytest.fixture(scope="session") 7 | def test_db(): 8 | create_tables() 9 | 10 | yield db 11 | 12 | drop_tables() 13 | 14 | 15 | @pytest.fixture(scope='function', autouse=True) 16 | def _rollback_transactions(test_db): 17 | test_db.begin() 18 | 19 | yield 20 | 21 | test_db.rollback() 22 | 23 | @pytest.fixture 24 | def mock_message(mocker): 25 | return mocker.patch("telegram.Message", autospec=True).return_value 26 | -------------------------------------------------------------------------------- /tests/tests_filters/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/f213/discussion-sentinel-bot/0de9a6a9306dd0924a7911a7f8f30a4e49ca00c8/tests/tests_filters/__init__.py -------------------------------------------------------------------------------- /tests/tests_filters/conftest.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | 3 | import pytest 4 | 5 | 6 | @pytest.fixture 7 | def do_filter(filter_obj) -> Callable[[], bool]: 8 | return lambda message: filter_obj.filter(message) 9 | -------------------------------------------------------------------------------- /tests/tests_filters/test_chat_message_only.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from filters import ChatMessageOnly 4 | 5 | 6 | @pytest.fixture(scope="session") 7 | def filter_obj(): 8 | return ChatMessageOnly() 9 | 10 | 11 | def test_false_if_forwarded(do_filter, mock_message): 12 | mock_message.forward_from_message_id = "ordinary-id-yep" 13 | 14 | assert do_filter(mock_message) is False 15 | 16 | 17 | def test_true_if_not_forwarded(do_filter, mock_message): 18 | mock_message.forward_from_message_id = None 19 | 20 | assert do_filter(mock_message) is True 21 | -------------------------------------------------------------------------------- /tests/tests_filters/test_cointains_three_or_more_emojis.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from filters import ContainsThreeOrMoreEmojies 4 | 5 | 6 | @pytest.fixture 7 | def message(mock_message): 8 | mock_message.text = None 9 | return mock_message 10 | 11 | 12 | @pytest.fixture(scope="session") 13 | def filter_obj(): 14 | return ContainsThreeOrMoreEmojies() 15 | 16 | 17 | def test_false_if_empty_message(do_filter, message): 18 | assert do_filter(message) is False 19 | 20 | 21 | @pytest.mark.parametrize( 22 | "text", 23 | [ 24 | "Shalom 👋🏾", 25 | "Ou ui 👀🙃", 26 | "No emojis actually", 27 | "🐍", 28 | " ", 29 | ] 30 | ) 31 | def test_false_if_less_than_3_emojis(do_filter, message, text): 32 | message.text = text 33 | 34 | assert do_filter(message) is False 35 | 36 | 37 | @pytest.mark.parametrize( 38 | "text", 39 | [ 40 | "Shalom 👋🏾👀🙃", 41 | "😅😎🧑🏿‍🦱👨‍👨‍👧‍👧", 42 | "😅😎🧑🏿‍🦱👨‍👨‍👧‍👧🐍 some text 👋🏾👀🙃", 43 | ] 44 | ) 45 | def test_true_if_more_than_2_emojis(do_filter, message, text): 46 | message.text = text 47 | 48 | assert do_filter(message) is True 49 | -------------------------------------------------------------------------------- /tests/tests_filters/test_contains_link.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from filters import ContainsLink 4 | 5 | 6 | class FakeMessageEntity: 7 | def __init__(self, type: str): 8 | self.type = type 9 | 10 | 11 | @pytest.fixture 12 | def mock_message_entity(mocker): 13 | return lambda type_str: FakeMessageEntity(type_str) 14 | 15 | 16 | @pytest.fixture 17 | def message(mock_message, mock_message_entity): 18 | # To see all possible types look at telegram.MessageEntity Attributes 19 | message.text = "I'm not empty inside" 20 | code = mock_message_entity("code") 21 | phone_number = mock_message_entity("phone_number") 22 | mock_message.entities = [code, phone_number] 23 | return mock_message 24 | 25 | 26 | @pytest.fixture(scope="session") 27 | def filter_obj(): 28 | return ContainsLink() 29 | 30 | 31 | def test_false_if_no_links_message(do_filter, message): 32 | assert do_filter(message) is False 33 | 34 | 35 | @pytest.mark.parametrize( 36 | "link_type", 37 | [ 38 | "url", 39 | "text_link", 40 | ] 41 | ) 42 | def test_true_if_has_link(do_filter, message, mock_message_entity, link_type): 43 | message_entity = mock_message_entity(link_type) 44 | message.entities.append(message_entity) 45 | 46 | assert do_filter(message) is True 47 | 48 | 49 | @pytest.mark.parametrize( 50 | "link_types", 51 | [ 52 | ["text_link", "url"], 53 | ["text_link", "text_link"], 54 | ["url", "url"], 55 | ] 56 | ) 57 | def test_true_if_has_many_links(do_filter, message, mock_message_entity, link_types): 58 | for link_type in link_types: 59 | message_entity = mock_message_entity(link_type) 60 | message.entities.append(message_entity) 61 | 62 | assert do_filter(message) is True 63 | -------------------------------------------------------------------------------- /tests/tests_filters/test_contains_tg_contact.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from filters import ContainsTelegramContact 4 | 5 | 6 | @pytest.fixture 7 | def message(mock_message): 8 | message.text = "Ordinary text" 9 | return mock_message 10 | 11 | 12 | @pytest.fixture(scope="session") 13 | def filter_obj(): 14 | return ContainsTelegramContact() 15 | 16 | 17 | def test_false_if_no_text_message(do_filter, message): 18 | message.text = None 19 | assert do_filter(message) is False 20 | 21 | 22 | @pytest.mark.parametrize( 23 | "text", 24 | [ 25 | "Hello there!", 26 | "OMG look at my email omg@bbq.wtf", 27 | "sobaka@sobaka", 28 | ] 29 | ) 30 | def test_false_if_no_contact(do_filter, message, text): 31 | message.text = text 32 | 33 | assert do_filter(message) is False 34 | 35 | 36 | @pytest.mark.parametrize( 37 | "text", 38 | [ 39 | "write me a message @bbqomg", 40 | "@contact_me", 41 | ] 42 | ) 43 | def test_true_if_contact(do_filter, message, text): 44 | message.text = text 45 | 46 | assert do_filter(message) is True 47 | -------------------------------------------------------------------------------- /tests/tests_filters/test_has_no_valid_previous_messages.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import random 3 | 4 | from filters import HasNoValidPreviousMessages 5 | from models import LogEntry 6 | 7 | CHAT_ID = 1 8 | 9 | 10 | def create_log_message(user_id: int, chat_id: int = CHAT_ID, action: str = '', message_id: int = random.randint(1, 9999)): 11 | return LogEntry.create( 12 | user_id=user_id, 13 | chat_id=chat_id, 14 | message_id=message_id, 15 | text='meh', 16 | meta={'tags': ["ou"]}, 17 | raw={'text': 'meh'}, 18 | action=action, 19 | ) 20 | 21 | 22 | @pytest.fixture 23 | def user(): 24 | class FakeUser: 25 | def __init__(self, id: int): 26 | self.id = id 27 | 28 | return FakeUser(4815162342) 29 | 30 | 31 | @pytest.fixture 32 | def message(mock_message, user): 33 | mock_message.from_user = user 34 | mock_message.chat_id = CHAT_ID 35 | return mock_message 36 | 37 | 38 | @pytest.fixture(scope="session") 39 | def filter_obj(): 40 | return HasNoValidPreviousMessages() 41 | 42 | 43 | @pytest.fixture 44 | def valid_messages(user, filter_obj): 45 | message_id = 1 46 | for _ in range(filter_obj.MIN_PREVIOUS_MESSAGES_COUNT): 47 | create_log_message(user_id=user.id, message_id=message_id) 48 | message_id += 1 49 | 50 | 51 | def test_true_if_no_valid_messages(do_filter, message): 52 | assert do_filter(message) is True 53 | 54 | 55 | def test_true_if_not_from_user(do_filter, message): 56 | message.from_user = None 57 | 58 | assert do_filter(message) is True 59 | 60 | 61 | def test_true_if_has_not_enough_valid_messages(do_filter, message, valid_messages): 62 | LogEntry.get(LogEntry.message_id == 1).delete_instance() 63 | 64 | assert do_filter(message) is True 65 | 66 | 67 | @pytest.mark.parametrize( 68 | ("attribute", "value"), 69 | [ 70 | ("action", "delete"), 71 | ("chat_id", 4815), 72 | ("user_id", 9911), 73 | ] 74 | ) 75 | def test_true_if_user_has_not_enough_valid_messages(do_filter, message, valid_messages, attribute, value): 76 | log_entry = LogEntry.get(LogEntry.message_id == 1) 77 | setattr(log_entry, attribute, value) 78 | log_entry.save() 79 | 80 | assert do_filter(message) is True 81 | 82 | 83 | def test_false_if_has_valid_messages(do_filter, message, valid_messages): 84 | assert do_filter(message) is False 85 | -------------------------------------------------------------------------------- /tests/tests_filters/test_is_media.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from filters import IsMedia 4 | 5 | 6 | @pytest.fixture 7 | def message(mock_message): 8 | mock_message.photo = [] 9 | mock_message.document = None 10 | mock_message.audio = None 11 | mock_message.voice = None 12 | mock_message.video_note = None 13 | return mock_message 14 | 15 | 16 | @pytest.fixture(scope="session") 17 | def filter_obj(): 18 | return IsMedia() 19 | 20 | 21 | def test_false_if_empty_message(do_filter, message): 22 | assert do_filter(message) is False 23 | 24 | 25 | @pytest.mark.parametrize( 26 | "photo", 27 | [ 28 | "http://photo.com/", 29 | ["http://localhost/photo", "some-id-like-123"] 30 | ] 31 | ) 32 | def test_true_if_has_photos(do_filter, message, photo): 33 | message.photo.append(photo) 34 | 35 | assert do_filter(message) is True 36 | 37 | 38 | @pytest.mark.parametrize( 39 | "attribute", 40 | [ 41 | "document", 42 | "audio", 43 | "voice", 44 | "video_note", 45 | ] 46 | ) 47 | def test_true_if_has_media_attr(do_filter, message, attribute): 48 | setattr(message, attribute, "Here we are born to be kings") 49 | 50 | assert do_filter(message) is True 51 | -------------------------------------------------------------------------------- /tests/tests_filters/test_is_message_behalf_of_chat.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from filters import IsMessageOnBehalfOfChat 4 | 5 | 6 | @pytest.fixture 7 | def mock_chat(): 8 | class MockChat: 9 | def __init__(self, chat_id: int) -> None: 10 | self.id = chat_id 11 | 12 | return lambda chat_id: MockChat(chat_id=chat_id) 13 | 14 | @pytest.fixture 15 | def message(mock_message, mock_chat): 16 | mock_message.sender_chat = None 17 | mock_message.chat = mock_chat(chat_id=7) 18 | return mock_message 19 | 20 | 21 | @pytest.fixture(scope="session") 22 | def filter_obj(): 23 | return IsMessageOnBehalfOfChat() 24 | 25 | 26 | def test_false_if_no_sender_chat(do_filter, message): 27 | assert do_filter(message) is False 28 | 29 | 30 | def test_false_if_sender_chat_same_as_current(do_filter, message, mock_chat): 31 | message.sender_chat = message.chat 32 | 33 | assert do_filter(message) is False 34 | 35 | 36 | def test_true_if_sender_chat(do_filter, message, mock_chat): 37 | message.sender_chat = mock_chat(chat_id=55) 38 | 39 | assert do_filter(message) is True 40 | -------------------------------------------------------------------------------- /text.py: -------------------------------------------------------------------------------- 1 | 2 | import emoji 3 | 4 | 5 | class Labels: 6 | def __init__(self, text: str | None) -> None: 7 | self.text = text 8 | 9 | def __call__(self) -> list[str]: 10 | return self.get_emoji_label() 11 | 12 | def get_emoji_label(self) -> list[str]: 13 | if self.text is None: 14 | return [] 15 | 16 | emoji_count = len(emoji.emoji_list(self.text)) 17 | 18 | if emoji_count == 0: 19 | return [] 20 | 21 | if emoji_count == 1: 22 | return ['emoji'] 23 | 24 | if emoji_count == 2: 25 | return ['two_emojies'] 26 | 27 | return ['three_or_more_emojies'] 28 | 29 | 30 | if __name__ == '__main__': 31 | assert Labels('текст')() == [] 32 | assert Labels('текст😁')() == ['emoji'] 33 | assert Labels('😁текст😁')() == ['two_emojies'] 34 | assert Labels('😁😁😁текст')() == ['three_or_more_emojies'] 35 | --------------------------------------------------------------------------------