├── test ├── __init__.py └── test_sql.py ├── requirements-dev.txt ├── migrations ├── README ├── script.py.mako ├── versions │ ├── 2023_03_08_2055-463c152f30aa_add_audit_log_types.py │ ├── 2022_09_30_0242-674f360b14e3_deprecate_voice_region.py │ ├── 2022_10_06_2247-ab2ee4dab862_add_thread_crawl_table.py │ ├── 2022_10_02_0116-2b5e4f83be7e_add_thread_id_to_messages_and_typing_.py │ ├── 2022_10_01_1229-8c060bb0e6dc_extend_messagetype_enum.py │ ├── 2022_10_01_1813-74bc9658d7ff_extend_auditlogaction_enum.py │ ├── 2022_10_01_2231-4ad50631992f_add_threads_and_thread_members.py │ └── initial_revision_discord_py_1_5.py └── env.py ├── requirements.txt ├── .travis └── setup.sql ├── .gitignore ├── statbot.service ├── misc ├── statbot.service ├── header.txt ├── config.yaml └── docker-compose.yaml ├── statbot ├── mention.py ├── __init__.py ├── util.py ├── cache.py ├── emoji.py ├── audit_log.py ├── config.py ├── __main__.py ├── schema.py ├── crawler.py ├── client.py └── sql.py ├── .travis.yml ├── deploy.sh ├── LICENSE ├── README.md ├── user_privacy_scrub.py ├── alembic.ini ├── pylintrc └── SCHEMA.md /test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | black 2 | pylint>=2.5 3 | -------------------------------------------------------------------------------- /migrations/README: -------------------------------------------------------------------------------- 1 | Generic single-database configuration. -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | PyYAML>=3.10 2 | SQLAlchemy>=1.4,<2 3 | discord.py>=2 4 | psycopg2-binary>=2.7 5 | alembic>=1.8 6 | -------------------------------------------------------------------------------- /.travis/setup.sql: -------------------------------------------------------------------------------- 1 | CREATE database statbot_test; 2 | CREATE USER statbot_test; 3 | GRANT ALL PRIVILEGES ON DATABASE statbot_test TO statbot_test; 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Configuration 2 | /config*.yaml 3 | /docker-compose.yaml 4 | 5 | # Environment 6 | venv/ 7 | .vscode/ 8 | 9 | # Artifacts 10 | __pycache__ 11 | *.py[cdo] 12 | 13 | # Database 14 | docker/ 15 | postgresql/ 16 | 17 | # Logging 18 | *.log 19 | 20 | # Misc 21 | .*.swp 22 | *.bak 23 | *~ 24 | *# 25 | -------------------------------------------------------------------------------- /statbot.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Statbot - the Discord SQL ingestion bot 3 | After=network.target 4 | 5 | [Service] 6 | Type=simple 7 | User=statbot 8 | ExecStart=/usr/bin/python3 -m statbot config.yaml 9 | WorkingDirectory=/home/statbot/repo 10 | Restart=always 11 | RestartSec=600 12 | 13 | [Install] 14 | WantedBy=multi-user.target 15 | -------------------------------------------------------------------------------- /misc/statbot.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Statbot - the Discord SQL ingestion bot 3 | After=network.target 4 | 5 | [Service] 6 | Type=simple 7 | 
User=statbot 8 | ExecStart=/usr/bin/python3.8 -m statbot config.yaml 9 | WorkingDirectory=/home/statbot/repo 10 | Restart=always 11 | RestartSec=600 12 | 13 | [Install] 14 | WantedBy=multi-user.target 15 | -------------------------------------------------------------------------------- /misc/header.txt: -------------------------------------------------------------------------------- 1 | # 2 | # (FILENAME) 3 | # 4 | # statbot - Store Discord records for later analysis 5 | # Copyright (c) 2017-2018 Ammon Smith 6 | # 7 | # statbot is available free of charge under the terms of the MIT 8 | # License. You are free to redistribute and/or modify it under those 9 | # terms. It is distributed in the hopes that it will be useful, but 10 | # WITHOUT ANY WARRANTY. See the LICENSE file for more details. 11 | # 12 | 13 | -------------------------------------------------------------------------------- /misc/config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | guild-ids: 4 | - 193593039650100397 5 | 6 | cache: 7 | event-size: 16 8 | lookup-size: 384 9 | 10 | logger: 11 | full-messages: false 12 | ignored-events: false 13 | 14 | crawler: 15 | batch-size: 256 16 | queue-size: 32 17 | delays: 18 | yield: 0.5 19 | empty-source: 3600 20 | 21 | bot: 22 | username: MyGreatBot#1000 23 | token: 'your discord token here' 24 | db-url: 'postgresql://username:password@localhost:5432/statbot' 25 | 26 | -------------------------------------------------------------------------------- /misc/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | PostgreSQL: 2 | restart: always 3 | image: sameersbn/postgresql:9.6-2 4 | ports: 5 | - "5432:5432" 6 | environment: 7 | - DEBUG=false 8 | 9 | - PG_USERNAME=root 10 | - PG_PASSWORD=root 11 | 12 | - DB_USER=username 13 | - DB_PASS=password 14 | - DB_NAME=statbot 15 | - DB_TEMPLATE= 16 | 17 | - DB_EXTENSION= 18 | 19 | - REPLICATION_MODE= 20 | - REPLICATION_USER= 21 | - REPLICATION_PASS= 22 | - REPLICATION_SSLMODE= 23 | volumes: 24 | - ./docker/postgresql:/var/lib/postgresql 25 | -------------------------------------------------------------------------------- /statbot/mention.py: -------------------------------------------------------------------------------- 1 | # 2 | # mention.py 3 | # 4 | # statbot - Store Discord records for later analysis 5 | # Copyright (c) 2017-2018 Ammon Smith 6 | # 7 | # statbot is available free of charge under the terms of the MIT 8 | # License. You are free to redistribute and/or modify it under those 9 | # terms. It is distributed in the hopes that it will be useful, but 10 | # WITHOUT ANY WARRANTY. See the LICENSE file for more details. 11 | # 12 | 13 | from enum import Enum 14 | 15 | __all__ = [ 16 | "MentionType", 17 | ] 18 | 19 | 20 | class MentionType(Enum): 21 | USER = 0 22 | ROLE = 1 23 | CHANNEL = 2 24 | -------------------------------------------------------------------------------- /migrations/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision | comma,n} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | ${imports if imports else ""} 11 | 12 | # revision identifiers, used by Alembic. 
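# New revision files in migrations/versions/ are generated from this template via
# `alembic revision [--autogenerate] -m "some message"`; the date-prefixed file names
# (e.g. 2022_10_01_1229-...) come from the file_template setting in alembic.ini.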
13 | revision = ${repr(up_revision)} 14 | down_revision = ${repr(down_revision)} 15 | branch_labels = ${repr(branch_labels)} 16 | depends_on = ${repr(depends_on)} 17 | 18 | 19 | def upgrade() -> None: 20 | ${upgrades if upgrades else "pass"} 21 | 22 | 23 | def downgrade() -> None: 24 | ${downgrades if downgrades else "pass"} 25 | -------------------------------------------------------------------------------- /statbot/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # __init__.py 3 | # 4 | # statbot - Store Discord records for later analysis 5 | # Copyright (c) 2017-2018 Ammon Smith 6 | # 7 | # statbot is available free of charge under the terms of the MIT 8 | # License. You are free to redistribute and/or modify it under those 9 | # terms. It is distributed in the hopes that it will be useful, but 10 | # WITHOUT ANY WARRANTY. See the LICENSE file for more details. 11 | # 12 | 13 | from . import audit_log, cache, client, config, emoji, mention, sql, util 14 | 15 | __all__ = [ 16 | "__version__", 17 | "audit_log", 18 | "cache", 19 | "client", 20 | "config", 21 | "emoji", 22 | "mention", 23 | "sql", 24 | "util", 25 | ] 26 | 27 | __version__ = "0.7.0" 28 | -------------------------------------------------------------------------------- /migrations/versions/2023_03_08_2055-463c152f30aa_add_audit_log_types.py: -------------------------------------------------------------------------------- 1 | """Add audit log types 2 | 3 | Revision ID: 463c152f30aa 4 | Revises: ab2ee4dab862 5 | Create Date: 2023-03-08 20:55:08.357126 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = '463c152f30aa' 14 | down_revision = 'ab2ee4dab862' 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade() -> None: 20 | with op.get_context().autocommit_block(): 21 | op.execute("ALTER TYPE auditlogaction ADD VALUE IF NOT EXISTS 'automod_flag_message'") 22 | op.execute("ALTER TYPE auditlogaction ADD VALUE IF NOT EXISTS 'automod_timeout_member'") 23 | 24 | 25 | def downgrade() -> None: 26 | pass 27 | -------------------------------------------------------------------------------- /migrations/versions/2022_09_30_0242-674f360b14e3_deprecate_voice_region.py: -------------------------------------------------------------------------------- 1 | """Deprecate voice_region 2 | 3 | Revision ID: 674f360b14e3 4 | Revises: initial_revision_discord_py_1_5 5 | Create Date: 2022-09-30 02:42:34.186912 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 
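# Context: Discord deprecated guild-level voice regions (they moved to per-channel
# RTC regions), and discord.py 2.x no longer exposes Guild.region, so the
# 'deprecated' label added below presumably acts as a placeholder for guilds
# ingested after that change.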
13 | revision = '674f360b14e3' 14 | down_revision = 'initial_revision_discord_py_1_5' 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade() -> None: 20 | # https://alembic.sqlalchemy.org/en/latest/api/runtime.html#alembic.runtime.migration.MigrationContext.autocommit_block 21 | with op.get_context().autocommit_block(): 22 | op.execute("ALTER TYPE voiceregion ADD VALUE IF NOT EXISTS 'deprecated'") 23 | 24 | 25 | def downgrade() -> None: 26 | pass 27 | -------------------------------------------------------------------------------- /migrations/versions/2022_10_06_2247-ab2ee4dab862_add_thread_crawl_table.py: -------------------------------------------------------------------------------- 1 | """Add thread_crawl table 2 | 3 | Revision ID: ab2ee4dab862 4 | Revises: 2b5e4f83be7e 5 | Create Date: 2022-10-06 22:47:43.889520 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = 'ab2ee4dab862' 14 | down_revision = '2b5e4f83be7e' 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade() -> None: 20 | op.create_table('thread_crawl', 21 | sa.Column('thread_id', sa.BigInteger(), nullable=False), 22 | sa.Column('last_message_id', sa.BigInteger(), nullable=True), 23 | sa.ForeignKeyConstraint(['thread_id'], ['threads.thread_id'], ), 24 | sa.PrimaryKeyConstraint('thread_id') 25 | ) 26 | 27 | 28 | def downgrade() -> None: 29 | op.drop_table('thread_crawl') 30 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | dist: xenial 3 | os: linux 4 | 5 | stages: 6 | - black 7 | - test 8 | 9 | python: 10 | - '3.6' 11 | - '3.7' 12 | - '3.8' 13 | - '3.8-dev' 14 | - 'nightly' 15 | 16 | services: 17 | - postgresql 18 | 19 | addons: 20 | postgresql: '9.5' # on_conflict requires 9.5+ 21 | 22 | jobs: 23 | include: 24 | - stage: black 25 | python: '3.8' 26 | script: 27 | - black --check statbot 28 | allow_failures: 29 | - python: 'nightly' 30 | fast_finish: true 31 | 32 | cache: pip 33 | 34 | install: 35 | - pip install -r requirements.txt 36 | - pip install -r requirements-dev.txt 37 | 38 | before_script: 39 | - psql -f .travis/setup.sql -U postgres 40 | 41 | script: 42 | # Run statbot tests 43 | - python3 -m unittest --verbose 44 | # Display all lints and a report 45 | - pylint --reports=yes statbot 46 | 47 | notifications: 48 | email: 49 | on_success: change 50 | on_failure: always 51 | -------------------------------------------------------------------------------- /deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu 3 | 4 | if [[ $# -ne 1 ]]; then 5 | echo >&2 "Usage: $0 statbot-config.yaml" 6 | exit 1 7 | fi 8 | 9 | python_ver=python3 10 | repo_dir="$(dirname "$0")" 11 | dest_dir=~statbot/repo 12 | 13 | if [[ -f "$repo_dir/statbot.service" ]]; then 14 | service="$repo_dir/statbot.service" 15 | else 16 | service="$repo_dir/misc/statbot.service" 17 | fi 18 | 19 | rm -r "$dest_dir" 20 | mkdir -p "$dest_dir" 21 | cp -a "$repo_dir" "$dest_dir" 22 | install -m400 "$1" "$dest_dir/config.yaml" 23 | chown -R statbot:statbot "$dest_dir" 24 | echo "Installed source code to '$dest_dir'" 25 | 26 | "$python_ver" -m pip install -r "$repo_dir/requirements.txt" 27 | echo "Installed Python dependencies" 28 | 29 | install -m644 "$service" /usr/local/lib/systemd/system/statbot.service 30 | chown 
root:root /usr/local/lib/systemd/system/statbot.service 31 | echo "Installed systemd service" 32 | 33 | systemctl daemon-reload 34 | systemctl restart statbot.service 35 | echo "Started statbot systemd service" 36 | -------------------------------------------------------------------------------- /statbot/util.py: -------------------------------------------------------------------------------- 1 | # 2 | # util.py 3 | # 4 | # statbot - Store Discord records for later analysis 5 | # Copyright (c) 2017-2018 Ammon Smith 6 | # 7 | # statbot is available free of charge under the terms of the MIT 8 | # License. You are free to redistribute and/or modify it under those 9 | # terms. It is distributed in the hopes that it will be useful, but 10 | # WITHOUT ANY WARRANTY. See the LICENSE file for more details. 11 | # 12 | 13 | import hashlib 14 | import struct 15 | 16 | __all__ = [ 17 | "null_logger", 18 | "int_hash", 19 | ] 20 | 21 | 22 | class _NullLogger: 23 | __slots__ = () 24 | 25 | def __init__(self): 26 | pass 27 | 28 | def debug(self, *args, **kwargs): 29 | pass 30 | 31 | def info(self, *args, **kwargs): 32 | pass 33 | 34 | def warning(self, *args, **kwargs): 35 | pass 36 | 37 | def error(self, *args, **kwargs): 38 | pass 39 | 40 | 41 | null_logger = _NullLogger() 42 | 43 | 44 | def int_hash(n): 45 | bytez = struct.pack(">q", n) 46 | hashbytes = hashlib.sha512(bytez).digest() 47 | (result,) = struct.unpack(">q", hashbytes[24:32]) 48 | return result 49 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2017-2018 Ammon Smith 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 9 | of the Software, and to permit persons to whom the Software is furnished to do 10 | so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /test/test_sql.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | import unittest 3 | from unittest.mock import Mock 4 | 5 | import discord 6 | 7 | import statbot.sql 8 | 9 | class TestSql(unittest.TestCase): 10 | @classmethod 11 | def setUpClass(cls): 12 | addr = 'postgresql://statbot_test@/statbot_test' 13 | cache_sizes = defaultdict(int) 14 | cls.sql = statbot.sql.DiscordSqlHandler(addr, cache_sizes) 15 | 16 | def setUp(self): 17 | self.transaction = self.sql.conn.begin() 18 | 19 | def tearDown(self): 20 | self.transaction.rollback() 21 | 22 | @classmethod 23 | def tearDownClass(cls): 24 | cls.sql.conn.close() 25 | 26 | def test_upsert_guild(self): 27 | user = Mock() 28 | guild = Mock() 29 | user.configure_mock(id=0, name='cow', discriminator=1, avatar=None, bot=False) 30 | guild.configure_mock(id=1, owner=user, name='statbot_test', icon='', 31 | region=discord.VoiceRegion.us_south, 32 | afk_channel=None, afk_timeout=2, mfa_level=False, 33 | verification_level=discord.VerificationLevel.none, 34 | explicit_content_filter=discord.ContentFilter.disabled, features=[], splash=None) 35 | with self.sql.transaction() as trans: 36 | self.sql.add_user(trans, user) 37 | self.sql.upsert_guild(trans, guild) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## statbot 2 | A [Discord](https://discordapp.com) bot that reads in posts from a given set 3 | of servers and stores them in a SQL database. This application has two parts: the 4 | listener, which ingests raw data as it arrives, and the crawler, which walks 5 | through the past history of Discord messages and adds them. 6 | 7 | This bot is designed for use with Postgres, but in principle could be used 8 | with any database that SQLAlchemy supports. 9 | 10 | Available under the terms of the MIT License. 11 | 12 | ### Requirements 13 | * Python 3.8 or later 14 | * [discord.py](https://github.com/Rapptz/discord.py) 15 | * [SQLAlchemy](http://www.sqlalchemy.org/) 16 | * [psycopg2](https://pypi.python.org/pypi/psycopg2) 17 | 18 | You can install them all using pip by running: 19 | ```sh 20 | pip3 install -r requirements.txt 21 | ``` 22 | 23 | ### Execution 24 | After preparing a configuration file (see `misc/config.yaml`), 25 | you can call the program as follows: 26 | ```sh 27 | python3 -m statbot [-q] [-d] your_config_file.yaml 28 | ``` 29 | 30 | A sample `docker-compose.yaml` configuration is also provided in `misc/` in case you would 31 | like to host your PostgreSQL database via Docker. 32 | 33 | ### Questions 34 | **How do I use statbot as a selfbot?** 35 | 36 | You shouldn't. Each person who you collect data from must explicitly agree to it. If you are 37 | running a server you have the ability to enforce this, but that also means you may as well 38 | just use an actual bot account. We will not support forks that add selfbot support to statbot, 39 | and we will not accept patches that do so either. 
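
**How do I remove a specific user's data?**

The repository ships a standalone helper, `user_privacy_scrub.py`, which runs the SQL
layer's privacy-scrub routine for a single user ID using the same configuration file as
the bot. Following the usage string printed by the script itself, an invocation looks like:
```sh
python3 user_privacy_scrub.py your_config_file.yaml <user-id>
```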
40 | -------------------------------------------------------------------------------- /user_privacy_scrub.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # 4 | # user_privacy_scrub.py 5 | # 6 | # statbot - Store Discord records for later analysis 7 | # Copyright (c) 2017-2018 Ammon Smith 8 | # 9 | # statbot is available free of charge under the terms of the MIT 10 | # License. You are free to redistribute and/or modify it under those 11 | # terms. It is distributed in the hopes that it will be useful, but 12 | # WITHOUT ANY WARRANTY. See the LICENSE file for more details. 13 | # 14 | 15 | from collections import namedtuple 16 | import logging 17 | import sys 18 | 19 | import yaml 20 | 21 | import statbot 22 | 23 | FakeUser = namedtuple('FakeUser', ('id', 'name')) 24 | 25 | if __name__ == '__main__': 26 | if len(sys.argv) != 3: 27 | print(f"Usage: {sys.argv[0]} config-file user-id") 28 | exit(1) 29 | 30 | with open(sys.argv[1], 'r') as fh: 31 | config = yaml.safe_load(fh) 32 | 33 | # Get arguments 34 | db_url = config['bot']['db-url'] 35 | user_id = int(sys.argv[2]) 36 | 37 | # Set up logging 38 | logger = logging.getLogger('statbot.script.user_privacy_scrub') 39 | logger.setLevel(logging.INFO) 40 | log_hndl = logging.StreamHandler(sys.stdout) 41 | log_hndl.setFormatter(logging.Formatter('[%(levelname)s] %(message)s')) 42 | logger.addHandler(log_hndl) 43 | 44 | # Open database connection 45 | logger.info("Preparation done, starting user privacy scrub procedure...") 46 | sql = statbot.sql.DiscordSqlHandler(db_url, None, logger) 47 | sql.privacy_scrub(FakeUser(id=user_id, name=str(user_id))) 48 | logger.info("Done! Exiting...") -------------------------------------------------------------------------------- /statbot/cache.py: -------------------------------------------------------------------------------- 1 | # 2 | # cache.py 3 | # 4 | # statbot - Store Discord records for later analysis 5 | # Copyright (c) 2017-2018 Ammon Smith 6 | # 7 | # statbot is available free of charge under the terms of the MIT 8 | # License. You are free to redistribute and/or modify it under those 9 | # terms. It is distributed in the hopes that it will be useful, but 10 | # WITHOUT ANY WARRANTY. See the LICENSE file for more details. 
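#
# Usage sketch for the LruCache below (illustrative only):
#
#     cache = LruCache(max_size=2)
#     cache["a"] = 1
#     cache["b"] = 2
#     cache["a"]        # lookup refreshes "a" as most recently used
#     cache["c"] = 3    # exceeds max_size, so "b" (least recently used) is evicted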
11 | # 12 | 13 | from collections import OrderedDict 14 | from collections.abc import MutableMapping 15 | 16 | __all__ = [ 17 | "LruCache", 18 | ] 19 | 20 | 21 | class LruCache(MutableMapping): 22 | __slots__ = ( 23 | "store", 24 | "max_size", 25 | ) 26 | 27 | def __init__(self, max_size=None): 28 | self.store = OrderedDict() 29 | self.max_size = max_size 30 | 31 | def __getitem__(self, key): 32 | obj = self.store.pop(key) 33 | self.store[key] = obj 34 | return obj 35 | 36 | def get(self, key, default=None): 37 | try: 38 | return self[key] 39 | except KeyError: 40 | return default 41 | 42 | def __setitem__(self, key, value): 43 | self.store.pop(key, None) 44 | self.store[key] = value 45 | 46 | while len(self) > self.max_size: 47 | self.store.popitem(last=False) 48 | 49 | def __delitem__(self, key): 50 | del self.store[key] 51 | 52 | def __contains__(self, key): 53 | return key in self.store 54 | 55 | def __iter__(self): 56 | return iter(self.store) 57 | 58 | def __len__(self): 59 | return len(self.store) 60 | -------------------------------------------------------------------------------- /migrations/versions/2022_10_02_0116-2b5e4f83be7e_add_thread_id_to_messages_and_typing_.py: -------------------------------------------------------------------------------- 1 | """Add thread_id to messages and typing tables 2 | 3 | Revision ID: 2b5e4f83be7e 4 | Revises: 4ad50631992f 5 | Create Date: 2022-10-02 01:16:37.285311 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = '2b5e4f83be7e' 14 | down_revision = '4ad50631992f' 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade() -> None: 20 | op.alter_column('messages', 'channel_id', nullable=True) 21 | op.add_column('messages', sa.Column('thread_id', sa.BigInteger(), nullable=True)) 22 | op.create_foreign_key('messages_thread_id_fkey', 'messages', 'threads', ['thread_id'], ['thread_id']) 23 | 24 | op.alter_column('typing', 'channel_id', nullable=True) 25 | op.add_column('typing', sa.Column('thread_id', sa.BigInteger(), nullable=True)) 26 | op.drop_constraint('uq_typing', 'typing', type_='unique') 27 | op.create_unique_constraint('uq_typing', 'typing', ['timestamp', 'int_user_id', 'channel_id', 'thread_id', 'guild_id']) 28 | op.create_foreign_key('typing_thread_id_fkey', 'typing', 'threads', ['thread_id'], ['thread_id']) 29 | 30 | 31 | def downgrade() -> None: 32 | op.drop_constraint('typing_thread_id_fkey', 'typing', type_='foreignkey') 33 | op.drop_constraint('uq_typing', 'typing', type_='unique') 34 | op.create_unique_constraint('uq_typing', 'typing', ['timestamp', 'int_user_id', 'channel_id', 'guild_id']) 35 | op.drop_column('typing', 'thread_id') 36 | op.drop_constraint('messages_thread_id_fkey', 'messages', type_='foreignkey') 37 | op.drop_column('messages', 'thread_id') 38 | -------------------------------------------------------------------------------- /migrations/versions/2022_10_01_1229-8c060bb0e6dc_extend_messagetype_enum.py: -------------------------------------------------------------------------------- 1 | """Extend messagetype enum 2 | 3 | Revision ID: 8c060bb0e6dc 4 | Revises: 674f360b14e3 5 | Create Date: 2022-10-01 12:29:46.681282 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 
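# Note: upgrade() below wraps its statements in autocommit_block() because
# PostgreSQL historically refused to run ALTER TYPE ... ADD VALUE inside a
# transaction block (only newer releases relax this), while Alembic otherwise
# runs each migration inside a single transaction.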
13 | revision = '8c060bb0e6dc' 14 | down_revision = '674f360b14e3' 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade() -> None: 20 | # https://alembic.sqlalchemy.org/en/latest/api/runtime.html#alembic.runtime.migration.MigrationContext.autocommit_block 21 | with op.get_context().autocommit_block(): 22 | # added in discord.py v1.7 23 | op.execute("ALTER TYPE messagetype ADD VALUE IF NOT EXISTS 'guild_stream'") 24 | op.execute("ALTER TYPE messagetype ADD VALUE IF NOT EXISTS 'guild_discovery_disqualified'") 25 | op.execute("ALTER TYPE messagetype ADD VALUE IF NOT EXISTS 'guild_discovery_requalified'") 26 | op.execute("ALTER TYPE messagetype ADD VALUE IF NOT EXISTS 'guild_discovery_grace_period_initial_warning'") 27 | op.execute("ALTER TYPE messagetype ADD VALUE IF NOT EXISTS 'guild_discovery_grace_period_final_warning'") 28 | 29 | # added in discord.py v2.0 30 | op.execute("ALTER TYPE messagetype ADD VALUE IF NOT EXISTS 'thread_created'") 31 | op.execute("ALTER TYPE messagetype ADD VALUE IF NOT EXISTS 'reply'") 32 | op.execute("ALTER TYPE messagetype ADD VALUE IF NOT EXISTS 'chat_input_command'") 33 | op.execute("ALTER TYPE messagetype ADD VALUE IF NOT EXISTS 'thread_starter_message'") 34 | op.execute("ALTER TYPE messagetype ADD VALUE IF NOT EXISTS 'guild_invite_reminder'") 35 | op.execute("ALTER TYPE messagetype ADD VALUE IF NOT EXISTS 'context_menu_command'") 36 | op.execute("ALTER TYPE messagetype ADD VALUE IF NOT EXISTS 'auto_moderation_action'") 37 | 38 | 39 | def downgrade() -> None: 40 | pass 41 | -------------------------------------------------------------------------------- /migrations/versions/2022_10_01_1813-74bc9658d7ff_extend_auditlogaction_enum.py: -------------------------------------------------------------------------------- 1 | """Extend auditlogaction enum 2 | 3 | Revision ID: 74bc9658d7ff 4 | Revises: 8c060bb0e6dc 5 | Create Date: 2022-10-01 18:13:54.827286 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 
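# downgrade() below is a no-op: PostgreSQL has no ALTER TYPE ... DROP VALUE, so
# added enum labels cannot simply be removed again. To inspect the labels currently
# defined, an ad-hoc check such as
#     SELECT unnest(enum_range(NULL::auditlogaction));
# can be run against the database (illustrative query, not part of this migration).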
13 | revision = '74bc9658d7ff' 14 | down_revision = '8c060bb0e6dc' 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade() -> None: 20 | with op.get_context().autocommit_block(): 21 | op.execute("ALTER TYPE auditlogaction ADD VALUE IF NOT EXISTS 'stage_instance_create'") 22 | op.execute("ALTER TYPE auditlogaction ADD VALUE IF NOT EXISTS 'stage_instance_update'") 23 | op.execute("ALTER TYPE auditlogaction ADD VALUE IF NOT EXISTS 'stage_instance_delete'") 24 | op.execute("ALTER TYPE auditlogaction ADD VALUE IF NOT EXISTS 'sticker_create'") 25 | op.execute("ALTER TYPE auditlogaction ADD VALUE IF NOT EXISTS 'sticker_update'") 26 | op.execute("ALTER TYPE auditlogaction ADD VALUE IF NOT EXISTS 'sticker_delete'") 27 | op.execute("ALTER TYPE auditlogaction ADD VALUE IF NOT EXISTS 'scheduled_event_create'") 28 | op.execute("ALTER TYPE auditlogaction ADD VALUE IF NOT EXISTS 'scheduled_event_update'") 29 | op.execute("ALTER TYPE auditlogaction ADD VALUE IF NOT EXISTS 'scheduled_event_delete'") 30 | op.execute("ALTER TYPE auditlogaction ADD VALUE IF NOT EXISTS 'thread_create'") 31 | op.execute("ALTER TYPE auditlogaction ADD VALUE IF NOT EXISTS 'thread_update'") 32 | op.execute("ALTER TYPE auditlogaction ADD VALUE IF NOT EXISTS 'thread_delete'") 33 | op.execute("ALTER TYPE auditlogaction ADD VALUE IF NOT EXISTS 'app_command_permission_update'") 34 | op.execute("ALTER TYPE auditlogaction ADD VALUE IF NOT EXISTS 'automod_rule_create'") 35 | op.execute("ALTER TYPE auditlogaction ADD VALUE IF NOT EXISTS 'automod_rule_update'") 36 | op.execute("ALTER TYPE auditlogaction ADD VALUE IF NOT EXISTS 'automod_rule_delete'") 37 | op.execute("ALTER TYPE auditlogaction ADD VALUE IF NOT EXISTS 'automod_block_message'") 38 | 39 | 40 | def downgrade() -> None: 41 | pass 42 | -------------------------------------------------------------------------------- /migrations/versions/2022_10_01_2231-4ad50631992f_add_threads_and_thread_members.py: -------------------------------------------------------------------------------- 1 | """Add threads and thread_members 2 | 3 | Revision ID: 4ad50631992f 4 | Revises: 74bc9658d7ff 5 | Create Date: 2022-10-01 22:31:49.574159 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 
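# Schema notes: int_owner_id and int_member_id reference users.int_user_id, which
# (judging by int_hash() in statbot/util.py and the user_privacy_scrub.py helper)
# appears to hold a hashed form of the Discord user ID rather than the raw snowflake.
# thread_members has no primary key; its (int_member_id, thread_id, joined_at) unique
# constraint presumably allows a re-join of the same thread to be recorded as a new row.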
13 | revision = '4ad50631992f' 14 | down_revision = '74bc9658d7ff' 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade() -> None: 20 | op.create_table('threads', 21 | sa.Column('thread_id', sa.BigInteger(), nullable=False), 22 | sa.Column('name', sa.String(), nullable=True), 23 | sa.Column('invitable', sa.Boolean(), nullable=True), 24 | sa.Column('locked', sa.Boolean(), nullable=True), 25 | sa.Column('archived', sa.Boolean(), nullable=True), 26 | sa.Column('auto_archive_duration', sa.Integer(), nullable=True), 27 | sa.Column('archive_timestamp', sa.DateTime(), nullable=True), 28 | sa.Column('created_at', sa.DateTime(), nullable=True), 29 | sa.Column('edited_at', sa.DateTime(), nullable=True), 30 | sa.Column('deleted_at', sa.DateTime(), nullable=True), 31 | sa.Column('is_deleted', sa.Boolean(), nullable=True), 32 | sa.Column('int_owner_id', sa.BigInteger(), nullable=True), 33 | sa.Column('parent_id', sa.BigInteger(), nullable=True), 34 | sa.Column('guild_id', sa.BigInteger(), nullable=True), 35 | sa.ForeignKeyConstraint(['guild_id'], ['guilds.guild_id'], ), 36 | sa.ForeignKeyConstraint(['int_owner_id'], ['users.int_user_id'], ), 37 | sa.ForeignKeyConstraint(['parent_id'], ['channels.channel_id'], ), 38 | sa.PrimaryKeyConstraint('thread_id') 39 | ) 40 | op.create_table('thread_members', 41 | sa.Column('int_member_id', sa.BigInteger(), nullable=True), 42 | sa.Column('thread_id', sa.BigInteger(), nullable=True), 43 | sa.Column('joined_at', sa.DateTime(), nullable=True), 44 | sa.Column('left_at', sa.DateTime(), nullable=True), 45 | sa.ForeignKeyConstraint(['int_member_id'], ['users.int_user_id'], ), 46 | sa.ForeignKeyConstraint(['thread_id'], ['threads.thread_id'], ), 47 | sa.UniqueConstraint('int_member_id', 'thread_id', 'joined_at', name='uq_thread_members') 48 | ) 49 | 50 | 51 | def downgrade() -> None: 52 | op.drop_table('thread_members') 53 | op.drop_table('threads') 54 | -------------------------------------------------------------------------------- /migrations/env.py: -------------------------------------------------------------------------------- 1 | from logging.config import fileConfig 2 | 3 | from alembic import context 4 | from sqlalchemy import engine_from_config 5 | from sqlalchemy import pool 6 | 7 | from statbot.schema import DiscordMetadata 8 | 9 | # this is the Alembic Config object, which provides 10 | # access to the values within the .ini file in use. 11 | config = context.config 12 | 13 | # Interpret the config file for Python logging. 14 | if config.config_file_name is not None: 15 | # `fileConfig` will replace existing logging handlers. 16 | # using a "configure_logger" attribute allows this to be disabled at runtime 17 | if config.attributes.get("configure_logger", True): 18 | fileConfig(config.config_file_name) 19 | 20 | # add your model's MetaData object here 21 | # for 'autogenerate' support 22 | # from myapp import mymodel 23 | # target_metadata = mymodel.Base.metadata 24 | target_metadata = DiscordMetadata(None).metadata_obj 25 | 26 | # other values from the config, defined by the needs of env.py, 27 | # can be acquired: 28 | # my_important_option = config.get_main_option("my_important_option") 29 | # ... etc. 30 | 31 | 32 | def run_migrations_offline() -> None: 33 | """Run migrations in 'offline' mode. 34 | 35 | This configures the context with just a URL 36 | and not an Engine, though an Engine is acceptable 37 | here as well. By skipping the Engine creation 38 | we don't even need a DBAPI to be available. 
39 | 40 | Calls to context.execute() here emit the given string to the 41 | script output. 42 | 43 | """ 44 | url = config.get_main_option("sqlalchemy.url") 45 | context.configure( 46 | url=url, 47 | target_metadata=target_metadata, 48 | literal_binds=True, 49 | dialect_opts={"paramstyle": "named"}, 50 | ) 51 | 52 | with context.begin_transaction(): 53 | context.run_migrations() 54 | 55 | 56 | def run_migrations_online() -> None: 57 | """Run migrations in 'online' mode. 58 | 59 | In this scenario we need to create an Engine 60 | and associate a connection with the context. 61 | 62 | """ 63 | connectable = engine_from_config( 64 | config.get_section(config.config_ini_section), 65 | prefix="sqlalchemy.", 66 | poolclass=pool.NullPool, 67 | ) 68 | 69 | with connectable.connect() as connection: 70 | context.configure( 71 | connection=connection, target_metadata=target_metadata 72 | ) 73 | 74 | with context.begin_transaction(): 75 | context.run_migrations() 76 | 77 | 78 | if context.is_offline_mode(): 79 | run_migrations_offline() 80 | else: 81 | run_migrations_online() 82 | -------------------------------------------------------------------------------- /statbot/emoji.py: -------------------------------------------------------------------------------- 1 | # 2 | # emoji.py 3 | # 4 | # statbot - Store Discord records for later analysis 5 | # Copyright (c) 2017-2018 Ammon Smith 6 | # 7 | # statbot is available free of charge under the terms of the MIT 8 | # License. You are free to redistribute and/or modify it under those 9 | # terms. It is distributed in the hopes that it will be useful, but 10 | # WITHOUT ANY WARRANTY. See the LICENSE file for more details. 11 | # 12 | 13 | import unicodedata 14 | 15 | __all__ = [ 16 | "EmojiData", 17 | ] 18 | 19 | 20 | def get_unicode_data(emoji): 21 | try: 22 | name = [unicodedata.name(ch) for ch in emoji] 23 | category = [unicodedata.category(ch) for ch in emoji] 24 | except ValueError: 25 | # Couldn't find for codepoint 26 | name = [emoji] 27 | category = ["unicode_other"] 28 | 29 | return name, category 30 | 31 | 32 | class EmojiData: 33 | __slots__ = ( 34 | "raw", 35 | "id", 36 | "unicode", 37 | "custom", 38 | "managed", 39 | "name", 40 | "category", 41 | "roles", 42 | "guild", 43 | ) 44 | 45 | def __init__(self, emoji): 46 | self.raw = emoji 47 | 48 | if isinstance(emoji, str): 49 | name, category = get_unicode_data(emoji) 50 | 51 | self.id = 0 52 | self.unicode = emoji 53 | self.custom = False 54 | self.managed = False 55 | self.name = name 56 | self.category = category 57 | self.roles = [] 58 | self.guild = None 59 | else: 60 | self.id = emoji.id 61 | self.unicode = "" 62 | self.custom = True 63 | self.managed = getattr(emoji, "managed", None) 64 | self.name = [emoji.name] 65 | self.category = ["custom"] 66 | self.roles = getattr(emoji, "roles", None) 67 | self.guild = getattr(emoji, "guild", None) 68 | 69 | @property 70 | def mention(self): 71 | if self.id: 72 | return f"<:{self.name[0]}:{self.id}>" 73 | else: 74 | return self.unicode 75 | 76 | @property 77 | def cache_id(self): 78 | return (self.id, self.unicode) 79 | 80 | def values(self): 81 | return { 82 | "emoji_id": self.id, 83 | "emoji_unicode": self.unicode, 84 | "is_custom": self.custom, 85 | "is_managed": self.managed, 86 | "is_deleted": False, 87 | "name": self.name, 88 | "category": self.category, 89 | "roles": list(map(lambda r: r.id, self.roles or [])), 90 | "guild_id": getattr(self.guild, "id", None), 91 | } 92 | 93 | def __str__(self): 94 | return str(self.id or self.unicode) 95 | 96 | 
def __repr__(self): 97 | return f"" 98 | -------------------------------------------------------------------------------- /alembic.ini: -------------------------------------------------------------------------------- 1 | # A generic, single database configuration. 2 | 3 | [alembic] 4 | # path to migration scripts 5 | script_location = migrations 6 | 7 | # template used to generate migration file names; The default value is %%(rev)s_%%(slug)s 8 | # Uncomment the line below if you want the files to be prepended with date and time 9 | # see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file 10 | # for all available tokens 11 | file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s 12 | 13 | # sys.path path, will be prepended to sys.path if present. 14 | # defaults to the current working directory. 15 | prepend_sys_path = . 16 | 17 | # timezone to use when rendering the date within the migration file 18 | # as well as the filename. 19 | # If specified, requires the python-dateutil library that can be 20 | # installed by adding `alembic[tz]` to the pip requirements 21 | # string value is passed to dateutil.tz.gettz() 22 | # leave blank for localtime 23 | # timezone = 24 | 25 | # max length of characters to apply to the 26 | # "slug" field 27 | # truncate_slug_length = 40 28 | 29 | # set to 'true' to run the environment during 30 | # the 'revision' command, regardless of autogenerate 31 | # revision_environment = false 32 | 33 | # set to 'true' to allow .pyc and .pyo files without 34 | # a source .py file to be detected as revisions in the 35 | # versions/ directory 36 | # sourceless = false 37 | 38 | # version location specification; This defaults 39 | # to migrations/versions. When using multiple version 40 | # directories, initial revisions must be specified with --version-path. 41 | # The path separator used here should be the separator specified by "version_path_separator" below. 42 | # version_locations = %(here)s/bar:%(here)s/bat:migrations/versions 43 | 44 | # version path separator; As mentioned above, this is the character used to split 45 | # version_locations. The default within new alembic.ini files is "os", which uses os.pathsep. 46 | # If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas. 47 | # Valid values for version_path_separator are: 48 | # 49 | # version_path_separator = : 50 | # version_path_separator = ; 51 | # version_path_separator = space 52 | version_path_separator = os # Use os.pathsep. Default configuration used for new projects. 53 | 54 | # the output encoding used when revision files 55 | # are written from script.py.mako 56 | # output_encoding = utf-8 57 | 58 | # Set this programmatically in statbot.sql 59 | # sqlalchemy.url = driver://user:pass@localhost/dbname 60 | 61 | 62 | [post_write_hooks] 63 | # post_write_hooks defines scripts or Python functions that are run 64 | # on newly generated revision scripts. 
See the documentation for further 65 | # detail and examples 66 | 67 | # format using "black" - use the console_scripts runner, against the "black" entrypoint 68 | # hooks = black 69 | # black.type = console_scripts 70 | # black.entrypoint = black 71 | # black.options = -l 79 REVISION_SCRIPT_FILENAME 72 | 73 | # Logging configuration 74 | [loggers] 75 | keys = root,sqlalchemy,alembic 76 | 77 | [handlers] 78 | keys = console 79 | 80 | [formatters] 81 | keys = generic 82 | 83 | [logger_root] 84 | level = WARN 85 | handlers = console 86 | qualname = 87 | 88 | [logger_sqlalchemy] 89 | level = WARN 90 | handlers = 91 | qualname = sqlalchemy.engine 92 | 93 | [logger_alembic] 94 | level = INFO 95 | handlers = 96 | qualname = alembic 97 | 98 | [handler_console] 99 | class = StreamHandler 100 | args = (sys.stderr,) 101 | level = NOTSET 102 | formatter = generic 103 | 104 | [formatter_generic] 105 | format = %(levelname)-5.5s [%(name)s] %(message)s 106 | datefmt = %H:%M:%S 107 | -------------------------------------------------------------------------------- /statbot/audit_log.py: -------------------------------------------------------------------------------- 1 | # 2 | # audit_log.py 3 | # 4 | # statbot - Store Discord records for later analysis 5 | # Copyright (c) 2017-2018 Ammon Smith 6 | # 7 | # statbot is available free of charge under the terms of the MIT 8 | # License. You are free to redistribute and/or modify it under those 9 | # terms. It is distributed in the hopes that it will be useful, but 10 | # WITHOUT ANY WARRANTY. See the LICENSE file for more details. 11 | # 12 | 13 | import discord 14 | 15 | from .util import int_hash 16 | 17 | __all__ = [ 18 | "AuditLogData", 19 | ] 20 | 21 | NAME_ATTRS = ( 22 | "name", 23 | "icon", 24 | "region", 25 | "afk_timeout", 26 | "widget_enabled", 27 | "verification_level", 28 | "explicit_content_filter", 29 | "default_message_notifications", 30 | "vanity_url_code", 31 | "position", 32 | "type", 33 | "topic", 34 | "bitrate", 35 | "nick", 36 | "deaf", 37 | "mute", 38 | "hoist", 39 | "mentionable", 40 | "code", 41 | "max_uses", 42 | "uses", 43 | "max_age", 44 | "temporary", 45 | "changed_id", 46 | "avatar", 47 | ) 48 | 49 | ID_ATTRS = ( 50 | "owner", 51 | "afk_channel", 52 | "system_channel", 53 | "widget_channel", 54 | "channel", 55 | "inviter", 56 | ) 57 | 58 | VALUE_ATTRS = ( 59 | "raw_role_permissions", 60 | "color", 61 | "raw_allow_permissions", 62 | "raw_deny_permissions", 63 | ) 64 | 65 | 66 | class AuditLogData: 67 | __slots__ = ( 68 | "entry", 69 | "guild", 70 | ) 71 | 72 | def __init__(self, entry: discord.AuditLogEntry, guild: discord.Guild): 73 | self.entry = entry 74 | self.guild = guild 75 | 76 | def values(self): 77 | return { 78 | "audit_entry_id": self.entry.id, 79 | "guild_id": self.guild.id, 80 | "action": self.entry.action, 81 | "int_user_id": int_hash(self.entry.user.id), 82 | "reason": self.entry.reason, 83 | "category": self.entry.category, 84 | "before": self.diff_values(self.entry.before), 85 | "after": self.diff_values(self.entry.after), 86 | } 87 | 88 | @staticmethod 89 | def _get_overwrites(overwrites): 90 | if overwrites is None: 91 | return None 92 | 93 | targets = [] 94 | allow_perms = [] 95 | deny_perms = [] 96 | 97 | for target, overwrite in overwrites: 98 | targets.append(target.id) 99 | allow, deny = overwrite.pair() 100 | allow_perms.append(allow.value) 101 | deny_perms.append(deny.value) 102 | 103 | return { 104 | "targets": targets, 105 | "allow": allow_perms, 106 | "deny": deny_perms, 107 | } 108 | 109 | def 
diff_values(self, diff): 110 | if self.entry.category is None: 111 | return None 112 | 113 | attributes = {} 114 | 115 | for attr in NAME_ATTRS: 116 | try: 117 | obj = getattr(diff, attr) 118 | attributes[attr] = obj 119 | except AttributeError: 120 | pass 121 | 122 | for attr in ID_ATTRS: 123 | try: 124 | obj = getattr(diff, attr) 125 | attributes[attr] = obj.id 126 | except AttributeError: 127 | pass 128 | 129 | for attr in VALUE_ATTRS: 130 | try: 131 | obj = getattr(diff, attr) 132 | attributes[attr] = obj.value 133 | except AttributeError: 134 | pass 135 | 136 | try: 137 | obj = getattr(diff, "mfa_level") 138 | attributes["mfa"] = bool(obj) 139 | except AttributeError: 140 | pass 141 | 142 | try: 143 | obj = getattr(diff, "roles") 144 | attributes["roles"] = list(map(lambda x: x.id, obj)) 145 | except AttributeError: 146 | pass 147 | 148 | try: 149 | obj = self._get_overwrites(getattr(diff, "overwrites")) 150 | attributes["overwrites"] = obj 151 | except AttributeError: 152 | pass 153 | 154 | return attributes 155 | -------------------------------------------------------------------------------- /statbot/config.py: -------------------------------------------------------------------------------- 1 | # 2 | # config.py 3 | # 4 | # statbot - Store Discord records for later analysis 5 | # Copyright (c) 2017-2018 Ammon Smith 6 | # 7 | # statbot is available free of charge under the terms of the MIT 8 | # License. You are free to redistribute and/or modify it under those 9 | # terms. It is distributed in the hopes that it will be useful, but 10 | # WITHOUT ANY WARRANTY. See the LICENSE file for more details. 11 | # 12 | 13 | from numbers import Number 14 | import yaml 15 | 16 | from .util import null_logger 17 | 18 | __all__ = [ 19 | "check", 20 | "load_config", 21 | ] 22 | 23 | 24 | def is_string_or_null(obj): 25 | """ 26 | Determines if the given object 27 | is of type str or is None. 28 | """ 29 | 30 | return isinstance(obj, str) or obj is None 31 | 32 | 33 | def is_int_list(obj): 34 | if not isinstance(obj, list): 35 | return False 36 | 37 | for item in obj: 38 | if not isinstance(item, int): 39 | return False 40 | return True 41 | 42 | 43 | def is_string_list(obj): 44 | if not isinstance(obj, list): 45 | return False 46 | 47 | for item in obj: 48 | if not isinstance(item, str): 49 | return False 50 | return True 51 | 52 | 53 | def check(cfg, logger=null_logger): 54 | """ 55 | Determines if the given dictionary has 56 | the correct fields and types. 
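    Returns True when every required field is present with the expected type;
    otherwise the problem is reported through the given logger and False is returned.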
57 | """ 58 | 59 | # pylint: disable=too-many-return-statements 60 | try: 61 | if not is_int_list(cfg["guild-ids"]): 62 | logger.error("Configuration field 'guilds' is not an int list") 63 | return False 64 | if not isinstance(cfg["cache"]["event-size"], int): 65 | logger.error("Configuration field 'cache.event-size' is not an int") 66 | return False 67 | if cfg["cache"]["event-size"] <= 0: 68 | logger.error("Configuration field 'cache.event-size' is zero or negative") 69 | return False 70 | if not isinstance(cfg["cache"]["lookup-size"], int): 71 | logger.error("Configuration field 'cache.lookup-size' is not an int") 72 | return False 73 | if cfg["cache"]["lookup-size"] <= 0: 74 | logger.error("Configuration field 'cache.lookup-size' is zero or negative") 75 | return False 76 | if not isinstance(cfg["logger"]["full-messages"], bool): 77 | logger.error("Configuration field 'logger.full-messages' is not a bool") 78 | return False 79 | if not isinstance(cfg["logger"]["ignored-events"], bool): 80 | logger.error("Configuration field 'logger.ignored-events' is not a bool") 81 | return False 82 | if not isinstance(cfg["crawler"]["batch-size"], Number): 83 | logger.error("Configuration field 'crawler.batch-size' is not a number") 84 | return False 85 | if cfg["crawler"]["batch-size"] <= 0: 86 | logger.error("Configuration field 'crawler.batch-size' is zero or negative") 87 | return False 88 | if not isinstance(cfg["crawler"]["delays"]["yield"], Number): 89 | logger.error("Configuration field 'crawler.yield.delay' is not a number") 90 | return False 91 | if cfg["crawler"]["delays"]["yield"] <= 0: 92 | logger.error( 93 | "Configuration field 'crawler.yield.delay' is zero or negative" 94 | ) 95 | return False 96 | if not isinstance(cfg["crawler"]["delays"]["empty-source"], Number): 97 | logger.error( 98 | "Configuration field 'crawler.delay.empty-source' is not a number" 99 | ) 100 | return False 101 | if cfg["crawler"]["delays"]["empty-source"] <= 0: 102 | logger.error( 103 | "Configuration field 'crawler.delay.empty-source' is zero or negative" 104 | ) 105 | return False 106 | if not isinstance(cfg["bot"]["token"], str): 107 | logger.error("Configuration field 'bot.token' is not a string") 108 | return False 109 | if not isinstance(cfg["bot"]["db-url"], str): 110 | logger.error("Configuration field 'bot.db-url' is not a string") 111 | return False 112 | 113 | except KeyError as err: 114 | logger.error(f"Configuration missing field: {err}") 115 | return False 116 | else: 117 | return True 118 | 119 | 120 | def load_config(fn, logger=null_logger): 121 | """ 122 | Loads a YAML config from the given file. 123 | This returns a tuple of the object and whether 124 | it is valid or not. 125 | """ 126 | 127 | with open(fn, "r") as fh: 128 | obj = yaml.safe_load(fh) 129 | return obj, check(obj, logger) 130 | -------------------------------------------------------------------------------- /statbot/__main__.py: -------------------------------------------------------------------------------- 1 | # 2 | # __main__.py 3 | # 4 | # statbot - Store Discord records for later analysis 5 | # Copyright (c) 2017-2018 Ammon Smith 6 | # 7 | # statbot is available free of charge under the terms of the MIT 8 | # License. You are free to redistribute and/or modify it under those 9 | # terms. It is distributed in the hopes that it will be useful, but 10 | # WITHOUT ANY WARRANTY. See the LICENSE file for more details. 
11 | # 12 | 13 | import argparse 14 | import logging 15 | import sys 16 | 17 | from .client import EventIngestionClient 18 | from .config import load_config 19 | from .crawler import AuditLogCrawler, HistoryCrawler, ThreadCrawler 20 | from .sql import DiscordSqlHandler 21 | 22 | __all__ = [ 23 | "LOG_FILE", 24 | "LOG_FILE_MODE", 25 | ] 26 | 27 | LOG_FILE = "bot.log" 28 | LOG_FILE_MODE = "w" 29 | LOG_FORMAT = "[%(levelname)s] %(name)s: %(message)s" 30 | LOG_DATE_FORMAT = "[%d/%m/%Y %H:%M:%S]" 31 | 32 | 33 | class StderrTee: 34 | __slots__ = ( 35 | "fh", 36 | "stderr", 37 | ) 38 | 39 | def __init__(self, filename, mode): 40 | self.fh = open(filename, mode) 41 | self.stderr = sys.stderr 42 | 43 | def __del__(self): 44 | sys.stderr = self.stderr 45 | self.fh.close() 46 | 47 | def write(self, data): 48 | self.fh.write(data) 49 | self.stderr.write(data) 50 | 51 | 52 | ERR_FILE = "errors.log" 53 | ERR_FILE_MODE = "w" 54 | 55 | sys.stderr = StderrTee(ERR_FILE, ERR_FILE_MODE) 56 | 57 | if __name__ == "__main__": 58 | # Parse arguments 59 | argparser = argparse.ArgumentParser(description="Bot to track posting data") 60 | argparser.add_argument( 61 | "-q", 62 | "--quiet", 63 | "--no-stdout", 64 | dest="stdout", 65 | action="store_false", 66 | help="Don't output to standard out.", 67 | ) 68 | argparser.add_argument( 69 | "-v", 70 | "--verbose", 71 | dest="verbose", 72 | action="count", 73 | help="Increase the logger's verbosity.", 74 | ) 75 | argparser.add_argument( 76 | "-d", 77 | "--debug", 78 | dest="debug", 79 | action="store_true", 80 | help="Set logging level to debug.", 81 | ) 82 | argparser.add_argument( 83 | "-g", 84 | "--guild-id", 85 | dest="guild_ids", 86 | action="append", 87 | type=int, 88 | help="Override the list of guild IDs to look at.", 89 | ) 90 | argparser.add_argument( 91 | "-B", 92 | "--batch-size", 93 | dest="batch_size", 94 | type=int, 95 | help="Override the batch size used during crawling.", 96 | ) 97 | argparser.add_argument( 98 | "-Q", 99 | "--queue-size", 100 | dest="queue_size", 101 | type=int, 102 | help="Override the queue size used during crawling.", 103 | ) 104 | argparser.add_argument( 105 | "-Y", 106 | "--yield-delay", 107 | dest="yield_delay", 108 | type=float, 109 | help="Override the yield delay during crawling.", 110 | ) 111 | argparser.add_argument( 112 | "-E", 113 | "--empty-source-delay", 114 | dest="empty_source_delay", 115 | type=float, 116 | help="Override the empty source delay during crawling.", 117 | ) 118 | argparser.add_argument( 119 | "-T", "--token", dest="token", help="Override the bot token used to log in." 120 | ) 121 | argparser.add_argument( 122 | "-U", "--db-url", dest="db_url", help="Override the database URL to connect to." 123 | ) 124 | argparser.add_argument( 125 | "config_file", help="Specify a configuration file to use. Keep it secret!" 
126 | ) 127 | args = argparser.parse_args() 128 | 129 | # Set up logging 130 | log_fmtr = logging.Formatter(LOG_FORMAT, datefmt=LOG_DATE_FORMAT) 131 | log_hndl = logging.FileHandler(filename=LOG_FILE, mode=LOG_FILE_MODE) 132 | log_hndl.setFormatter(log_fmtr) 133 | log_level = logging.DEBUG if args.debug else logging.INFO 134 | 135 | # Create instances 136 | def get_logger(name, level=log_level): 137 | logger = logging.getLogger(name) 138 | logger.setLevel(level=level) 139 | return logger 140 | 141 | discord_logger = get_logger("discord", logging.INFO) 142 | main_logger = get_logger("statbot") 143 | event_logger = get_logger("statbot.event") 144 | crawler_logger = get_logger("statbot.crawler") 145 | sql_logger = get_logger("statbot.sql") 146 | del get_logger 147 | 148 | # Map logging to outputs 149 | main_logger.addHandler(log_hndl) 150 | if args.debug: 151 | discord_logger.addHandler(log_hndl) 152 | 153 | if args.stdout: 154 | log_out_hndl = logging.StreamHandler(sys.stdout) 155 | log_out_hndl.setFormatter(log_fmtr) 156 | main_logger.addHandler(log_out_hndl) 157 | if args.debug: 158 | discord_logger.addHandler(log_out_hndl) 159 | 160 | # Get and verify configuration 161 | config, valid = load_config(args.config_file, main_logger) 162 | if not valid: 163 | main_logger.error("Configuration file was invalid.") 164 | sys.exit(1) 165 | 166 | # Override configuration settings 167 | verbosity = getattr(args, "verbosity", 0) 168 | if verbosity >= 1: 169 | config["logger"]["full-messages"] = True 170 | if verbosity >= 2: 171 | config["logger"]["ignored-events"] = True 172 | if verbosity >= 3: 173 | discord_logger.addHandler(log_hndl) 174 | 175 | if args.guild_ids is not None: 176 | config["guild-ids"] = args.guild_ids 177 | 178 | if args.batch_size is not None: 179 | config["crawler"]["batch-size"] = args.batch_size 180 | 181 | if args.queue_size is not None: 182 | config["crawler"]["queue-size"] = args.queue_size 183 | 184 | if args.yield_delay is not None: 185 | config["crawler"]["delays"]["yield"] = args.yield_delay 186 | 187 | if args.empty_source_delay is not None: 188 | config["crawler"]["delays"]["empty-source"] = args.empty_source_delay 189 | 190 | if args.token is not None: 191 | config["bot"]["token"] = args.token 192 | 193 | if args.db_url is not None: 194 | config["bot"]["db-url"] = args.db_url 195 | 196 | # Create SQL handler 197 | sql = DiscordSqlHandler(config["bot"]["db-url"], config["cache"], sql_logger) 198 | 199 | # Create client 200 | main_logger.info("Setting up bot") 201 | client = EventIngestionClient( 202 | config, 203 | sql, 204 | logger=event_logger, 205 | crawlers=[HistoryCrawler, AuditLogCrawler, ThreadCrawler], 206 | crawler_logger=crawler_logger 207 | ) 208 | main_logger.info("Starting bot, waiting for discord.py...") 209 | 210 | # Start main loop 211 | client.run_with_token() 212 | -------------------------------------------------------------------------------- /pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | 3 | # Specify a configuration file. 4 | #rcfile= 5 | 6 | # Python code to execute, usually for sys.path manipulation such as 7 | # pygtk.require(). 8 | #init-hook= 9 | 10 | # Add files or directories to the blacklist. They should be base names, not 11 | # paths. 12 | ignore= 13 | 14 | # Pickle collected data for later comparisons. 15 | persistent=no 16 | 17 | # List of plugins (as comma separated values of python modules names) to load, 18 | # usually to register additional checkers. 
19 | load-plugins= 20 | 21 | # Use multiple processes to speed up Pylint. 22 | jobs=2 23 | 24 | # Allow loading of arbitrary C extensions. Extensions are imported into the 25 | # active Python interpreter and may run arbitrary code. 26 | unsafe-load-any-extension=no 27 | 28 | # A comma-separated list of package or module names from where C extensions may 29 | # be loaded. Extensions are loading into the active Python interpreter and may 30 | # run arbitrary code 31 | extension-pkg-whitelist= 32 | 33 | # Allow optimization of some AST trees. This will activate a peephole AST 34 | # optimizer, which will apply various small optimizations. For instance, it can 35 | # be used to obtain the result of joining multiple strings with the addition 36 | # operator. Joining a lot of strings can lead to a maximum recursion error in 37 | # Pylint and this flag can prevent that. It has one side effect, the resulting 38 | # AST will be different than the one from reality. 39 | optimize-ast=yes 40 | 41 | 42 | [REPORTS] 43 | 44 | # Set the output format. Available formats are text, parseable, colorized, msvs 45 | # (visual studio) and html. You can also give a reporter class, eg 46 | # mypackage.mymodule.MyReporterClass. 47 | output-format=colorized 48 | 49 | # Put messages in a separate file for each module / package specified on the 50 | # command line instead of printing them on stdout. Reports (if any) will be 51 | # written in a file name "pylint_global.[txt|html]". 52 | files-output=no 53 | 54 | # Tells whether to display a full report or only the messages 55 | reports=no 56 | 57 | # Python expression which should return a note less than 10 (10 is the highest 58 | # note). You have access to the variables errors warning, statement which 59 | # respectively contain the number of errors / warnings messages and the total 60 | # number of statements analyzed. This is used by the global evaluation report 61 | # (RP0004). 62 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) 63 | 64 | # Template used to display messages. This is a python new-style format string 65 | # used to format the message information. See doc for all details 66 | #msg-template={C}:{line:3d},{column:2d}: {msg} ({symbol}) 67 | 68 | 69 | [MESSAGES CONTROL] 70 | 71 | # Only show warnings with the listed confidence levels. Leave empty to show 72 | # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED 73 | confidence= 74 | enable= 75 | disable=missing-docstring,invalid-name,bad-continuation,unused-argument,import-error,too-many-locals,redefined-builtin,too-many-branches,undefined-loop-variable,bare-except,protected-access,broad-except,no-self-use,too-few-public-methods,no-init,too-many-instance-attributes,too-many-public-methods,no-else-return,pointless-string-statement,len-as-condition 76 | 77 | 78 | [TYPECHECK] 79 | 80 | # Tells whether missing members accessed in mixin class should be ignored. A 81 | # mixin class is detected if its name ends with "mixin" (case insensitive). 82 | ignore-mixin-members=yes 83 | 84 | # List of module names for which member attributes should not be checked 85 | # (useful for modules/projects where namespaces are manipulated during runtime 86 | # and thus existing member attributes cannot be deduced by static analysis 87 | ignored-modules= 88 | 89 | # List of classes names for which member attributes should not be checked 90 | # (useful for classes with attributes dynamically set). 
91 | ignored-classes= 92 | 93 | # List of members which are set dynamically and missed by pylint inference 94 | # system, and so shouldn't trigger E0201 when accessed. Python regular 95 | # expressions are accepted. 96 | generated-members=__members__ 97 | 98 | 99 | [LOGGING] 100 | 101 | # Logging modules to check that the string format arguments are in logging 102 | # function parameter format 103 | logging-modules=logging 104 | 105 | 106 | [FORMAT] 107 | 108 | # Maximum number of characters on a single line. 109 | max-line-length=125 110 | 111 | # Regexp for a line that is allowed to be longer than the limit. 112 | ignore-long-lines=(# )??$ 113 | 114 | # Allow the body of an if to be on the same line as the test if there is no 115 | # else. 116 | single-line-if-stmt=no 117 | 118 | # List of optional constructs for which whitespace checking is disabled 119 | no-space-check=trailing-comma,dict-separator 120 | 121 | # Maximum number of lines in a module 122 | max-module-lines=2000 123 | 124 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 125 | # tab). 126 | indent-string=' ' 127 | 128 | # Number of spaces of indent required inside a hanging or continued line. 129 | indent-after-paren=4 130 | 131 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. 132 | expected-line-ending-format= 133 | 134 | 135 | [VARIABLES] 136 | 137 | # Tells whether we should check for unused import in __init__ files. 138 | init-import=no 139 | 140 | # A regular expression matching the name of dummy variables (i.e. expectedly 141 | # not used). 142 | dummy-variables-rgx=_$|dummy 143 | 144 | # List of additional names supposed to be defined in builtins. Remember that 145 | # you should avoid to define new builtins when possible. 146 | additional-builtins= 147 | 148 | # List of strings which can identify a callback function by name. A callback 149 | # name must start or end with one of those strings. 150 | callbacks=cb_,_cb 151 | 152 | 153 | [BASIC] 154 | 155 | # List of builtins function names that should not be used, separated by a comma 156 | bad-functions= 157 | 158 | # Good variable names which should always be accepted, separated by a comma 159 | good-names=i,j,k,ex,Run,_ 160 | 161 | # Bad variable names which should always be refused, separated by a comma 162 | bad-names=foo,bar,baz,toto,tutu,tata 163 | 164 | # Colon-delimited sets of names that determine each other's naming style when 165 | # the name regexes allow several styles. 
166 | name-group= 167 | 168 | # Include a hint for the correct naming format with invalid-name 169 | include-naming-hint=no 170 | 171 | # Regular expression matching correct method names 172 | method-rgx=[a-z_][a-z0-9_]{2,30}$ 173 | 174 | # Naming hint for method names 175 | method-name-hint=[a-z_][a-z0-9_]{2,30}$ 176 | 177 | # Regular expression matching correct argument names 178 | argument-rgx=[a-z_][a-z0-9_]{2,30}$ 179 | 180 | # Naming hint for argument names 181 | argument-name-hint=[a-z_][a-z0-9_]{2,30}$ 182 | 183 | # Regular expression matching correct attribute names 184 | attr-rgx=[a-z_][a-z0-9_]{2,30}$ 185 | 186 | # Naming hint for attribute names 187 | attr-name-hint=[a-z_][a-z0-9_]{2,30}$ 188 | 189 | # Regular expression matching correct class attribute names 190 | class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ 191 | 192 | # Naming hint for class attribute names 193 | class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ 194 | 195 | # Regular expression matching correct constant names 196 | const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ 197 | 198 | # Naming hint for constant names 199 | const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$ 200 | 201 | # Regular expression matching correct class names 202 | class-rgx=[A-Z_][a-zA-Z0-9]+$ 203 | 204 | # Naming hint for class names 205 | class-name-hint=[A-Z_][a-zA-Z0-9]+$ 206 | 207 | # Regular expression matching correct function names 208 | function-rgx=[a-z_][a-z0-9_]{2,30}$ 209 | 210 | # Naming hint for function names 211 | function-name-hint=[a-z_][a-z0-9_]{2,30}$ 212 | 213 | # Regular expression matching correct variable names 214 | variable-rgx=[a-z_][a-z0-9_]{2,30}$ 215 | 216 | # Naming hint for variable names 217 | variable-name-hint=[a-z_][a-z0-9_]{2,30}$ 218 | 219 | # Regular expression matching correct inline iteration names 220 | inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ 221 | 222 | # Naming hint for inline iteration names 223 | inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$ 224 | 225 | # Regular expression matching correct module names 226 | module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ 227 | 228 | # Naming hint for module names 229 | module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ 230 | 231 | # Regular expression which should only match function or class names that do 232 | # not require a docstring. 233 | no-docstring-rgx=__.*__ 234 | 235 | # Minimum line length for functions/classes that require docstrings, shorter 236 | # ones are exempt. 237 | docstring-min-length=-1 238 | 239 | 240 | [SIMILARITIES] 241 | 242 | # Minimum lines number of a similarity. 243 | min-similarity-lines=10 244 | 245 | # Ignore comments when computing similarities. 246 | ignore-comments=yes 247 | 248 | # Ignore docstrings when computing similarities. 249 | ignore-docstrings=yes 250 | 251 | # Ignore imports when computing similarities. 252 | ignore-imports=yes 253 | 254 | 255 | [MISCELLANEOUS] 256 | 257 | # List of note tags to take in consideration, separated by a comma. 258 | notes=FIXME,XXX,TODO 259 | 260 | 261 | [SPELLING] 262 | 263 | # Spelling dictionary name. Available dictionaries: none. To make it working 264 | # install python-enchant package. 265 | spelling-dict= 266 | 267 | # List of comma separated words that should not be checked. 268 | spelling-ignore-words= 269 | 270 | # A path to a file that contains private dictionary; one word per line. 
271 | spelling-private-dict-file= 272 | 273 | # Tells whether to store unknown words to indicated private dictionary in 274 | # --spelling-private-dict-file option instead of raising a message. 275 | spelling-store-unknown-words=no 276 | 277 | 278 | [DESIGN] 279 | 280 | # Maximum number of arguments for function / method 281 | max-args=10 282 | 283 | # Argument names that match this expression will be ignored. Default to name 284 | # with leading underscore 285 | ignored-argument-names=_.* 286 | 287 | # Maximum number of locals for function / method body 288 | max-locals=15 289 | 290 | # Maximum number of return / yield for function / method body 291 | max-returns=6 292 | 293 | # Maximum number of branch for function / method body 294 | max-branches=20 295 | 296 | # Maximum number of statements in function / method body 297 | max-statements=100 298 | 299 | # Maximum number of parents for a class (see R0901). 300 | max-parents=7 301 | 302 | # Maximum number of attributes for a class (see R0902). 303 | max-attributes=7 304 | 305 | # Minimum number of public methods for a class (see R0903). 306 | min-public-methods=2 307 | 308 | # Maximum number of public methods for a class (see R0904). 309 | max-public-methods=20 310 | 311 | 312 | [CLASSES] 313 | 314 | # List of method names used to declare (i.e. assign) instance attributes. 315 | defining-attr-methods=__init__,__new__,setUp 316 | 317 | # List of valid names for the first argument in a class method. 318 | valid-classmethod-first-arg=cls 319 | 320 | # List of valid names for the first argument in a metaclass class method. 321 | valid-metaclass-classmethod-first-arg=mcs 322 | 323 | # List of member names, which should be excluded from the protected access 324 | # warning. 325 | exclude-protected=_asdict,_fields,_replace,_source,_make 326 | 327 | 328 | [IMPORTS] 329 | 330 | # Deprecated modules which should not be used, separated by a comma 331 | deprecated-modules=stringprep,optparse 332 | 333 | # Create a graph of every (i.e. internal and external) dependencies in the 334 | # given file (report RP0402 must not be disabled) 335 | import-graph= 336 | 337 | # Create a graph of external dependencies in the given file (report RP0402 must 338 | # not be disabled) 339 | ext-import-graph= 340 | 341 | # Create a graph of internal dependencies in the given file (report RP0402 must 342 | # not be disabled) 343 | int-import-graph= 344 | 345 | 346 | [EXCEPTIONS] 347 | 348 | # Exceptions that will emit a warning when being caught. 
Defaults to 349 | # "Exception" 350 | overgeneral-exceptions=Exception 351 | -------------------------------------------------------------------------------- /statbot/schema.py: -------------------------------------------------------------------------------- 1 | import discord 2 | from enum import Enum as BaseEnum 3 | from sqlalchemy import ( 4 | ARRAY, 5 | Boolean, 6 | BigInteger, 7 | Column, 8 | DateTime, 9 | Enum, 10 | Integer, 11 | JSON, 12 | LargeBinary, 13 | SmallInteger, 14 | String, 15 | Table, 16 | Unicode, 17 | UnicodeText, 18 | ForeignKey, 19 | MetaData, 20 | UniqueConstraint, 21 | ) 22 | 23 | from .mention import MentionType 24 | 25 | 26 | class DeprecatedVoiceRegion(BaseEnum): 27 | us_west = 'us-west' 28 | us_east = 'us-east' 29 | us_south = 'us-south' 30 | us_central = 'us-central' 31 | eu_west = 'eu-west' 32 | eu_central = 'eu-central' 33 | singapore = 'singapore' 34 | london = 'london' 35 | sydney = 'sydney' 36 | amsterdam = 'amsterdam' 37 | frankfurt = 'frankfurt' 38 | brazil = 'brazil' 39 | hongkong = 'hongkong' 40 | russia = 'russia' 41 | japan = 'japan' 42 | southafrica = 'southafrica' 43 | south_korea = 'south-korea' 44 | india = 'india' 45 | europe = 'europe' 46 | dubai = 'dubai' 47 | vip_us_east = 'vip-us-east' 48 | vip_us_west = 'vip-us-west' 49 | vip_amsterdam = 'vip-amsterdam' 50 | deprecated = 'deprecated' 51 | 52 | def __str__(self): 53 | return self.value 54 | 55 | 56 | class DiscordMetadata: 57 | def __init__(self, db): 58 | self.metadata_obj = MetaData(db) 59 | 60 | self.tb_messages = Table( 61 | "messages", 62 | self.metadata_obj, 63 | Column("message_id", BigInteger, primary_key=True), 64 | Column("created_at", DateTime), 65 | Column("edited_at", DateTime, nullable=True), 66 | Column("deleted_at", DateTime, nullable=True), 67 | Column("message_type", Enum(discord.MessageType)), 68 | Column("system_content", UnicodeText), 69 | Column("content", UnicodeText), 70 | Column("embeds", JSON), 71 | Column("attachments", SmallInteger), 72 | Column("webhook_id", BigInteger, nullable=True), 73 | Column("int_user_id", BigInteger), 74 | Column("channel_id", BigInteger, ForeignKey("channels.channel_id"), nullable=True), 75 | Column("thread_id", BigInteger, ForeignKey("threads.thread_id"), nullable=True), 76 | Column("guild_id", BigInteger, ForeignKey("guilds.guild_id")), 77 | ) 78 | 79 | self.tb_reactions = Table( 80 | "reactions", 81 | self.metadata_obj, 82 | Column("message_id", BigInteger), 83 | Column("emoji_id", BigInteger), 84 | Column("emoji_unicode", Unicode(7)), 85 | Column("int_user_id", BigInteger, ForeignKey("users.int_user_id")), 86 | Column("created_at", DateTime, nullable=True), 87 | Column("deleted_at", DateTime, nullable=True), 88 | Column("channel_id", BigInteger, ForeignKey("channels.channel_id")), 89 | Column("guild_id", BigInteger, ForeignKey("guilds.guild_id")), 90 | UniqueConstraint( 91 | "message_id", 92 | "emoji_id", 93 | "emoji_unicode", 94 | "int_user_id", 95 | "created_at", 96 | name="uq_reactions", 97 | ), 98 | ) 99 | 100 | self.tb_typing = Table( 101 | "typing", 102 | self.metadata_obj, 103 | Column("timestamp", DateTime), 104 | Column("int_user_id", BigInteger, ForeignKey("users.int_user_id")), 105 | Column("channel_id", BigInteger, ForeignKey("channels.channel_id"), nullable=True), 106 | Column("thread_id", BigInteger, ForeignKey("threads.thread_id"), nullable=True), 107 | Column("guild_id", BigInteger, ForeignKey("guilds.guild_id")), 108 | UniqueConstraint( 109 | "timestamp", "int_user_id", "channel_id", "thread_id", "guild_id", 
name="uq_typing" 110 | ), 111 | ) 112 | 113 | self.tb_pins = Table( 114 | "pins", 115 | self.metadata_obj, 116 | Column("pin_id", BigInteger, primary_key=True), 117 | Column( 118 | "message_id", 119 | BigInteger, 120 | ForeignKey("messages.message_id"), 121 | primary_key=True, 122 | ), 123 | Column("pinner_id", BigInteger, ForeignKey("users.int_user_id")), 124 | Column("int_user_id", BigInteger, ForeignKey("users.int_user_id")), 125 | Column("channel_id", BigInteger, ForeignKey("channels.channel_id")), 126 | Column("guild_id", BigInteger, ForeignKey("guilds.guild_id")), 127 | ) 128 | 129 | self.tb_mentions = Table( 130 | "mentions", 131 | self.metadata_obj, 132 | Column("mentioned_id", BigInteger, primary_key=True), 133 | Column("type", Enum(MentionType), primary_key=True), 134 | Column( 135 | "message_id", 136 | BigInteger, 137 | ForeignKey("messages.message_id"), 138 | primary_key=True, 139 | ), 140 | Column("channel_id", BigInteger, ForeignKey("channels.channel_id")), 141 | Column("guild_id", BigInteger, ForeignKey("guilds.guild_id")), 142 | UniqueConstraint("mentioned_id", "type", "message_id", name="uq_mention"), 143 | ) 144 | 145 | self.tb_guilds = Table( 146 | "guilds", 147 | self.metadata_obj, 148 | Column("guild_id", BigInteger, primary_key=True), 149 | Column("int_owner_id", BigInteger, ForeignKey("users.int_user_id")), 150 | Column("name", Unicode), 151 | Column("icon", String), 152 | Column("voice_region", Enum(DeprecatedVoiceRegion)), 153 | Column("afk_channel_id", BigInteger, nullable=True), 154 | Column("afk_timeout", Integer), 155 | Column("mfa", Boolean), 156 | Column("verification_level", Enum(discord.VerificationLevel)), 157 | Column("explicit_content_filter", Enum(discord.ContentFilter)), 158 | Column("features", ARRAY(String)), 159 | Column("splash", String, nullable=True), 160 | ) 161 | 162 | self.tb_channels = Table( 163 | "channels", 164 | self.metadata_obj, 165 | Column("channel_id", BigInteger, primary_key=True), 166 | Column("name", String), 167 | Column("is_nsfw", Boolean), 168 | Column("is_deleted", Boolean), 169 | Column("position", SmallInteger), 170 | Column("topic", UnicodeText, nullable=True), 171 | Column("changed_roles", ARRAY(BigInteger)), 172 | Column( 173 | "category_id", 174 | BigInteger, 175 | ForeignKey("channel_categories.category_id"), 176 | nullable=True, 177 | ), 178 | Column("guild_id", BigInteger, ForeignKey("guilds.guild_id")), 179 | ) 180 | 181 | self.tb_voice_channels = Table( 182 | "voice_channels", 183 | self.metadata_obj, 184 | Column("voice_channel_id", BigInteger, primary_key=True), 185 | Column("name", Unicode), 186 | Column("is_deleted", Boolean), 187 | Column("position", SmallInteger), 188 | Column("bitrate", Integer), 189 | Column("user_limit", SmallInteger), 190 | Column("changed_roles", ARRAY(BigInteger)), 191 | Column( 192 | "category_id", 193 | BigInteger, 194 | ForeignKey("channel_categories.category_id"), 195 | nullable=True, 196 | ), 197 | Column("guild_id", BigInteger, ForeignKey("guilds.guild_id")), 198 | ) 199 | 200 | self.tb_channel_categories = Table( 201 | "channel_categories", 202 | self.metadata_obj, 203 | Column("category_id", BigInteger, primary_key=True), 204 | Column("name", Unicode), 205 | Column("position", SmallInteger), 206 | Column("is_deleted", Boolean), 207 | Column("is_nsfw", Boolean), 208 | Column("changed_roles", ARRAY(BigInteger)), 209 | Column( 210 | "parent_category_id", 211 | BigInteger, 212 | ForeignKey("channel_categories.category_id"), 213 | nullable=True, 214 | ), 215 | Column("guild_id", 
BigInteger, ForeignKey("guilds.guild_id")), 216 | ) 217 | 218 | self.tb_users = Table( 219 | "users", 220 | self.metadata_obj, 221 | Column("int_user_id", BigInteger, primary_key=True), 222 | Column("real_user_id", BigInteger), 223 | Column("name", Unicode), 224 | Column("discriminator", SmallInteger), 225 | Column("avatar", String, nullable=True), 226 | Column("is_deleted", Boolean), 227 | Column("is_bot", Boolean), 228 | ) 229 | 230 | self.tb_guild_membership = Table( 231 | "guild_membership", 232 | self.metadata_obj, 233 | Column( 234 | "int_user_id", 235 | BigInteger, 236 | ForeignKey("users.int_user_id"), 237 | primary_key=True, 238 | ), 239 | Column("guild_id", BigInteger, ForeignKey("guilds.guild_id"), primary_key=True), 240 | Column("is_member", Boolean), 241 | Column("joined_at", DateTime, nullable=True), 242 | Column("nick", Unicode(32), nullable=True), 243 | UniqueConstraint("int_user_id", "guild_id", name="uq_guild_membership"), 244 | ) 245 | 246 | self.tb_role_membership = Table( 247 | "role_membership", 248 | self.metadata_obj, 249 | Column("role_id", BigInteger, ForeignKey("roles.role_id")), 250 | Column("guild_id", BigInteger, ForeignKey("guilds.guild_id")), 251 | Column("int_user_id", BigInteger, ForeignKey("users.int_user_id")), 252 | UniqueConstraint("role_id", "int_user_id", name="uq_role_membership"), 253 | ) 254 | 255 | self.tb_avatar_history = Table( 256 | "avatar_history", 257 | self.metadata_obj, 258 | Column("user_id", BigInteger, primary_key=True), 259 | Column("timestamp", DateTime, primary_key=True), 260 | Column("avatar", LargeBinary), 261 | Column("avatar_ext", String), 262 | ) 263 | 264 | self.tb_username_history = Table( 265 | "username_history", 266 | self.metadata_obj, 267 | Column("user_id", BigInteger, primary_key=True), 268 | Column("timestamp", DateTime, primary_key=True), 269 | Column("username", Unicode), 270 | ) 271 | 272 | self.tb_nickname_history = Table( 273 | "nickname_history", 274 | self.metadata_obj, 275 | Column("user_id", BigInteger, primary_key=True), 276 | Column("timestamp", DateTime, primary_key=True), 277 | Column("nickname", Unicode), 278 | ) 279 | 280 | self.tb_emojis = Table( 281 | "emojis", 282 | self.metadata_obj, 283 | Column("emoji_id", BigInteger), 284 | Column("emoji_unicode", Unicode(7)), 285 | Column("is_custom", Boolean), 286 | Column("is_managed", Boolean, nullable=True), 287 | Column("is_deleted", Boolean), 288 | Column("name", ARRAY(String)), 289 | Column("category", ARRAY(String)), 290 | Column("roles", ARRAY(BigInteger), nullable=True), 291 | Column("guild_id", BigInteger, nullable=True), 292 | UniqueConstraint("emoji_id", "emoji_unicode", name="uq_emoji"), 293 | ) 294 | 295 | self.tb_roles = Table( 296 | "roles", 297 | self.metadata_obj, 298 | Column("role_id", BigInteger, primary_key=True), 299 | Column("name", Unicode), 300 | Column("color", Integer), 301 | Column("raw_permissions", BigInteger), 302 | Column("guild_id", BigInteger, ForeignKey("guilds.guild_id")), 303 | Column("is_hoisted", Boolean), 304 | Column("is_managed", Boolean), 305 | Column("is_mentionable", Boolean), 306 | Column("is_deleted", Boolean), 307 | Column("position", SmallInteger), 308 | ) 309 | 310 | self.tb_audit_log = Table( 311 | "audit_log", 312 | self.metadata_obj, 313 | Column("audit_entry_id", BigInteger, primary_key=True), 314 | Column("guild_id", BigInteger, ForeignKey("guilds.guild_id")), 315 | Column("action", Enum(discord.AuditLogAction)), 316 | Column("int_user_id", BigInteger, ForeignKey("users.int_user_id")), 317 | 
Column("reason", Unicode, nullable=True), 318 | Column("category", Enum(discord.AuditLogActionCategory), nullable=True), 319 | Column("before", JSON), 320 | Column("after", JSON), 321 | UniqueConstraint("audit_entry_id", "guild_id", name="uq_audit_log"), 322 | ) 323 | 324 | self.tb_channel_crawl = Table( 325 | "channel_crawl", 326 | self.metadata_obj, 327 | Column( 328 | "channel_id", 329 | BigInteger, 330 | ForeignKey("channels.channel_id"), 331 | primary_key=True, 332 | ), 333 | Column("last_message_id", BigInteger), 334 | ) 335 | 336 | self.tb_audit_log_crawl = Table( 337 | "audit_log_crawl", 338 | self.metadata_obj, 339 | Column("guild_id", BigInteger, ForeignKey("guilds.guild_id"), primary_key=True), 340 | Column("last_audit_entry_id", BigInteger), 341 | ) 342 | 343 | self.tb_threads = Table( 344 | "threads", 345 | self.metadata_obj, 346 | Column("thread_id", BigInteger, primary_key=True), 347 | Column("name", String), 348 | Column("invitable", Boolean), 349 | Column("locked", Boolean), 350 | Column("archived", Boolean), 351 | Column("auto_archive_duration", Integer), 352 | Column("archive_timestamp", DateTime), 353 | Column("created_at", DateTime, nullable=True), 354 | Column("edited_at", DateTime, nullable=True), 355 | Column("deleted_at", DateTime, nullable=True), 356 | Column("is_deleted", Boolean), 357 | Column("int_owner_id", BigInteger, ForeignKey("users.int_user_id")), 358 | Column("parent_id", BigInteger, ForeignKey("channels.channel_id")), 359 | Column("guild_id", BigInteger, ForeignKey("guilds.guild_id")), 360 | ) 361 | 362 | self.tb_thread_members = Table( 363 | "thread_members", 364 | self.metadata_obj, 365 | Column("int_member_id", BigInteger, ForeignKey("users.int_user_id")), 366 | Column("thread_id", BigInteger, ForeignKey("threads.thread_id")), 367 | Column("joined_at", DateTime), 368 | Column("left_at", DateTime, nullable=True), 369 | UniqueConstraint("int_member_id", "thread_id", "joined_at", name="uq_thread_members"), 370 | ) 371 | 372 | self.tb_thread_crawl = Table( 373 | "thread_crawl", 374 | self.metadata_obj, 375 | Column( 376 | "thread_id", 377 | BigInteger, 378 | ForeignKey("threads.thread_id"), 379 | primary_key=True, 380 | ), 381 | Column("last_message_id", BigInteger), 382 | ) -------------------------------------------------------------------------------- /migrations/versions/initial_revision_discord_py_1_5.py: -------------------------------------------------------------------------------- 1 | """This initial revision assumes statbot is running discord.py v1.5 2 | 3 | Revision ID: initial_revision_discord_py_1_5 4 | Revises: 5 | Create Date: 2022-09-30 00:46:40.565202 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = 'initial_revision_discord_py_1_5' 14 | down_revision = None 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade() -> None: 20 | # ### commands auto generated by Alembic - please adjust! 
### 21 | op.create_table('avatar_history', 22 | sa.Column('user_id', sa.BigInteger(), nullable=False), 23 | sa.Column('timestamp', sa.DateTime(), nullable=False), 24 | sa.Column('avatar', sa.LargeBinary(), nullable=True), 25 | sa.Column('avatar_ext', sa.String(), nullable=True), 26 | sa.PrimaryKeyConstraint('user_id', 'timestamp') 27 | ) 28 | op.create_table('emojis', 29 | sa.Column('emoji_id', sa.BigInteger(), nullable=True), 30 | sa.Column('emoji_unicode', sa.Unicode(length=7), nullable=True), 31 | sa.Column('is_custom', sa.Boolean(), nullable=True), 32 | sa.Column('is_managed', sa.Boolean(), nullable=True), 33 | sa.Column('is_deleted', sa.Boolean(), nullable=True), 34 | sa.Column('name', sa.ARRAY(sa.String()), nullable=True), 35 | sa.Column('category', sa.ARRAY(sa.String()), nullable=True), 36 | sa.Column('roles', sa.ARRAY(sa.BigInteger()), nullable=True), 37 | sa.Column('guild_id', sa.BigInteger(), nullable=True), 38 | sa.UniqueConstraint('emoji_id', 'emoji_unicode', name='uq_emoji') 39 | ) 40 | op.create_table('nickname_history', 41 | sa.Column('user_id', sa.BigInteger(), nullable=False), 42 | sa.Column('timestamp', sa.DateTime(), nullable=False), 43 | sa.Column('nickname', sa.Unicode(), nullable=True), 44 | sa.PrimaryKeyConstraint('user_id', 'timestamp') 45 | ) 46 | op.create_table('username_history', 47 | sa.Column('user_id', sa.BigInteger(), nullable=False), 48 | sa.Column('timestamp', sa.DateTime(), nullable=False), 49 | sa.Column('username', sa.Unicode(), nullable=True), 50 | sa.PrimaryKeyConstraint('user_id', 'timestamp') 51 | ) 52 | op.create_table('users', 53 | sa.Column('int_user_id', sa.BigInteger(), nullable=False), 54 | sa.Column('real_user_id', sa.BigInteger(), nullable=True), 55 | sa.Column('name', sa.Unicode(), nullable=True), 56 | sa.Column('discriminator', sa.SmallInteger(), nullable=True), 57 | sa.Column('avatar', sa.String(), nullable=True), 58 | sa.Column('is_deleted', sa.Boolean(), nullable=True), 59 | sa.Column('is_bot', sa.Boolean(), nullable=True), 60 | sa.PrimaryKeyConstraint('int_user_id') 61 | ) 62 | op.create_table('guilds', 63 | sa.Column('guild_id', sa.BigInteger(), nullable=False), 64 | sa.Column('int_owner_id', sa.BigInteger(), nullable=True), 65 | sa.Column('name', sa.Unicode(), nullable=True), 66 | sa.Column('icon', sa.String(), nullable=True), 67 | sa.Column('voice_region', sa.Enum('us_west', 'us_east', 'us_south', 'us_central', 'eu_west', 'eu_central', 'singapore', 'london', 'sydney', 'amsterdam', 'frankfurt', 'brazil', 'hongkong', 'russia', 'japan', 'southafrica', 'south_korea', 'india', 'europe', 'dubai', 'vip_us_east', 'vip_us_west', 'vip_amsterdam', name='voiceregion'), nullable=True), 68 | sa.Column('afk_channel_id', sa.BigInteger(), nullable=True), 69 | sa.Column('afk_timeout', sa.Integer(), nullable=True), 70 | sa.Column('mfa', sa.Boolean(), nullable=True), 71 | sa.Column('verification_level', sa.Enum('none', 'low', 'medium', 'high', 'table_flip', 'extreme', 'double_table_flip', 'very_high', name='verificationlevel'), nullable=True), 72 | sa.Column('explicit_content_filter', sa.Enum('disabled', 'no_role', 'all_members', name='contentfilter'), nullable=True), 73 | sa.Column('features', sa.ARRAY(sa.String()), nullable=True), 74 | sa.Column('splash', sa.String(), nullable=True), 75 | sa.ForeignKeyConstraint(['int_owner_id'], ['users.int_user_id'], ), 76 | sa.PrimaryKeyConstraint('guild_id') 77 | ) 78 | op.create_table('audit_log', 79 | sa.Column('audit_entry_id', sa.BigInteger(), nullable=False), 80 | sa.Column('guild_id', sa.BigInteger(), 
nullable=True), 81 | sa.Column('action', sa.Enum('guild_update', 'channel_create', 'channel_update', 'channel_delete', 'overwrite_create', 'overwrite_update', 'overwrite_delete', 'kick', 'member_prune', 'ban', 'unban', 'member_update', 'member_role_update', 'member_move', 'member_disconnect', 'bot_add', 'role_create', 'role_update', 'role_delete', 'invite_create', 'invite_update', 'invite_delete', 'webhook_create', 'webhook_update', 'webhook_delete', 'emoji_create', 'emoji_update', 'emoji_delete', 'message_delete', 'message_bulk_delete', 'message_pin', 'message_unpin', 'integration_create', 'integration_update', 'integration_delete', name='auditlogaction'), nullable=True), 82 | sa.Column('int_user_id', sa.BigInteger(), nullable=True), 83 | sa.Column('reason', sa.Unicode(), nullable=True), 84 | sa.Column('category', sa.Enum('create', 'delete', 'update', name='auditlogactioncategory'), nullable=True), 85 | sa.Column('before', sa.JSON(), nullable=True), 86 | sa.Column('after', sa.JSON(), nullable=True), 87 | sa.ForeignKeyConstraint(['guild_id'], ['guilds.guild_id'], ), 88 | sa.ForeignKeyConstraint(['int_user_id'], ['users.int_user_id'], ), 89 | sa.PrimaryKeyConstraint('audit_entry_id'), 90 | sa.UniqueConstraint('audit_entry_id', 'guild_id', name='uq_audit_log') 91 | ) 92 | op.create_table('audit_log_crawl', 93 | sa.Column('guild_id', sa.BigInteger(), nullable=False), 94 | sa.Column('last_audit_entry_id', sa.BigInteger(), nullable=True), 95 | sa.ForeignKeyConstraint(['guild_id'], ['guilds.guild_id'], ), 96 | sa.PrimaryKeyConstraint('guild_id') 97 | ) 98 | op.create_table('channel_categories', 99 | sa.Column('category_id', sa.BigInteger(), nullable=False), 100 | sa.Column('name', sa.Unicode(), nullable=True), 101 | sa.Column('position', sa.SmallInteger(), nullable=True), 102 | sa.Column('is_deleted', sa.Boolean(), nullable=True), 103 | sa.Column('is_nsfw', sa.Boolean(), nullable=True), 104 | sa.Column('changed_roles', sa.ARRAY(sa.BigInteger()), nullable=True), 105 | sa.Column('parent_category_id', sa.BigInteger(), nullable=True), 106 | sa.Column('guild_id', sa.BigInteger(), nullable=True), 107 | sa.ForeignKeyConstraint(['guild_id'], ['guilds.guild_id'], ), 108 | sa.ForeignKeyConstraint(['parent_category_id'], ['channel_categories.category_id'], ), 109 | sa.PrimaryKeyConstraint('category_id') 110 | ) 111 | op.create_table('guild_membership', 112 | sa.Column('int_user_id', sa.BigInteger(), nullable=False), 113 | sa.Column('guild_id', sa.BigInteger(), nullable=False), 114 | sa.Column('is_member', sa.Boolean(), nullable=True), 115 | sa.Column('joined_at', sa.DateTime(), nullable=True), 116 | sa.Column('nick', sa.Unicode(length=32), nullable=True), 117 | sa.ForeignKeyConstraint(['guild_id'], ['guilds.guild_id'], ), 118 | sa.ForeignKeyConstraint(['int_user_id'], ['users.int_user_id'], ), 119 | sa.PrimaryKeyConstraint('int_user_id', 'guild_id'), 120 | sa.UniqueConstraint('int_user_id', 'guild_id', name='uq_guild_membership') 121 | ) 122 | op.create_table('roles', 123 | sa.Column('role_id', sa.BigInteger(), nullable=False), 124 | sa.Column('name', sa.Unicode(), nullable=True), 125 | sa.Column('color', sa.Integer(), nullable=True), 126 | sa.Column('raw_permissions', sa.BigInteger(), nullable=True), 127 | sa.Column('guild_id', sa.BigInteger(), nullable=True), 128 | sa.Column('is_hoisted', sa.Boolean(), nullable=True), 129 | sa.Column('is_managed', sa.Boolean(), nullable=True), 130 | sa.Column('is_mentionable', sa.Boolean(), nullable=True), 131 | sa.Column('is_deleted', sa.Boolean(), nullable=True), 132 | 
sa.Column('position', sa.SmallInteger(), nullable=True), 133 | sa.ForeignKeyConstraint(['guild_id'], ['guilds.guild_id'], ), 134 | sa.PrimaryKeyConstraint('role_id') 135 | ) 136 | op.create_table('channels', 137 | sa.Column('channel_id', sa.BigInteger(), nullable=False), 138 | sa.Column('name', sa.String(), nullable=True), 139 | sa.Column('is_nsfw', sa.Boolean(), nullable=True), 140 | sa.Column('is_deleted', sa.Boolean(), nullable=True), 141 | sa.Column('position', sa.SmallInteger(), nullable=True), 142 | sa.Column('topic', sa.UnicodeText(), nullable=True), 143 | sa.Column('changed_roles', sa.ARRAY(sa.BigInteger()), nullable=True), 144 | sa.Column('category_id', sa.BigInteger(), nullable=True), 145 | sa.Column('guild_id', sa.BigInteger(), nullable=True), 146 | sa.ForeignKeyConstraint(['category_id'], ['channel_categories.category_id'], ), 147 | sa.ForeignKeyConstraint(['guild_id'], ['guilds.guild_id'], ), 148 | sa.PrimaryKeyConstraint('channel_id') 149 | ) 150 | op.create_table('role_membership', 151 | sa.Column('role_id', sa.BigInteger(), nullable=True), 152 | sa.Column('guild_id', sa.BigInteger(), nullable=True), 153 | sa.Column('int_user_id', sa.BigInteger(), nullable=True), 154 | sa.ForeignKeyConstraint(['guild_id'], ['guilds.guild_id'], ), 155 | sa.ForeignKeyConstraint(['int_user_id'], ['users.int_user_id'], ), 156 | sa.ForeignKeyConstraint(['role_id'], ['roles.role_id'], ), 157 | sa.UniqueConstraint('role_id', 'int_user_id', name='uq_role_membership') 158 | ) 159 | op.create_table('voice_channels', 160 | sa.Column('voice_channel_id', sa.BigInteger(), nullable=False), 161 | sa.Column('name', sa.Unicode(), nullable=True), 162 | sa.Column('is_deleted', sa.Boolean(), nullable=True), 163 | sa.Column('position', sa.SmallInteger(), nullable=True), 164 | sa.Column('bitrate', sa.Integer(), nullable=True), 165 | sa.Column('user_limit', sa.SmallInteger(), nullable=True), 166 | sa.Column('changed_roles', sa.ARRAY(sa.BigInteger()), nullable=True), 167 | sa.Column('category_id', sa.BigInteger(), nullable=True), 168 | sa.Column('guild_id', sa.BigInteger(), nullable=True), 169 | sa.ForeignKeyConstraint(['category_id'], ['channel_categories.category_id'], ), 170 | sa.ForeignKeyConstraint(['guild_id'], ['guilds.guild_id'], ), 171 | sa.PrimaryKeyConstraint('voice_channel_id') 172 | ) 173 | op.create_table('channel_crawl', 174 | sa.Column('channel_id', sa.BigInteger(), nullable=False), 175 | sa.Column('last_message_id', sa.BigInteger(), nullable=True), 176 | sa.ForeignKeyConstraint(['channel_id'], ['channels.channel_id'], ), 177 | sa.PrimaryKeyConstraint('channel_id') 178 | ) 179 | op.create_table('messages', 180 | sa.Column('message_id', sa.BigInteger(), nullable=False), 181 | sa.Column('created_at', sa.DateTime(), nullable=True), 182 | sa.Column('edited_at', sa.DateTime(), nullable=True), 183 | sa.Column('deleted_at', sa.DateTime(), nullable=True), 184 | sa.Column('message_type', sa.Enum('default', 'recipient_add', 'recipient_remove', 'call', 'channel_name_change', 'channel_icon_change', 'pins_add', 'new_member', 'premium_guild_subscription', 'premium_guild_tier_1', 'premium_guild_tier_2', 'premium_guild_tier_3', 'channel_follow_add', name='messagetype'), nullable=True), 185 | sa.Column('system_content', sa.UnicodeText(), nullable=True), 186 | sa.Column('content', sa.UnicodeText(), nullable=True), 187 | sa.Column('embeds', sa.JSON(), nullable=True), 188 | sa.Column('attachments', sa.SmallInteger(), nullable=True), 189 | sa.Column('webhook_id', sa.BigInteger(), nullable=True), 190 | 
sa.Column('int_user_id', sa.BigInteger(), nullable=True), 191 | sa.Column('channel_id', sa.BigInteger(), nullable=True), 192 | sa.Column('guild_id', sa.BigInteger(), nullable=True), 193 | sa.ForeignKeyConstraint(['channel_id'], ['channels.channel_id'], ), 194 | sa.ForeignKeyConstraint(['guild_id'], ['guilds.guild_id'], ), 195 | sa.PrimaryKeyConstraint('message_id') 196 | ) 197 | op.create_table('reactions', 198 | sa.Column('message_id', sa.BigInteger(), nullable=True), 199 | sa.Column('emoji_id', sa.BigInteger(), nullable=True), 200 | sa.Column('emoji_unicode', sa.Unicode(length=7), nullable=True), 201 | sa.Column('int_user_id', sa.BigInteger(), nullable=True), 202 | sa.Column('created_at', sa.DateTime(), nullable=True), 203 | sa.Column('deleted_at', sa.DateTime(), nullable=True), 204 | sa.Column('channel_id', sa.BigInteger(), nullable=True), 205 | sa.Column('guild_id', sa.BigInteger(), nullable=True), 206 | sa.ForeignKeyConstraint(['channel_id'], ['channels.channel_id'], ), 207 | sa.ForeignKeyConstraint(['guild_id'], ['guilds.guild_id'], ), 208 | sa.ForeignKeyConstraint(['int_user_id'], ['users.int_user_id'], ), 209 | sa.UniqueConstraint('message_id', 'emoji_id', 'emoji_unicode', 'int_user_id', 'created_at', name='uq_reactions') 210 | ) 211 | op.create_table('typing', 212 | sa.Column('timestamp', sa.DateTime(), nullable=True), 213 | sa.Column('int_user_id', sa.BigInteger(), nullable=True), 214 | sa.Column('channel_id', sa.BigInteger(), nullable=True), 215 | sa.Column('guild_id', sa.BigInteger(), nullable=True), 216 | sa.ForeignKeyConstraint(['channel_id'], ['channels.channel_id'], ), 217 | sa.ForeignKeyConstraint(['guild_id'], ['guilds.guild_id'], ), 218 | sa.ForeignKeyConstraint(['int_user_id'], ['users.int_user_id'], ), 219 | sa.UniqueConstraint('timestamp', 'int_user_id', 'channel_id', 'guild_id', name='uq_typing') 220 | ) 221 | op.create_table('mentions', 222 | sa.Column('mentioned_id', sa.BigInteger(), nullable=False), 223 | sa.Column('type', sa.Enum('USER', 'ROLE', 'CHANNEL', name='mentiontype'), nullable=False), 224 | sa.Column('message_id', sa.BigInteger(), nullable=False), 225 | sa.Column('channel_id', sa.BigInteger(), nullable=True), 226 | sa.Column('guild_id', sa.BigInteger(), nullable=True), 227 | sa.ForeignKeyConstraint(['channel_id'], ['channels.channel_id'], ), 228 | sa.ForeignKeyConstraint(['guild_id'], ['guilds.guild_id'], ), 229 | sa.ForeignKeyConstraint(['message_id'], ['messages.message_id'], ), 230 | sa.PrimaryKeyConstraint('mentioned_id', 'type', 'message_id'), 231 | sa.UniqueConstraint('mentioned_id', 'type', 'message_id', name='uq_mention') 232 | ) 233 | op.create_table('pins', 234 | sa.Column('pin_id', sa.BigInteger(), nullable=False), 235 | sa.Column('message_id', sa.BigInteger(), nullable=False), 236 | sa.Column('pinner_id', sa.BigInteger(), nullable=True), 237 | sa.Column('int_user_id', sa.BigInteger(), nullable=True), 238 | sa.Column('channel_id', sa.BigInteger(), nullable=True), 239 | sa.Column('guild_id', sa.BigInteger(), nullable=True), 240 | sa.ForeignKeyConstraint(['channel_id'], ['channels.channel_id'], ), 241 | sa.ForeignKeyConstraint(['guild_id'], ['guilds.guild_id'], ), 242 | sa.ForeignKeyConstraint(['int_user_id'], ['users.int_user_id'], ), 243 | sa.ForeignKeyConstraint(['message_id'], ['messages.message_id'], ), 244 | sa.ForeignKeyConstraint(['pinner_id'], ['users.int_user_id'], ), 245 | sa.PrimaryKeyConstraint('pin_id', 'message_id') 246 | ) 247 | # ### end Alembic commands ### 248 | 249 | 250 | def downgrade() -> None: 251 | # ### commands auto 
generated by Alembic - please adjust! ### 252 | op.drop_table('pins') 253 | op.drop_table('mentions') 254 | op.drop_table('typing') 255 | op.drop_table('reactions') 256 | op.drop_table('messages') 257 | op.drop_table('channel_crawl') 258 | op.drop_table('voice_channels') 259 | op.drop_table('role_membership') 260 | op.drop_table('channels') 261 | op.drop_table('roles') 262 | op.drop_table('guild_membership') 263 | op.drop_table('channel_categories') 264 | op.drop_table('audit_log_crawl') 265 | op.drop_table('audit_log') 266 | op.drop_table('guilds') 267 | op.drop_table('users') 268 | op.drop_table('username_history') 269 | op.drop_table('nickname_history') 270 | op.drop_table('emojis') 271 | op.drop_table('avatar_history') 272 | # ### end Alembic commands ### 273 | -------------------------------------------------------------------------------- /statbot/crawler.py: -------------------------------------------------------------------------------- 1 | # 2 | # crawler.py 3 | # 4 | # statbot - Store Discord records for later analysis 5 | # Copyright (c) 2017-2018 Ammon Smith 6 | # 7 | # statbot is available free of charge under the terms of the MIT 8 | # License. You are free to redistribute and/or modify it under those 9 | # terms. It is distributed in the hopes that it will be useful, but 10 | # WITHOUT ANY WARRANTY. See the LICENSE file for more details. 11 | # 12 | 13 | from datetime import datetime 14 | import abc 15 | import asyncio 16 | 17 | from sqlalchemy.exc import SQLAlchemyError 18 | import discord 19 | 20 | from .sql import DiscordSqlHandler 21 | from .util import null_logger 22 | 23 | __all__ = [ 24 | "AbstractCrawler", 25 | "HistoryCrawler", 26 | "AuditLogCrawler", 27 | ] 28 | 29 | 30 | class AbstractCrawler: 31 | __slots__ = ( 32 | "name", 33 | "client", 34 | "sql", 35 | "config", 36 | "logger", 37 | "progress", 38 | "queue", 39 | "continuous", 40 | "current", 41 | ) 42 | 43 | def __init__( 44 | self, 45 | name, 46 | client, 47 | sql: DiscordSqlHandler, 48 | config, 49 | logger=null_logger, 50 | continuous=False, 51 | ): 52 | self.name = name 53 | self.client = client 54 | self.sql = sql 55 | self.config = config 56 | self.logger = logger 57 | self.progress = {} # { stream : last_id } 58 | self.queue = asyncio.Queue(self.config["crawler"]["queue-size"]) 59 | self.continuous = continuous 60 | self.current = None 61 | 62 | def _update_current(self): 63 | self.current = discord.utils.time_snowflake(datetime.now()) 64 | 65 | @staticmethod 66 | def get_last_id(objects): 67 | # pylint: disable=arguments-differ 68 | return max(map(lambda x: x.id, objects)) 69 | 70 | @abc.abstractmethod 71 | async def init(self): 72 | pass 73 | 74 | @abc.abstractmethod 75 | async def read(self, source, last_id): 76 | pass 77 | 78 | @abc.abstractmethod 79 | async def write(self, txact, source, events): 80 | pass 81 | 82 | @abc.abstractmethod 83 | async def update(self, txact, source, last_id): 84 | pass 85 | 86 | def start(self): 87 | self.client.loop.create_task(self.producer()) 88 | self.client.loop.create_task(self.consumer()) 89 | 90 | async def producer(self): 91 | self.logger.info(f"{self.name}: producer coroutine started!") 92 | 93 | # Setup 94 | await self.client.wait_until_ready() 95 | await self.init() 96 | 97 | yield_delay = self.config["crawler"]["delays"]["yield"] 98 | long_delay = self.config["crawler"]["delays"]["empty-source"] 99 | 100 | done = dict.fromkeys(self.progress.keys(), False) 101 | while True: 102 | self._update_current() 103 | 104 | # Round-robin between all sources: 105 | # 
Tuple because the underlying dictionary may change size 106 | for source, last_id in tuple(self.progress.items()): 107 | if done[source] and not self.continuous: 108 | continue 109 | 110 | try: 111 | events = await self.read(source, last_id) 112 | if events is None: 113 | # This source is exhausted 114 | done[source] = True 115 | await self.queue.put((source, None, self.current)) 116 | self.progress[source] = self.current 117 | else: 118 | # This source still has more 119 | done[source] = False 120 | last_id = self.get_last_id(events) 121 | await self.queue.put((source, events, last_id)) 122 | self.progress[source] = last_id 123 | except discord.DiscordException: 124 | self.logger.error( 125 | f"{self.name}: error during event read", exc_info=1 126 | ) 127 | 128 | if all(done.values()): 129 | self.logger.info( 130 | f"{self.name}: all sources are exhausted, sleeping for a while..." 131 | ) 132 | delay = long_delay 133 | else: 134 | delay = yield_delay 135 | await asyncio.sleep(delay) 136 | 137 | async def consumer(self): 138 | self.logger.info(f"{self.name}: consumer coroutine started!") 139 | 140 | while True: 141 | source, events, last_id = await self.queue.get() 142 | self.logger.info(f"{self.name}: got group of events from queue") 143 | 144 | try: 145 | with self.sql.transaction() as txact: 146 | if events is not None: 147 | await self.write(txact, source, events) 148 | await self.update(txact, source, last_id) 149 | except SQLAlchemyError: 150 | self.logger.error(f"{self.name}: error during event write", exc_info=1) 151 | 152 | self.queue.task_done() 153 | 154 | 155 | class HistoryCrawler(AbstractCrawler): 156 | def __init__(self, client, sql, config, logger=null_logger): 157 | AbstractCrawler.__init__(self, "Channels", client, sql, config, logger) 158 | 159 | def _channel_ok(self, channel): 160 | if channel.guild.id in self.config["guild-ids"]: 161 | return channel.permissions_for(channel.guild.me).read_message_history 162 | return False 163 | 164 | @staticmethod 165 | async def _channel_first(chan): 166 | async for msg in chan.history(limit=1, after=discord.utils.snowflake_time(0)): 167 | return msg.id 168 | return None 169 | 170 | async def init(self): 171 | with self.sql.transaction() as txact: 172 | for guild in map(self.client.get_guild, self.config["guild-ids"]): 173 | for channel in guild.text_channels: 174 | if channel.permissions_for(guild.me).read_message_history: 175 | last_id = self.sql.lookup_channel_crawl(txact, channel) 176 | if last_id is None: 177 | self.sql.insert_channel_crawl(txact, channel, 0) 178 | self.progress[channel] = last_id or 0 179 | 180 | self.client.hooks["on_guild_channel_create"] = self._channel_create_hook 181 | self.client.hooks["on_guild_channel_delete"] = self._channel_delete_hook 182 | self.client.hooks["on_guild_channel_update"] = self._channel_update_hook 183 | 184 | async def read( 185 | self, channel: discord.TextChannel, last_id 186 | ) -> list[discord.message.Message]: 187 | # pylint: disable=arguments-differ 188 | last = discord.utils.snowflake_time(last_id) 189 | limit = self.config["crawler"]["batch-size"] 190 | self.logger.info( 191 | f"Reading through channel {channel.id} ({channel.guild.name} #{channel.name}):" 192 | ) 193 | self.logger.info(f"Starting from ID {last_id} ({last})") 194 | 195 | messages = [ 196 | message async for message in channel.history(after=last, limit=limit) 197 | ] 198 | if messages: 199 | self.logger.info(f"Queued {len(messages)} messages for ingestion") 200 | return messages 201 | else: 202 | 
self.logger.info("No messages found in this range") 203 | return None 204 | 205 | async def write(self, txact, source, messages): 206 | # pylint: disable=arguments-differ 207 | for message in messages: 208 | self.sql.insert_message(txact, message) 209 | for reaction in message.reactions: 210 | try: 211 | users = [user async for user in reaction.users()] 212 | except discord.NotFound: 213 | self.logger.warn("Unable to find reaction users", exc_info=1) 214 | users = [] 215 | 216 | self.sql.upsert_emoji(txact, reaction.emoji) 217 | self.sql.insert_reaction(txact, reaction, users) 218 | 219 | async def update(self, txact, channel, last_id): 220 | # pylint: disable=arguments-differ 221 | self.sql.update_channel_crawl(txact, channel, last_id) 222 | 223 | def _create_progress(self, channel): 224 | self.progress[channel] = None 225 | 226 | with self.sql.transaction() as txact: 227 | self.sql.insert_channel_crawl(txact, channel, 0) 228 | 229 | def _update_progress(self, channel): 230 | with self.sql.transaction() as txact: 231 | self.sql.update_channel_crawl(txact, channel, self.progress[channel]) 232 | 233 | def _delete_progress(self, channel): 234 | self.progress.pop(channel, None) 235 | 236 | with self.sql.transaction() as txact: 237 | self.sql.delete_channel_crawl(txact, channel) 238 | 239 | async def _channel_create_hook(self, channel): 240 | if not self._channel_ok(channel) or channel in self.progress: 241 | return 242 | 243 | self.logger.info(f"Adding #{channel.name} to tracked channels") 244 | self._create_progress(channel) 245 | 246 | async def _channel_delete_hook(self, channel): 247 | self.logger.info(f"Removing #{channel.name} from tracked channels") 248 | self._delete_progress(channel) 249 | 250 | async def _channel_update_hook(self, before, after): 251 | if not self._channel_ok(before): 252 | return 253 | 254 | if self._channel_ok(after): 255 | if after.id in self.progress: 256 | return 257 | 258 | self.logger.info(f"Updating #{after.name} - adding to list") 259 | self._update_progress(after) 260 | else: 261 | self.logger.info(f"Updating #{after.name} - removing from list") 262 | self._delete_progress(after) 263 | 264 | 265 | class AuditLogCrawler(AbstractCrawler): 266 | def __init__(self, client, sql, config, logger=null_logger): 267 | AbstractCrawler.__init__( 268 | self, "Audit Log", client, sql, config, logger, continuous=True 269 | ) 270 | 271 | async def init(self): 272 | with self.sql.transaction() as txact: 273 | for guild in map(self.client.get_guild, self.config["guild-ids"]): 274 | if guild.me.guild_permissions.view_audit_log: 275 | last_id = self.sql.lookup_audit_log_crawl(txact, guild) 276 | if last_id is None: 277 | self.sql.insert_audit_log_crawl(txact, guild, 0) 278 | self.progress[guild] = last_id or 0 279 | 280 | async def read(self, guild: discord.Guild, last_id) -> list[discord.AuditLogEntry]: 281 | # pylint: disable=arguments-differ 282 | last = discord.utils.snowflake_time(last_id) 283 | limit = self.config["crawler"]["batch-size"] 284 | self.logger.info(f"Reading through {guild.name}'s audit logs") 285 | self.logger.info(f"Starting from ID {last_id} ({last})") 286 | 287 | # Weirdly, .audit_logs() behaves differently from other history functions. 288 | # It will give us entries not in our specified range of "after=last". 289 | # As a simple remedy, we keep on slamming it with requests until it gives 290 | # us the same list twice in a row, and then we know we're done. 
291 | entries = [entry async for entry in guild.audit_logs(after=last, limit=limit)] 292 | if entries and self.get_last_id(entries) != last_id: 293 | self.logger.info(f"Queued {len(entries)} audit log entries for ingestion") 294 | return entries 295 | else: 296 | self.logger.info("No audit log entries found in this range") 297 | return None 298 | 299 | async def write(self, txact, guild, entries: list[discord.AuditLogEntry]): 300 | # pylint: disable=arguments-differ 301 | for entry in entries: 302 | self.sql.insert_audit_log_entry(txact, guild, entry) 303 | 304 | async def update(self, txact, guild, last_id): 305 | # pylint: disable=arguments-differ 306 | self.sql.update_audit_log_crawl(txact, guild, last_id) 307 | 308 | 309 | class ThreadCrawler(AbstractCrawler): 310 | def __init__(self, client, sql, config, logger=null_logger): 311 | AbstractCrawler.__init__(self, "Threads", client, sql, config, logger) 312 | 313 | def _channel_ok(self, channel: discord.TextChannel): 314 | if channel.guild.id in self.config["guild-ids"]: 315 | return channel.permissions_for(channel.guild.me).read_message_history 316 | return False 317 | 318 | def _init_progress_for_thread(self, txact, thread: discord.Thread): 319 | last_id = self.sql.lookup_thread_crawl(txact, thread) 320 | if last_id is None: 321 | self.sql.insert_thread_crawl(txact, thread, 0) 322 | self.progress[thread] = last_id or 0 323 | 324 | async def init(self): 325 | with self.sql.transaction() as txact: 326 | for guild in map(self.client.get_guild, self.config["guild-ids"]): 327 | for channel in guild.text_channels: 328 | if not self._channel_ok(channel): 329 | continue 330 | 331 | # public threads 332 | if not channel.permissions_for(guild.me).read_message_history: 333 | continue 334 | for thread in channel.threads: 335 | self._init_progress_for_thread(txact, thread) 336 | async for thread in channel.archived_threads(private=False): 337 | self._init_progress_for_thread(txact, thread) 338 | 339 | # private threads 340 | if not channel.permissions_for(guild.me).manage_threads: 341 | continue 342 | async for thread in channel.archived_threads(private=True): 343 | self._init_progress_for_thread(txact, thread) 344 | 345 | self.client.hooks["on_thread_create"] = self._thread_create_hook 346 | self.client.hooks["on_thread_delete"] = self._thread_delete_hook 347 | self.client.hooks["on_thread_update"] = self._thread_update_hook 348 | 349 | async def read(self, thread: discord.Thread, last_id): 350 | # pylint: disable=arguments-differ 351 | last = discord.utils.snowflake_time(last_id) 352 | limit = self.config["crawler"]["batch-size"] 353 | self.logger.info( 354 | f"Reading through thread {thread.id} (guild {thread.guild.name}, #{thread.parent.name}):" 355 | ) 356 | self.logger.info(f"Starting from ID {last_id} ({last})") 357 | 358 | messages = [ 359 | message async for message in thread.history(after=last, limit=limit) 360 | ] 361 | if messages: 362 | self.logger.info(f"Queued {len(messages)} messages for ingestion") 363 | return messages 364 | else: 365 | self.logger.info("No messages found in this range") 366 | return None 367 | 368 | async def write(self, txact, source, messages): 369 | # pylint: disable=arguments-differ 370 | for message in messages: 371 | self.sql.insert_message(txact, message) 372 | for reaction in message.reactions: 373 | try: 374 | users = [user async for user in reaction.users()] 375 | except discord.NotFound: 376 | self.logger.warn("Unable to find reaction users", exc_info=1) 377 | users = [] 378 | 379 | 
self.sql.upsert_emoji(txact, reaction.emoji) 380 | self.sql.insert_reaction(txact, reaction, users) 381 | 382 | async def update(self, txact, thread: discord.Thread, last_id): 383 | # pylint: disable=arguments-differ 384 | self.sql.update_thread_crawl(txact, thread, last_id) 385 | 386 | def _create_progress(self, thread: discord.Thread): 387 | self.logger.info(f"Adding #{thread.name} to tracked threads") 388 | 389 | self.progress[thread] = None 390 | 391 | with self.sql.transaction() as txact: 392 | self.sql.insert_thread_crawl(txact, thread, 0) 393 | 394 | def _update_progress(self, thread: discord.Thread): 395 | self.logger.info(f"Updating #{thread.name} in tracked threads") 396 | 397 | with self.sql.transaction() as txact: 398 | self.sql.update_thread_crawl(txact, thread, self.progress[thread]) 399 | 400 | def _delete_progress(self, thread: discord.Thread): 401 | self.logger.info(f"Removing #{thread.name} from tracked threads") 402 | 403 | self.progress.pop(thread, None) 404 | 405 | with self.sql.transaction() as txact: 406 | self.sql.delete_thread_crawl(txact, thread) 407 | 408 | async def _thread_create_hook(self, thread: discord.Thread): 409 | if not self._channel_ok(thread.parent) or thread in self.progress: 410 | return 411 | 412 | self._create_progress(thread) 413 | 414 | async def _thread_delete_hook(self, thread: discord.Thread): 415 | self._delete_progress(thread) 416 | 417 | async def _thread_update_hook(self, before: discord.Thread, after: discord.Thread): 418 | if not self._channel_ok(before.parent): 419 | return 420 | 421 | if not self._channel_ok(after.parent): 422 | self._delete_progress(after) 423 | return 424 | 425 | if after.id in self.progress: 426 | return 427 | 428 | self._update_progress(after) 429 | -------------------------------------------------------------------------------- /SCHEMA.md: -------------------------------------------------------------------------------- 1 | ## Schemas by Table 2 | 3 | ### messages 4 | | Column Name | Type | Other | 5 | |---------------------|---------------|-----------------------------| 6 | | `message_id` | `BigInteger` | Primary key | 7 | | `created_at` | `DateTime` | | 8 | | `edited_at` | `DateTime` | Nullable | 9 | | `deleted_at` | `DateTime` | Nullable | 10 | | `message_type` | `Enum` | `discord.MessageType` | 11 | | `system_content` | `UnicodeText` | | 12 | | `content` | `UnicodeText` | | 13 | | `embeds` | `JSON` | | 14 | | `attachments` | `SmallInteger`| | 15 | | `webhook_id` | `BigInteger` | | 16 | | `user_id` | `BigInteger` | | 17 | | `channel_id` | `BigInteger` | | 18 | | `guild_id` | `BigInteger` | | 19 | 20 | This table is the real heart of Statbot. It stores every single message in all Discord guilds that the bot is in. The fields you most likely care about are `message_id`, that message's globally unique identifier, `content`, what the message contains, and `user_id`, `channel_id`, and `guild_id`, where this message was sent and who sent it. 21 | 22 | These records are only ever appended or updated, not deleted. Messages that have been altered (edited or deleted) are tracked as such by setting those corresponding columns to timestamps telling when the action occurred. 23 | 24 | For information on [`message_type`](https://discordpy.readthedocs.io/en/rewrite/api.html#discord.Message.system_content) or [`system_content`](https://discordpy.readthedocs.io/en/rewrite/api.html#discord.Message.type), see the [discord.py API documentation](https://discordpy.readthedocs.io/en/rewrite/api.html). 
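As a rough illustration of how this table is usually queried (a sketch only, assuming a PostgreSQL database populated by Statbot and the column names listed above; the guild ID is a placeholder):

```sql
-- Count surviving (non-deleted) messages per user in one guild.
SELECT user_id,
       COUNT(*) AS message_count
FROM messages
WHERE guild_id = 1234567890123456789  -- placeholder: substitute a real guild ID
  AND deleted_at IS NULL
GROUP BY user_id
ORDER BY message_count DESC
LIMIT 20;
```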
25 | 26 | The `embeds` column is a JSON field containing a list of the embeds stored with this messages. This includes both manual embeds (the kind bots send) and automatic embeds (the kind that appear when you post links). For a typical message this will be `{}`. For more information on what fields it may contain, see [`discord.Embed`](https://discordpy.readthedocs.io/en/rewrite/api.html#embed). 27 | 28 | The `attachments` column only stores how many attachments were added to this message. 29 | The actual links to those files are automatically appended to the message's `content`s. 30 | 31 | ### reactions 32 | | Column Name | Type | Other | 33 | |---------------------|---------------|-----------------------------| 34 | | `message_id` | `BigInteger` | | 35 | | `emoji_id` | `BigInteger` | | 36 | | `emoji_unicode` | `Unicode(7)` | | 37 | | `user_id` | `BigInteger` | | 38 | | `created_at` | `BigInteger` | Nullable | 39 | | `deleted_at` | `BigInteger` | Nullable | 40 | | `channel_id` | `BigInteger` | | 41 | | `guild_id` | `BigInteger` | | 42 | 43 | Unique constraint `uq_reactions`: `message_id`, `emoji_id`, `emoji_unicode`, `user_id`, `created_at`. 44 | 45 | This table stores reactions on messages. There is no primary key for this table since you can have duplicate reactions on the same message with the same emote by the same person. However, they are nearly always going to happen at different times, making the unique constraint useful. In cases where this is not the case, the reaction will not be tracked. 46 | 47 | Live reactions have more data than crawled ones. Crawled reactions that are later deleted cannot be seen by Statbot (since they're deleted), and those it does find do not have a corresponding creation timestamp. In either of these cases, the row will either not exist or will have `NULL` for the corresponding column. 48 | 49 | For an explanation of `emoji_id` / `emoji_unicode`, see the "Emoji Data". 50 | 51 | ### typing 52 | | Column Name | Type | Other | 53 | |---------------------|---------------|-----------------------------| 54 | | `timestamp` | `DateTime` | | 55 | | `user_id` | `BigInteger` | | 56 | | `channel_id` | `BigInteger` | | 57 | | `guild_id` | `BigInteger` | | 58 | 59 | Unique constraint `uq_typing`: `timestamp`, `user_id`, `channel_id`, `guild_id`. 60 | 61 | This table tracks the "... is typing" events received by the client. Since they are live, the crawler cannot capture any events of this type. They do not have IDs, so instead the timestamp is sampled from the bot, and then inserted with the other data. 62 | 63 | If you are trying to recreate a channel's activity, note that typing events are intended to last 10 seconds or until that user sends a message in that channel, whichever is earliest. 64 | 65 | ### pins 66 | | Column Name | Type | Other | 67 | |---------------------|---------------|-----------------------------| 68 | | `pin_id` | `BigInteger` | Primary key | 69 | | `message_id` | `BigInteger` | | 70 | | `pinner_id` | `BigInteger` | | 71 | | `user_id` | `BigInteger` | | 72 | | `channel_id` | `BigInteger` | | 73 | | `guild_id` | `BigInteger` | | 74 | 75 | **This is likely what the table will look like when this feature is ready. Anything can change in the mean time.** 76 | 77 | This schema is similar to the `messages` schema, but there are some important details to clarify. Every time you have a pin, there are two users involved: the pinner and the author. These may or may not be the same person. 
78 | 
79 | Whenever you pin something, Discord creates a system message: "[user] pinned a message to this channel." This system message's "author" is the person who did the pinning. Its ID is the `pin_id`, and it is what is used to track pin ownership (i.e. who pinned which message). 
80 | 
81 | ### mentions 
82 | | Column Name         | Type          | Other                       | 
83 | |---------------------|---------------|-----------------------------| 
84 | | `mentioned_id`      | `BigInteger`  | Primary key                 | 
85 | | `type`              | `Enum`        | Primary key                 | 
86 | | `message_id`        | `BigInteger`  | Primary key                 | 
87 | | `channel_id`        | `BigInteger`  |                             | 
88 | | `guild_id`          | `BigInteger`  |                             | 
89 | 
90 | Unique constraint `uq_mention`: `mentioned_id`, `type`, `message_id`. 
91 | 
92 | This table tracks mentions. Three kinds are recorded, corresponding to the possible values of the `type` column: `USER`, `ROLE`, and `CHANNEL`. 
93 | 
94 | The `mentioned_id` column is the actual thing being mentioned. So if the `type` is `USER`, then this value is likely a user's ID. However, **this value can be invalid!** If users manually construct mentions, or they mention somebody not known to Statbot, these IDs may not correspond to anything. 
95 | 
96 | A separate row is produced for each mention in a message. Duplicates within a message are ignored, as you can see from the unique constraint. 
97 | 
98 | Note that `@everyone` and `@here` are special. They are not currently tracked by this table. If you wish to search for them, query `messages` for the literal strings `@everyone` or `@here`. 
99 | 
100 | ### guilds 
101 | | Column Name               | Type            | Other                       | 
102 | |---------------------------|-----------------|-----------------------------| 
103 | | `guild_id`                | `BigInteger`    | Primary key                 | 
104 | | `owner_id`                | `BigInteger`    |                             | 
105 | | `name`                    | `Unicode`       |                             | 
106 | | `icon`                    | `String`        |                             | 
107 | | `voice_region`            | `Enum`          | `discord.VoiceRegion`       | 
108 | | `afk_channel_id`          | `BigInteger`    | Nullable                    | 
109 | | `afk_timeout`             | `Integer`       |                             | 
110 | | `mfa`                     | `Boolean`       |                             | 
111 | | `verification_level`      | `Enum`          | `discord.VerificationLevel` | 
112 | | `explicit_content_filter` | `Enum`          | `discord.ContentFilter`     | 
113 | | `features`                | `Array[String]` |                             | 
114 | | `splash`                  | `String`        | Nullable                    | 
115 | 
116 | This table provides look-up information about known guilds. The columns in this schema more or less correspond to the [`discord.Guild` object](https://discordpy.readthedocs.io/en/rewrite/api.html#guild). The biggest change is that `mfa` is a boolean instead of an integer, reflecting whether it is enabled or not. 
117 | 
118 | ### channels 
119 | | Column Name         | Type                | Other                       | 
120 | |---------------------|---------------------|-----------------------------| 
121 | | `channel_id`        | `BigInteger`        | Primary key                 | 
122 | | `name`              | `String`            |                             | 
123 | | `is_nsfw`           | `Boolean`           |                             | 
124 | | `is_deleted`        | `Boolean`           |                             | 
125 | | `position`          | `SmallInteger`      |                             | 
126 | | `topic`             | `UnicodeText`       | Nullable                    | 
127 | | `changed_roles`     | `Array[BigInteger]` |                             | 
128 | | `category_id`       | `BigInteger`        | Nullable                    | 
129 | | `guild_id`          | `BigInteger`        |                             | 
130 | 
131 | This table provides look-up information about text channels. The columns in this schema more or less correspond to the [`discord.TextChannel` object](https://discordpy.readthedocs.io/en/rewrite/api.html#textchannel).
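Since `channel_id` also appears on `messages`, this lookup table is commonly joined against it. The sketch below is illustrative only; it assumes the column names documented above and uses a placeholder guild ID.

```sql
-- Message volume per text channel in a guild (1234567890 is a placeholder ID).
SELECT c.name, COUNT(m.message_id) AS messages
FROM channels AS c
LEFT JOIN messages AS m
    ON m.channel_id = c.channel_id
   AND m.deleted_at IS NULL
WHERE c.guild_id = 1234567890
GROUP BY c.name
ORDER BY messages DESC;
```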
132 | 133 | Channels that are deleted while the bot is running are marked as such, with all meta information in the last state the channel was observed in. 134 | 135 | ### voice\_channels 136 | | Column Name | Type | Other | 137 | |---------------------|---------------------|-----------------------------| 138 | | `voice_channel_id` | `BigInteger` | Primary key | 139 | | `name` | `String` | | 140 | | `is_deleted` | `Boolean` | | 141 | | `position` | `SmallInteger` | | 142 | | `bitrate` | `Integer` | | 143 | | `user_limit` | `SmallInteger` | | 144 | | `changed_roles` | `Array[BigInteger]` | | 145 | | `category_id` | `BigInteger` | Nullable | 146 | | `guild_id` | `BigInteger` | | 147 | 148 | This table provides look-up information about voice channels. The columns in this schema more or less correspond to the [`discord.VoiceChannel` object](https://discordpy.readthedocs.io/en/rewrite/api.html#voicechannel). 149 | 150 | Like with text channels, voice channels that are deleted are tracked as such. 151 | 152 | ### channel\_categories 153 | | Column Name | Type | Other | 154 | |---------------------|---------------------|-----------------------------| 155 | | `category_id` | `BigInteger` | Primary key | 156 | | `name` | `Unicode` | | 157 | | `position` | `SmallInteger` | | 158 | | `is_nsfw` | `Boolean` | | 159 | | `is_deleted` | `Boolean` | | 160 | | `changed_roles` | `Array[BigInteger]` | | 161 | | `parent_category_id`| `BigInteger` | Nullable | 162 | | `guild_id` | `BigInteger` | | 163 | 164 | This table provides look-up information about channel categories. The columns in this schema more or less correspond to the [`discord.CategoryChannel` object](https://discordpy.readthedocs.io/en/rewrite/api.html#categorychannel). 165 | 166 | Like with text channels, channel categories that are deleted are tracked as such. 167 | 168 | ### users 169 | | Column Name | Type | Other | 170 | |---------------------|---------------|-----------------------------| 171 | | `user_id` | `BigInteger` | Primary key | 172 | | `name` | `Unicode` | | 173 | | `discriminator` | `SmallInteger`| | 174 | | `avatar` | `String` | Nullable | 175 | | `is_deleted` | `Boolean` | | 176 | | `is_bot` | `Boolean` | | 177 | 178 | This table provides look-up information about all known users. The columns in this schema more or less correspond to the [`discord.User` object](https://discordpy.readthedocs.io/en/rewrite/api.html#user). 179 | 180 | Like with text channels, users that delete their accounts are tracked as such. 181 | 182 | If you want to query a user's nicknames, see the `guild_membership` table. 183 | 184 | ### roles 185 | | Column Name | Type | Other | 186 | |---------------------|---------------|-----------------------------| 187 | | `role_id` | `BigInteger` | | 188 | | `name` | `Unicode` | | 189 | | `color` | `Integer` | | 190 | | `raw_permissions` | `BigInteger` | | 191 | | `guild_id` | `BigInteger` | | 192 | | `is_hoisted` | `Boolean` | | 193 | | `is_managed` | `Boolean` | | 194 | | `is_mentionable` | `Boolean` | | 195 | | `is_deleted` | `Boolean` | | 196 | | `position` | `SmallInteger`| | 197 | 198 | This table provides look-up information about roles. The columns in this schema more or less correspond to the [`discord.Role` object](https://discordpy.readthedocs.io/en/rewrite/api.html#role). 199 | 200 | Like with text channels, roles that are deleted are tracked as such. 
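Because these lookup tables soft-delete rows (`is_deleted`) instead of removing them, most queries will want to filter that flag explicitly. A minimal sketch, again with a placeholder guild ID and the column names documented above:

```sql
-- Current (non-deleted) roles in a guild, highest position first.
-- 1234567890 is a placeholder guild ID.
SELECT role_id, name, position
FROM roles
WHERE guild_id = 1234567890
  AND is_deleted = false
ORDER BY position DESC;
```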
201 | 
202 | ### guild\_membership 
203 | | Column Name         | Type          | Other                       | 
204 | |---------------------|---------------|-----------------------------| 
205 | | `user_id`           | `BigInteger`  | Primary key                 | 
206 | | `guild_id`          | `BigInteger`  | Primary key                 | 
207 | | `is_member`         | `Boolean`     |                             | 
208 | | `joined_at`         | `DateTime`    | Nullable                    | 
209 | | `nick`              | `Unicode(32)` | Nullable                    | 
210 | 
211 | Unique constraint `uq_guild_membership`: `user_id`, `guild_id`. 
212 | 
213 | This table tracks which users are currently members of which guilds. If a user is currently a member of a guild, then the `is_member` column will be `true` and the `joined_at` column will be non-`NULL`. It is possible for `is_member` to be `false` while `joined_at` is not `NULL`; this means the user joined the guild at some point but has since left. 
214 | 
215 | This table also tracks users' nicknames per guild. If the `nick` column is 
216 | `NULL`, they have no nickname, and their display name is just their username. 
217 | 
218 | ### role\_membership 
219 | | Column Name         | Type          | Other                       | 
220 | |---------------------|---------------|-----------------------------| 
221 | | `role_id`           | `BigInteger`  |                             | 
222 | | `guild_id`          | `BigInteger`  |                             | 
223 | | `user_id`           | `BigInteger`  |                             | 
224 | 
225 | Unique constraint `uq_role_membership`: `role_id`, `user_id`. 
226 | 
227 | This table tracks which users have which roles. At the very least, this includes the guild's `@everyone` role, which you can recognize by a row whose `role_id` and `guild_id` have the same value. 
228 | 
229 | Note that this is _not_ a way to check for membership. When a member leaves 
230 | a guild, all of their rows in this table are preserved until they decide to rejoin 
231 | (and their roles are reassigned). If you want to see whether a user is a member of 
232 | a guild, use the `guild_membership` table. 
233 | 
234 | ### emojis 
235 | | Column Name         | Type                | Other                       | 
236 | |---------------------|---------------------|-----------------------------| 
237 | | `emoji_id`          | `BigInteger`        |                             | 
238 | | `emoji_unicode`     | `Unicode(7)`        |                             | 
239 | | `is_custom`         | `Boolean`           |                             | 
240 | | `is_managed`        | `Boolean`           | Nullable                    | 
241 | | `is_deleted`        | `Boolean`           |                             | 
242 | | `name`              | `Array[String]`     |                             | 
243 | | `category`          | `Array[String]`     |                             | 
244 | | `roles`             | `Array[BigInteger]` |                             | 
245 | | `guild_id`          | `BigInteger`        |                             | 
246 | 
247 | Unique constraint `uq_emoji`: `emoji_id` and `emoji_unicode`. 
248 | 
249 | For an explanation of `emoji_id` / `emoji_unicode`, see the "Emoji Data" section below.
250 | 
251 | If the emoji is custom, then: 
252 | * the `is_custom` column will be set to `true` 
253 | * the `name` column will be an array of one, holding its assigned name 
254 | * the `category` column will be an array of one, with the string `custom` 
255 | * the `roles` column will be a list of the role IDs permitted to use this emoji 
256 | * if the above array is empty, there are no restrictions imposed 
257 | * the `guild_id` column will hold the ID of the guild this emoji belongs to 
258 | 
259 | If the emoji is unicode, then: 
260 | * the `is_custom` column will be set to `false` 
261 | * the `is_managed` and `is_deleted` columns will be set to `false` 
262 | * the `name` column will be an array of each unicode character's name in the string 
263 | * the `category` column will be an array of each unicode character's category in the string 
264 | * the `roles` column will be an empty array 
265 | * the `guild_id` column will be null 
266 | 
267 | ### audit\_log 
268 | | Column Name         | Type                | Other                                      | 
269 | |---------------------|---------------------|--------------------------------------------| 
270 | | `audit_entry_id`    | `BigInteger`        | Primary key                                | 
271 | | `guild_id`          | `BigInteger`        |                                            | 
272 | | `action`            | `Enum`              | `discord.AuditLogAction`                   | 
273 | | `user_id`           | `BigInteger`        |                                            | 
274 | | `reason`            | `Unicode`           | Nullable                                   | 
275 | | `category`          | `Enum`              | `discord.AuditLogActionCategory`, nullable | 
276 | | `before`            | `JSON`              |                                            | 
277 | | `after`             | `JSON`              |                                            | 
278 | 
279 | Unique constraint `uq_audit_log`: `audit_entry_id` and `guild_id`. 
280 | 
281 | This table records entries in the audit log. It stores the entry ID and the associated guild, as well as which action was performed. The `user_id` column refers to who was _performing_ the action, not the recipient. `category` is a general type dictating whether this was some kind of creation, deletion, or update. It can also be `NULL`. 
282 | 
283 | The `before` and `after` columns contain the change stored by this log entry. These JSON objects should have the same set of keys, and may contain `null` values. For instance, a record of a person removing their nickname would contain the following before and after JSON objects: 
284 | 
285 | ```js 
286 | /* Before */ 
287 | { 
288 |     "nick": "Johnny" 
289 | } 
290 | 
291 | /* After */ 
292 | { 
293 |     "nick": null 
294 | } 
295 | ``` 
296 | 
297 | (See also: the [discord.py documentation](https://discordpy.readthedocs.io/en/rewrite/api.html#audit-log-data) 
298 | on the subject) 
299 | 
300 | ### Crawl Tables 
301 | The tables `channel_crawl`, `thread_crawl`, and `audit_log_crawl` are used internally by the crawler to track how far along it is. These tables should not be used by clients, and their schemas can change at any time for any reason. 
302 | 
303 | ### Emoji Data 
304 | The distinction between "`emoji_id`" and "`emoji_unicode`" exists because Discord labels two very different objects as "emojis". One kind is "native" emojis, which are really unicode "characters"\*. The other is "discord" or custom emojis, which are images uploaded to a particular Discord guild; they have an assigned emoji ID and a name. Custom emojis are always attached to a guild, and may also be provided from outside sources, such as Twitch. 
305 | 
306 | To capture this union of two different types, two columns are used. If the emote is a unicode one, then the `emoji_id` column is set to `0` and the `emoji_unicode` column is a literal copy of the unicode string presented as the emoji.
Likewise, if the emote is a custom one, the `emoji_id` is set to that emoji's globally unique ID and the `emoji_unicode` column is set to an empty string. 307 | 308 | \* Unicode emojis may be more than one code point, as it may have modifiers like skin tone, or be composite emojis, such as the US flag, which is made of regional indicator characters, including spacer characters in between. For this reason, the type of the `emoji_unicode` column is `Unicode(7)`. 309 | -------------------------------------------------------------------------------- /statbot/client.py: -------------------------------------------------------------------------------- 1 | # 2 | # client.py 3 | # 4 | # statbot - Store Discord records for later analysis 5 | # Copyright (c) 2017-2018 Ammon Smith 6 | # 7 | # statbot is available free of charge under the terms of the MIT 8 | # License. You are free to redistribute and/or modify it under those 9 | # terms. It is distributed in the hopes that it will be useful, but 10 | # WITHOUT ANY WARRANTY. See the LICENSE file for more details. 11 | # 12 | 13 | import re 14 | import sys 15 | 16 | from datetime import datetime 17 | from io import BytesIO 18 | import asyncio 19 | import discord 20 | 21 | from .emoji import EmojiData 22 | from .sql import DiscordSqlHandler 23 | from .util import null_logger 24 | 25 | __all__ = [ 26 | "EventIngestionClient", 27 | ] 28 | 29 | EXTENSION_REGEX = re.compile(r"/\w+\.(\w+)(?:\?.+)?$") 30 | 31 | 32 | def user_needs_update(before, after): 33 | """ 34 | See if the given user update is something 35 | we care about. 36 | 37 | Returns 'False' for no difference or 38 | change we will ignore. 39 | """ 40 | 41 | for attr in ("name", "discriminator", "avatar"): 42 | if getattr(before, attr) != getattr(after, attr): 43 | return True 44 | return False 45 | 46 | 47 | def member_needs_update(before, after): 48 | """ 49 | See if the given member update is something 50 | we care about. 51 | 52 | Returns 'False' for no difference or 53 | change we will ignore. 54 | """ 55 | 56 | for attr in ("nick", "avatar", "roles"): 57 | if getattr(before, attr) != getattr(after, attr): 58 | return True 59 | return False 60 | 61 | 62 | class EventIngestionClient(discord.Client): 63 | __slots__ = ( 64 | "config", 65 | "logger", 66 | "sql", 67 | "crawlers", 68 | "crawler_logger", 69 | "ready", 70 | "sql_init", 71 | "hooks", 72 | ) 73 | 74 | def __init__( 75 | self, 76 | config, 77 | sql: DiscordSqlHandler, 78 | logger=null_logger, 79 | crawlers=None, 80 | crawler_logger=null_logger, 81 | ): 82 | super().__init__(intents=discord.Intents.all()) 83 | self.config = config 84 | self.logger = logger 85 | self.sql = sql 86 | self.crawlers = crawlers 87 | self.crawler_logger = crawler_logger 88 | self.sql_init = False 89 | self.hooks = { 90 | "on_guild_channel_create": None, 91 | "on_guild_channel_delete": None, 92 | "on_guild_channel_update": None, 93 | "on_thread_create": None, 94 | "on_thread_delete": None, 95 | "on_thread_update": None, 96 | } 97 | 98 | def run_with_token(self): 99 | return self.run(self.config["bot"]["token"]) 100 | 101 | # Async initialization hook. 
See 102 | # https://gist.github.com/Rapptz/6706e1c8f23ac27c98cee4dd985c8120 103 | async def setup_hook(self): 104 | self.ready = asyncio.Event(loop=self.loop) 105 | 106 | if self.crawlers is None: 107 | return 108 | 109 | for Crawler in self.crawlers: 110 | crawler = Crawler(self, self.sql, self.config, self.crawler_logger) 111 | crawler.start() 112 | 113 | async def wait_until_ready(self): 114 | # Override wait method to wait until SQL data is also ready 115 | # At least as long as "await super().wait_until_ready()" 116 | await self.ready.wait() 117 | 118 | async def _accept_message(self, message): 119 | await self.wait_until_ready() 120 | 121 | if not hasattr(message, "guild"): 122 | self._log_ignored("Message not from a guild.") 123 | self._log_ignored("Ignoring message.") 124 | return False 125 | elif getattr(message.guild, "id", None) not in self.config["guild-ids"]: 126 | self._log_ignored("Message from a guild we don't care about.") 127 | self._log_ignored("Ignoring message.") 128 | return False 129 | elif message.type != discord.MessageType.default: 130 | self._log_ignored("Special type of message receieved.") 131 | self._log_ignored("Ignoring message.") 132 | else: 133 | return True 134 | 135 | async def _accept_channel(self, channel): 136 | await self.wait_until_ready() 137 | 138 | if not hasattr(channel, "guild"): 139 | self._log_ignored("Channel not in a guild.") 140 | self._log_ignored("Ignoring message.") 141 | elif getattr(channel.guild, "id", None) not in self.config["guild-ids"]: 142 | self._log_ignored("Event from a guild we don't care about.") 143 | self._log_ignored("Ignoring message.") 144 | return False 145 | else: 146 | return True 147 | 148 | async def _accept_guild(self, guild): 149 | await self.wait_until_ready() 150 | 151 | if getattr(guild, "id", None) not in self.config["guild-ids"]: 152 | self._log_ignored("Event from a guild we don't care about.") 153 | self._log_ignored("Ignoring message.") 154 | return False 155 | else: 156 | return True 157 | 158 | def _log(self, message, action): 159 | name = message.author.display_name 160 | guild = message.guild.name 161 | chan = message.channel.name 162 | 163 | self.logger.debug(f"Message {action} by {name} in {guild} #{chan}") 164 | if self.config["logger"]["full-messages"]: 165 | self.logger.info("") 166 | self.logger.info(message.content) 167 | self.logger.info("") 168 | 169 | def _log_typing(self, channel, user): 170 | name = user.display_name 171 | guild = channel.guild.name 172 | chan = channel.name 173 | 174 | self.logger.debug(f"Typing by {name} on {guild} #{chan}") 175 | 176 | def _log_react(self, reaction, user, action): 177 | name = user.display_name 178 | emote = EmojiData(reaction.emoji) 179 | count = reaction.count 180 | id = reaction.message.id 181 | 182 | self.logger.debug(f"{name} {action} {emote} (total {count}) on message id {id}") 183 | 184 | def _log_ignored(self, message): 185 | if self.config["logger"]["ignored-events"]: 186 | self.logger.debug(message) 187 | 188 | def _init_sql(self, txact): 189 | self.logger.info(f"Processing {len(self.users)} users...") 190 | for user in self.users: 191 | self.sql.upsert_user(txact, user) 192 | 193 | self.logger.info(f"Processing {len(self.guilds)} guilds...") 194 | allowed_guilds = [ 195 | guild for guild in self.guilds if guild.id in set(self.config["guild-ids"]) 196 | ] 197 | for guild in allowed_guilds: 198 | self.sql.upsert_guild(txact, guild) 199 | 200 | self.logger.info(f"Processing {len(guild.roles)} roles...") 201 | for role in guild.roles: 202 | 
self.sql.upsert_role(txact, role) 203 | 204 | self.logger.info(f"Processing {len(guild.emojis)} emojis...") 205 | for emoji in guild.emojis: 206 | self.sql.upsert_emoji(txact, emoji) 207 | 208 | self.logger.info(f"Processing {len(guild.members)} members...") 209 | for member in guild.members: 210 | self.sql.upsert_member(txact, member) 211 | 212 | # In case people left while the bot was down 213 | self.sql.remove_old_members(txact, guild) 214 | 215 | text_channels = [] 216 | voice_channels = [] 217 | categories = [] 218 | for channel in guild.channels: 219 | if isinstance(channel, discord.TextChannel): 220 | text_channels.append(channel) 221 | elif isinstance(channel, discord.VoiceChannel): 222 | voice_channels.append(channel) 223 | elif isinstance(channel, discord.CategoryChannel): 224 | categories.append(channel) 225 | 226 | self.logger.info(f"Processing {len(categories)} channel categories...") 227 | for category in categories: 228 | self.sql.upsert_channel_category(txact, category) 229 | 230 | self.logger.info(f"Processing {len(text_channels)} channels...") 231 | for channel in text_channels: 232 | self.sql.upsert_channel(txact, channel) 233 | 234 | self.logger.info(f"Processing {len(voice_channels)} voice channels...") 235 | for channel in voice_channels: 236 | self.sql.upsert_voice_channel(txact, channel) 237 | 238 | async def on_ready(self): 239 | # Print welcome string 240 | self.logger.info(f"Logged in as {self.user.name} ({self.user.id})") 241 | self.logger.info("Recording activity in the following guilds:") 242 | for id in self.config["guild-ids"]: 243 | guild = self.get_guild(id) 244 | if guild is not None: 245 | self.logger.info(f"* {guild.name} ({id})") 246 | else: 247 | self.logger.error(f"Unable to find guild ID {id}") 248 | sys.exit(1) 249 | 250 | if not self.sql_init: 251 | self.logger.info("Initializing SQL lookup tables...") 252 | with self.sql.transaction() as txact: 253 | self._init_sql(txact) 254 | self.sql_init = True 255 | 256 | # All done setting up 257 | self.logger.info("") 258 | self.logger.info("Ready!") 259 | self.ready.set() 260 | 261 | async def on_message(self, message): 262 | self._log_ignored(f"Message id {message.id} created") 263 | if not await self._accept_message(message): 264 | return 265 | 266 | self._log(message, "created") 267 | 268 | with self.sql.transaction() as txact: 269 | self.sql.add_message(txact, message) 270 | 271 | async def on_message_edit(self, before, after): 272 | self._log_ignored(f"Message id {after.id} edited") 273 | if not await self._accept_message(after): 274 | return 275 | 276 | self._log(after, "edited") 277 | 278 | with self.sql.transaction() as txact: 279 | self.sql.edit_message(txact, before, after) 280 | 281 | async def on_message_delete(self, message): 282 | self._log_ignored(f"Message id {message.id} deleted") 283 | if not await self._accept_message(message): 284 | return 285 | 286 | self._log(message, "deleted") 287 | 288 | with self.sql.transaction() as txact: 289 | self.sql.remove_message(txact, message) 290 | 291 | async def on_typing(self, channel, user, when): 292 | self._log_ignored(f"User id {user.id} is typing") 293 | if not await self._accept_channel(channel): 294 | return 295 | 296 | self._log_typing(channel, user) 297 | 298 | with self.sql.transaction() as txact: 299 | self.sql.typing(txact, channel, user, when) 300 | 301 | async def on_reaction_add(self, reaction, user): 302 | self._log_ignored(f"Reaction {reaction.emoji} added") 303 | if not await self._accept_message(reaction.message): 304 | return 305 
| 306 | self._log_react(reaction, user, "reacted with") 307 | 308 | with self.sql.transaction() as txact: 309 | self.sql.add_reaction(txact, reaction, user) 310 | 311 | async def on_reaction_remove(self, reaction, user): 312 | self._log_ignored(f"Reaction {reaction.emoji} removed") 313 | if not await self._accept_message(reaction.message): 314 | return 315 | 316 | self._log_react(reaction, user, "removed a reaction of ") 317 | 318 | with self.sql.transaction() as txact: 319 | self.sql.remove_reaction(txact, reaction, user) 320 | 321 | async def on_reaction_clear(self, message, reactions): 322 | self._log_ignored(f"Reactions from {message.id} cleared") 323 | if not await self._accept_message(message): 324 | return 325 | 326 | self.logger.info(f"All reactions on message id {message.id} cleared") 327 | 328 | with self.sql.transaction() as txact: 329 | self.sql.clear_reactions(txact, message) 330 | 331 | async def on_guild_channel_create(self, channel): 332 | self._log_ignored(f"Channel was created in guild {channel.guild.id}") 333 | if not await self._accept_channel(channel): 334 | return 335 | 336 | if isinstance(channel, discord.VoiceChannel): 337 | self.logger.info( 338 | f"Voice channel {channel.name} created in {channel.guild.name}" 339 | ) 340 | with self.sql.transaction() as txact: 341 | self.sql.add_voice_channel(txact, channel) 342 | return 343 | 344 | self.logger.info(f"Channel #{channel.name} created in {channel.guild.name}") 345 | with self.sql.transaction() as txact: 346 | self.sql.add_channel(txact, channel) 347 | 348 | # pylint: disable=not-callable 349 | hook = self.hooks["on_guild_channel_create"] 350 | if hook: 351 | self.logger.debug(f"Found hook {hook!r}, calling it") 352 | await hook(channel) 353 | 354 | async def on_guild_channel_delete(self, channel): 355 | self._log_ignored(f"Channel was deleted in guild {channel.guild.id}") 356 | if not await self._accept_channel(channel): 357 | return 358 | 359 | if isinstance(channel, discord.VoiceChannel): 360 | self.logger.info( 361 | f"Voice channel {channel.name} deleted in {channel.guild.name}" 362 | ) 363 | with self.sql.transaction() as txact: 364 | self.sql.remove_voice_channel(txact, channel) 365 | return 366 | 367 | self.logger.info(f"Channel #{channel.name} deleted in {channel.guild.name}") 368 | with self.sql.transaction() as txact: 369 | self.sql.remove_channel(txact, channel) 370 | 371 | # pylint: disable=not-callable 372 | hook = self.hooks["on_guild_channel_delete"] 373 | if hook: 374 | self.logger.debug(f"Found hook {hook!r}, calling it") 375 | await hook(channel) 376 | 377 | async def on_guild_channel_update(self, before, after): 378 | self._log_ignored(f"Channel was updated in guild {after.guild.id}") 379 | if not await self._accept_channel(after): 380 | return 381 | 382 | if before.name != after.name: 383 | changed = f" (now {after.name})" 384 | else: 385 | changed = "" 386 | 387 | if isinstance(after, discord.TextChannel): 388 | self.logger.info( 389 | f"Channel #{before.name}{changed} was changed in {after.guild.name}" 390 | ) 391 | 392 | with self.sql.transaction() as txact: 393 | self.sql.update_channel(txact, after) 394 | 395 | # pylint: disable=not-callable 396 | hook = self.hooks["on_guild_channel_update"] 397 | if hook: 398 | self.logger.debug(f"Found hook {hook!r}, calling it") 399 | await hook(before, after) 400 | elif isinstance(after, discord.VoiceChannel): 401 | self.logger.info( 402 | "Voice channel {before.name}{changed} was changed in {after.guild.name}" 403 | ) 404 | 405 | with 
self.sql.transaction() as txact: 406 | self.sql.update_voice_channel(txact, after) 407 | elif isinstance(after, discord.CategoryChannel): 408 | self.logger.info( 409 | f"Channel category {before.name}{changed} was changed in {after.guild.name}" 410 | ) 411 | 412 | with self.sql.transaction() as txact: 413 | self.sql.update_channel_category(txact, after) 414 | 415 | async def on_guild_channel_pins_update(self, channel, last_pin): 416 | self._log_ignored(f"Channel {channel.id} got a pin update") 417 | if not await self._accept_channel(channel): 418 | return 419 | 420 | self.logger.debug(f"Channel #{channel.name} got a pin update") 421 | self.logger.warn("TODO: handling for on_guild_channel_pins_update") 422 | 423 | async def on_member_join(self, member): 424 | self._log_ignored(f"Member {member.id} joined guild {member.guild.id}") 425 | if not await self._accept_guild(member.guild): 426 | return 427 | 428 | self.logger.debug(f"Member {member.name} has joined {member.guild.name}") 429 | 430 | with self.sql.transaction() as txact: 431 | self.sql.upsert_user(txact, member) 432 | self.sql.upsert_member(txact, member) 433 | 434 | async def on_member_remove(self, member): 435 | self._log_ignored(f"Member {member.id} left guild {member.guild.id}") 436 | if not await self._accept_guild(member.guild): 437 | return 438 | 439 | self.logger.debug(f"Member {member.name} has left {member.guild.name}") 440 | 441 | with self.sql.transaction() as txact: 442 | self.sql.remove_user(txact, member) 443 | self.sql.remove_member(txact, member) 444 | 445 | async def on_member_update(self, before, after): 446 | self._log_ignored(f"Member {after.id} was updated in guild {after.guild.id}") 447 | if not await self._accept_guild(after.guild): 448 | return 449 | 450 | if not member_needs_update(before, after): 451 | self._log_ignored("We don't care about this type of member update") 452 | return 453 | 454 | if before.display_name != after.display_name: 455 | changed = f" (now {after.name})" 456 | else: 457 | changed = "" 458 | self.logger.debug( 459 | f"Member {before.display_name}{changed} was changed in {after.guild.name}" 460 | ) 461 | 462 | with self.sql.transaction() as txact: 463 | now = datetime.now() 464 | self.sql.update_member(txact, after) 465 | 466 | if before.nick != after.nick and after.nick is not None: 467 | self.sql.add_nickname(txact, before, now, after.nick) 468 | 469 | async def on_user_update(self, before: discord.User, after: discord.User): 470 | self._log_ignored(f"User {after.id} was updated") 471 | 472 | if not user_needs_update(before, after): 473 | self._log_ignored("We don't care about this kind user update") 474 | return 475 | 476 | if before.display_name != after.display_name: 477 | changed = f" (now {after.name})" 478 | else: 479 | changed = "" 480 | self.logger.debug(f"User {before.display_name}{changed} was changed") 481 | 482 | with self.sql.transaction() as txact: 483 | now = datetime.now() 484 | self.sql.update_user(txact, after) 485 | 486 | if before.avatar != after.avatar and after.avatar is not None: 487 | avatar, avatar_ext = await self.get_avatar(after.avatar) 488 | self.sql.add_avatar(txact, before, now, avatar, avatar_ext) 489 | 490 | if before.name != after.name: 491 | self.sql.add_username(txact, before, now, after.name) 492 | 493 | async def get_avatar(self, asset: discord.Asset) -> tuple[BytesIO, str]: 494 | avatar = BytesIO() 495 | await asset.save(avatar) 496 | 497 | match = EXTENSION_REGEX.findall(asset.url) 498 | if not match: 499 | raise ValueError(f"Avatar URL does not 
match extension regex: {asset.url}") 500 | avatar_ext = match[0] 501 | return avatar, avatar_ext 502 | 503 | async def on_guild_role_create(self, role): 504 | self._log_ignored(f"Role {role.id} was created in guild {role.guild.id}") 505 | if not await self._accept_guild(role.guild): 506 | return 507 | 508 | self.logger.info(f"Role {role.name} was created in {role.guild.name}") 509 | 510 | with self.sql.transaction() as txact: 511 | self.sql.add_role(txact, role) 512 | 513 | async def on_guild_role_delete(self, role): 514 | self._log_ignored(f"Role {role.id} was created in guild {role.guild.id}") 515 | if not await self._accept_guild(role.guild): 516 | return 517 | 518 | self.logger.info(f"Role {role.name} was deleted in {role.guild.name}") 519 | 520 | with self.sql.transaction() as txact: 521 | self.sql.remove_role(txact, role) 522 | 523 | async def on_guild_role_update(self, before, after): 524 | self._log_ignored(f"Role {after.id} was created in guild {after.guild.id}") 525 | if not await self._accept_guild(after.guild): 526 | return 527 | 528 | if before.name != after.name: 529 | changed = f" (now {after.name})" 530 | else: 531 | changed = "" 532 | self.logger.info( 533 | f"Role {before.name}{changed} was changed in {after.guild.name}" 534 | ) 535 | 536 | with self.sql.transaction() as txact: 537 | self.sql.update_role(txact, after) 538 | 539 | async def on_guild_emojis_update(self, guild, before, after): 540 | before = set(before) 541 | after = set(before) 542 | 543 | with self.sql.transaction() as txact: 544 | for emoji in after - before: 545 | self.sql.add_emoji(txact, emoji) 546 | for emoji in before - after: 547 | self.sql.remove_emoji(txact, emoji) 548 | 549 | async def on_thread_create(self, thread: discord.Thread): 550 | self._log_ignored(f"Thread was created in guild {thread.guild.id}") 551 | if not await self._accept_channel(thread.parent): 552 | return 553 | 554 | self.logger.info( 555 | f"Thread {thread.name} created in guild {thread.guild.name}, channel {thread.parent.name}" 556 | ) 557 | with self.sql.transaction() as txact: 558 | self.sql.add_thread(txact, thread) 559 | 560 | hook = self.hooks["on_thread_create"] 561 | if hook: 562 | self.logger.debug(f"Found hook {hook!r}, calling it") 563 | await hook(thread) 564 | 565 | async def on_thread_delete(self, thread: discord.Thread): 566 | self._log_ignored(f"Thread was deleted in guild {thread.guild.id}") 567 | if not await self._accept_channel(thread.parent): 568 | return 569 | 570 | self.logger.info( 571 | f"Thread {thread.name} deleted in guild {thread.guild.name}, channel {thread.parent.name}" 572 | ) 573 | with self.sql.transaction() as txact: 574 | self.sql.remove_thread(txact, thread) 575 | 576 | hook = self.hooks["on_thread_delete"] 577 | if hook: 578 | self.logger.debug(f"Found hook {hook!r}, calling it") 579 | await hook(thread) 580 | 581 | async def on_thread_update(self, before: discord.Thread, after: discord.Thread): 582 | self._log_ignored(f"Thread was updated in guild {after.guild.id}") 583 | if not await self._accept_channel(after.parent): 584 | return 585 | 586 | changed = f" (now {after.name})" if before.name != after.name else "" 587 | 588 | self.logger.info( 589 | ( 590 | f"Thread {before.name}{changed} changed in guild {after.guild.name}, " 591 | f"channel {after.parent.name}" 592 | ) 593 | ) 594 | with self.sql.transaction() as txact: 595 | self.sql.update_thread(txact, after) 596 | 597 | hook = self.hooks["on_thread_update"] 598 | if hook: 599 | self.logger.debug(f"Found hook {hook!r}, calling it") 600 
| await hook(before, after) 601 | 602 | async def on_thread_member_join(self, member: discord.ThreadMember): 603 | self._log_ignored(f"User id {member.id} joined thread {member.thread.name}") 604 | if not await self._accept_channel(member.thread.parent): 605 | return 606 | 607 | with self.sql.transaction() as txact: 608 | self.sql.add_thread_member(txact, member) 609 | 610 | async def on_thread_member_remove(self, member: discord.ThreadMember): 611 | self._log_ignored(f"User id {member.id} left thread {member.thread.name}") 612 | if not await self._accept_channel(member.thread.parent): 613 | return 614 | 615 | with self.sql.transaction() as txact: 616 | self.sql.remove_thread_member(txact, member) 617 | -------------------------------------------------------------------------------- /statbot/sql.py: -------------------------------------------------------------------------------- 1 | # 2 | # sql.py 3 | # 4 | # statbot - Store Discord records for later analysis 5 | # Copyright (c) 2017-2018 Ammon Smith 6 | # 7 | # statbot is available free of charge under the terms of the MIT 8 | # License. You are free to redistribute and/or modify it under those 9 | # terms. It is distributed in the hopes that it will be useful, but 10 | # WITHOUT ANY WARRANTY. See the LICENSE file for more details. 11 | # 12 | 13 | from collections import namedtuple 14 | from datetime import datetime 15 | import functools 16 | import io 17 | import random 18 | 19 | from alembic import command 20 | from alembic.config import Config 21 | from alembic.migration import MigrationContext 22 | import discord 23 | from sqlalchemy import create_engine, and_, Column, inspect 24 | from sqlalchemy.sql import select 25 | from sqlalchemy.dialects.postgresql import insert as p_insert 26 | 27 | from .audit_log import AuditLogData 28 | from .cache import LruCache 29 | from .emoji import EmojiData 30 | from .mention import MentionType 31 | from .schema import DiscordMetadata 32 | from .util import int_hash, null_logger 33 | 34 | Column = functools.partial(Column, nullable=False) 35 | FakeMember = namedtuple("FakeMember", ("guild", "id")) 36 | 37 | MAX_ID = 2**63 - 1 38 | 39 | __all__ = [ 40 | "DiscordSqlHandler", 41 | ] 42 | 43 | 44 | # Value builders 45 | def guild_values(guild: discord.Guild): 46 | return { 47 | "guild_id": guild.id, 48 | "int_owner_id": int_hash(guild.owner.id), 49 | "name": guild.name, 50 | "icon": "" if guild.icon is None else guild.icon.url, 51 | "voice_region": "deprecated", 52 | "afk_channel_id": getattr(guild.afk_channel, "id", None), 53 | "afk_timeout": guild.afk_timeout, 54 | "mfa": bool(guild.mfa_level), 55 | "verification_level": guild.verification_level, 56 | "explicit_content_filter": guild.explicit_content_filter, 57 | "features": guild.features, 58 | "splash": "" if guild.splash is None else guild.splash.key, 59 | } 60 | 61 | 62 | def message_values(message: discord.Message, is_in_thread=False): 63 | if message.type == discord.MessageType.default: 64 | system_content = "" 65 | else: 66 | system_content = message.system_content 67 | 68 | attach_urls = "\n".join(attach.url for attach in message.attachments) 69 | if message.content: 70 | content = "\n".join((message.content, attach_urls)) 71 | else: 72 | content = attach_urls 73 | 74 | return { 75 | "message_id": message.id, 76 | "created_at": message.created_at, 77 | "edited_at": message.edited_at, 78 | "deleted_at": None, 79 | "message_type": message.type, 80 | "system_content": system_content, 81 | "content": content.replace("\0", " "), 82 | "embeds": 
[embed.to_dict() for embed in message.embeds], 83 | "attachments": len(message.attachments), 84 | "webhook_id": message.webhook_id, 85 | "int_user_id": int_hash(message.author.id), 86 | "channel_id": message.channel.id if not is_in_thread else None, 87 | "thread_id": message.channel.id if is_in_thread else None, 88 | "guild_id": message.guild.id, 89 | } 90 | 91 | 92 | def channel_values(channel): 93 | return { 94 | "channel_id": channel.id, 95 | "name": channel.name, 96 | "is_nsfw": channel.is_nsfw(), 97 | "is_deleted": False, 98 | "position": channel.position, 99 | "topic": channel.topic, 100 | "changed_roles": [role.id for role in channel.changed_roles], 101 | "category_id": getattr(channel.category, "id", None), 102 | "guild_id": channel.guild.id, 103 | } 104 | 105 | 106 | def voice_channel_values(channel): 107 | return { 108 | "voice_channel_id": channel.id, 109 | "name": channel.name, 110 | "is_deleted": False, 111 | "position": channel.position, 112 | "bitrate": channel.bitrate, 113 | "user_limit": channel.user_limit, 114 | "changed_roles": [role.id for role in channel.changed_roles], 115 | "category_id": getattr(channel.category, "id", None), 116 | "guild_id": channel.guild.id, 117 | } 118 | 119 | 120 | def channel_categories_values(category): 121 | return { 122 | "category_id": category.id, 123 | "name": category.name, 124 | "position": category.position, 125 | "is_deleted": False, 126 | "is_nsfw": category.is_nsfw(), 127 | "parent_category_id": getattr(category.category, "id", None), 128 | "changed_roles": [role.id for role in category.changed_roles], 129 | "guild_id": category.guild.id, 130 | } 131 | 132 | 133 | def user_values(user, deleted=False): 134 | return { 135 | "int_user_id": int_hash(user.id), 136 | "real_user_id": user.id, 137 | "name": user.name, 138 | "discriminator": user.discriminator, 139 | "avatar": None if user.avatar is None else user.avatar.url, 140 | "is_deleted": deleted, 141 | "is_bot": user.bot, 142 | } 143 | 144 | 145 | def guild_member_values(member): 146 | return { 147 | "int_user_id": int_hash(member.id), 148 | "guild_id": member.guild.id, 149 | "is_member": True, 150 | "joined_at": member.joined_at, 151 | "nick": member.nick, 152 | } 153 | 154 | 155 | def role_member_values(member, role): 156 | return { 157 | "role_id": role.id, 158 | "guild_id": role.guild.id, 159 | "int_user_id": int_hash(member.id), 160 | } 161 | 162 | 163 | def role_values(role): 164 | return { 165 | "role_id": role.id, 166 | "name": role.name, 167 | "color": role.color.value, 168 | "raw_permissions": role.permissions.value, 169 | "guild_id": role.guild.id, 170 | "is_hoisted": role.hoist, 171 | "is_managed": role.managed, 172 | "is_mentionable": role.mentionable, 173 | "is_deleted": False, 174 | "position": role.position, 175 | } 176 | 177 | 178 | def reaction_values(reaction, user, current): 179 | data = EmojiData(reaction.emoji) 180 | return { 181 | "message_id": reaction.message.id, 182 | "emoji_id": data.id, 183 | "emoji_unicode": data.unicode, 184 | "int_user_id": int_hash(user.id), 185 | "created_at": datetime.now() if current else None, 186 | "deleted_at": None, 187 | "channel_id": reaction.message.channel.id, 188 | "guild_id": reaction.message.guild.id, 189 | } 190 | 191 | 192 | def thread_values(thread: discord.Thread, deleted=False): 193 | return { 194 | "thread_id": thread.id, 195 | "name": thread.name, 196 | "invitable": thread.invitable, 197 | "locked": thread.locked, 198 | "archived": thread.archived, 199 | "auto_archive_duration": thread.auto_archive_duration, 200 | 
"archive_timestamp": thread.archive_timestamp, 201 | "created_at": thread.created_at, 202 | "edited_at": datetime.now(), 203 | "deleted_at": datetime.now() if deleted else None, 204 | "is_deleted": deleted, 205 | "int_owner_id": int_hash(thread.owner_id), 206 | "parent_id": thread.parent_id, 207 | "guild_id": thread.guild.id, 208 | } 209 | 210 | 211 | def thread_member_values(member: discord.ThreadMember, removed=False): 212 | return { 213 | "int_member_id": int_hash(member.id), 214 | "thread_id": member.thread_id, 215 | "joined_at": member.joined_at, 216 | "left_at": datetime.now() if removed else None, 217 | } 218 | 219 | 220 | class _Transaction: 221 | __slots__ = ( 222 | "conn", 223 | "logger", 224 | "txact", 225 | "ok", 226 | ) 227 | 228 | def __init__(self, conn, logger): 229 | self.conn = conn 230 | self.logger = logger 231 | self.txact = None 232 | self.ok = True 233 | 234 | def __enter__(self): 235 | self.logger.debug("Starting transaction...") 236 | self.txact = self.conn.begin() 237 | return self 238 | 239 | def __exit__(self, type, value, traceback): 240 | if (type, value, traceback) == (None, None, None): 241 | self.logger.debug("Committing transaction...") 242 | self.txact.commit() 243 | else: 244 | self.logger.error("Exception occurred in 'with' scope!", exc_info=1) 245 | self.logger.debug("Rolling back transaction...") 246 | self.ok = False 247 | self.txact.rollback() 248 | 249 | def execute(self, *args, **kwargs): 250 | return self.conn.execute(*args, **kwargs) 251 | 252 | 253 | class DiscordSqlHandler: 254 | """ 255 | An abstract handling class that bridges the gap between 256 | the SQLAlchemy code and the discord.py code. 257 | 258 | It can correctly handle discord objects and ingest or 259 | process them into the SQL database accordingly. 
260 | """ 261 | 262 | # disable because we get false positives for dml in sqlalchemy insert/delete 263 | # pylint: disable=no-value-for-parameter 264 | 265 | __slots__ = ( 266 | "db", 267 | "conn", 268 | "logger", 269 | "tb_messages", 270 | "tb_reactions", 271 | "tb_typing", 272 | "tb_pins", 273 | "tb_mentions", 274 | "tb_guilds", 275 | "tb_channels", 276 | "tb_voice_channels", 277 | "tb_channel_categories", 278 | "tb_users", 279 | "tb_guild_membership", 280 | "tb_role_membership", 281 | "tb_avatar_history", 282 | "tb_username_history", 283 | "tb_nickname_history", 284 | "tb_emojis", 285 | "tb_roles", 286 | "tb_audit_log", 287 | "tb_channel_crawl", 288 | "tb_audit_log_crawl", 289 | "tb_threads", 290 | "tb_thread_members", 291 | "tb_thread_crawl", 292 | "message_cache", 293 | "typing_cache", 294 | "guild_cache", 295 | "channel_cache", 296 | "voice_channel_cache", 297 | "channel_category_cache", 298 | "user_cache", 299 | "emoji_cache", 300 | "role_cache", 301 | "thread_cache", 302 | ) 303 | 304 | def __init__(self, addr, cache_size, logger=null_logger): 305 | logger.info(f"Opening database: '{addr}'") 306 | self.db = create_engine(addr) 307 | self.conn = self.db.connect() 308 | meta = DiscordMetadata(self.db) 309 | self.logger = logger 310 | 311 | self.tb_messages = meta.tb_messages 312 | self.tb_reactions = meta.tb_reactions 313 | self.tb_typing = meta.tb_typing 314 | self.tb_pins = meta.tb_pins 315 | self.tb_mentions = meta.tb_mentions 316 | self.tb_guilds = meta.tb_guilds 317 | self.tb_channels = meta.tb_channels 318 | self.tb_voice_channels = meta.tb_voice_channels 319 | self.tb_channel_categories = meta.tb_channel_categories 320 | self.tb_users = meta.tb_users 321 | self.tb_guild_membership = meta.tb_guild_membership 322 | self.tb_role_membership = meta.tb_role_membership 323 | self.tb_avatar_history = meta.tb_avatar_history 324 | self.tb_username_history = meta.tb_username_history 325 | self.tb_nickname_history = meta.tb_nickname_history 326 | self.tb_emojis = meta.tb_emojis 327 | self.tb_roles = meta.tb_roles 328 | self.tb_audit_log = meta.tb_audit_log 329 | self.tb_channel_crawl = meta.tb_channel_crawl 330 | self.tb_audit_log_crawl = meta.tb_audit_log_crawl 331 | self.tb_threads = meta.tb_threads 332 | self.tb_thread_members = meta.tb_thread_members 333 | self.tb_thread_crawl = meta.tb_thread_crawl 334 | 335 | # Caches 336 | if cache_size is not None: 337 | self.message_cache = LruCache(cache_size["event-size"]) 338 | self.typing_cache = LruCache(cache_size["event-size"]) 339 | self.guild_cache = LruCache(cache_size["lookup-size"]) 340 | self.channel_cache = LruCache(cache_size["lookup-size"]) 341 | self.voice_channel_cache = LruCache(cache_size["lookup-size"]) 342 | self.channel_category_cache = LruCache(cache_size["lookup-size"]) 343 | self.user_cache = LruCache(cache_size["lookup-size"]) 344 | self.emoji_cache = LruCache(cache_size["lookup-size"]) 345 | self.role_cache = LruCache(cache_size["lookup-size"]) 346 | self.thread_cache = LruCache(cache_size["lookup-size"]) 347 | 348 | alembic_cfg = Config("alembic.ini") 349 | alembic_cfg.set_main_option("sqlalchemy.url", addr) 350 | 351 | # This is used in migrations/env.py and prevents Alembic from replacing 352 | # our logging handlers. This has the drawback of not getting any log 353 | # output from Alembic. 
354 | alembic_cfg.attributes["configure_logger"] = False 355 | 356 | if not inspect(self.db).has_table("messages"): 357 | # No tables exist (probably), so create all of them, and mark the 358 | # current revision as up-to-date 359 | self.logger.info("Creating tables") 360 | meta.metadata_obj.create_all(self.db) 361 | command.stamp(alembic_cfg, "head") 362 | else: 363 | self.logger.info("Performing migrations") 364 | migration_context = MigrationContext.configure(self.conn) 365 | current_rev = migration_context.get_current_revision() 366 | if current_rev is None: 367 | # This means the db is in a state prior to when Alembic was 368 | # added. This assumes discord.py is at v1.5, but it shouldn't be 369 | # a big problem if it isn't, since the first few migrations are 370 | # fairly trivial. 371 | command.stamp(alembic_cfg, "initial_revision_discord_py_1_5") 372 | 373 | # Perform any pending migrations for the current revision. 374 | command.upgrade(alembic_cfg, "head") 375 | self.logger.info("Created all tables.") 376 | 377 | # Transaction logic 378 | def transaction(self): 379 | return _Transaction(self.conn, self.logger) 380 | 381 | # Guild 382 | def upsert_guild(self, txact, guild): 383 | values = guild_values(guild) 384 | if self.guild_cache.get(guild.id) == values: 385 | self.logger.debug(f"Guild lookup for {guild.id} is already up-to-date") 386 | return 387 | 388 | self.logger.info(f"Updating lookup data for guild {guild.name}") 389 | ups = ( 390 | p_insert(self.tb_guilds) 391 | .values(values) 392 | .on_conflict_do_update( 393 | index_elements=["guild_id"], 394 | index_where=(self.tb_guilds.c.guild_id == guild.id), 395 | set_=values, 396 | ) 397 | ) 398 | txact.conn.execute(ups) 399 | self.guild_cache[guild.id] = values 400 | 401 | # Messages 402 | def add_message(self, txact, message: discord.Message): 403 | is_in_thread = isinstance(message.channel, discord.Thread) 404 | 405 | values = message_values(message, is_in_thread) 406 | 407 | if self.message_cache.get(message.id) == values: 408 | self.logger.debug(f"Message lookup for {message.id} is already up-to-date") 409 | return 410 | 411 | if is_in_thread: 412 | self.upsert_thread(txact, message.channel) 413 | 414 | self.logger.debug(f"Inserting message {message.id}") 415 | ins = self.tb_messages.insert().values(values) 416 | txact.execute(ins) 417 | self.message_cache[message.id] = values 418 | 419 | self.upsert_user(txact, message.author) 420 | self.insert_mentions(txact, message) 421 | 422 | def edit_message(self, txact, before, after): 423 | self.logger.debug(f"Updating message {after.id}") 424 | upd = ( 425 | self.tb_messages.update() 426 | .values( 427 | { 428 | "edited_at": after.edited_at, 429 | "content": after.content, 430 | "embeds": [embed.to_dict() for embed in after.embeds], 431 | } 432 | ) 433 | .where(self.tb_messages.c.message_id == after.id) 434 | ) 435 | txact.execute(upd) 436 | 437 | self.insert_mentions(txact, after) 438 | 439 | def remove_message(self, txact, message): 440 | self.logger.debug(f"Deleting message {message.id}") 441 | upd = ( 442 | self.tb_messages.update() 443 | .values(deleted_at=datetime.now()) 444 | .where(self.tb_messages.c.message_id == message.id) 445 | ) 446 | txact.execute(upd) 447 | self.message_cache.pop(message.id, None) 448 | 449 | def insert_message(self, txact, message: discord.Message): 450 | is_in_thread = isinstance(message.channel, discord.Thread) 451 | 452 | values = message_values(message, is_in_thread) 453 | 454 | if self.message_cache.get(message.id) == values: 455 | 
self.logger.debug(f"Message lookup for {message.id} is already up-to-date") 456 | return 457 | 458 | if is_in_thread: 459 | self.upsert_thread(txact, message.channel) 460 | 461 | self.logger.debug(f"Inserting message {message.id}") 462 | ins = ( 463 | p_insert(self.tb_messages) 464 | .values(values) 465 | .on_conflict_do_nothing(index_elements=["message_id"]) 466 | ) 467 | txact.execute(ins) 468 | self.message_cache[message.id] = values 469 | 470 | self.upsert_user(txact, message.author) 471 | self.insert_mentions(txact, message) 472 | 473 | # Mentions 474 | def insert_mentions(self, txact, message): 475 | self.logger.debug(f"Inserting all mentions in message {message.id}") 476 | 477 | for id in message.raw_mentions: 478 | if id > MAX_ID: 479 | self.logger.error(f"User mention was too long: {id}") 480 | continue 481 | 482 | self.logger.debug(f"User mention: {id}") 483 | ins = ( 484 | p_insert(self.tb_mentions) 485 | .values( 486 | { 487 | "mentioned_id": id, 488 | "type": MentionType.USER, 489 | "message_id": message.id, 490 | "channel_id": message.channel.id, 491 | "guild_id": message.guild.id, 492 | } 493 | ) 494 | .on_conflict_do_nothing( 495 | index_elements=["mentioned_id", "type", "message_id"] 496 | ) 497 | ) 498 | txact.execute(ins) 499 | 500 | for id in message.raw_role_mentions: 501 | if id > MAX_ID: 502 | self.logger.error(f"Role mention was too long: {id}") 503 | continue 504 | 505 | self.logger.debug(f"Role mention: {id}") 506 | ins = ( 507 | p_insert(self.tb_mentions) 508 | .values( 509 | { 510 | "mentioned_id": id, 511 | "type": MentionType.ROLE, 512 | "message_id": message.id, 513 | "channel_id": message.channel.id, 514 | "guild_id": message.guild.id, 515 | } 516 | ) 517 | .on_conflict_do_nothing( 518 | index_elements=["mentioned_id", "type", "message_id"] 519 | ) 520 | ) 521 | txact.execute(ins) 522 | 523 | for id in message.raw_channel_mentions: 524 | if id > MAX_ID: 525 | self.logger.error(f"Channel mention was too long: {id}") 526 | continue 527 | 528 | self.logger.debug(f"Channel mention: {id}") 529 | ins = ( 530 | p_insert(self.tb_mentions) 531 | .values( 532 | { 533 | "mentioned_id": id, 534 | "type": MentionType.CHANNEL, 535 | "message_id": message.id, 536 | "channel_id": message.channel.id, 537 | "guild_id": message.guild.id, 538 | } 539 | ) 540 | .on_conflict_do_nothing( 541 | index_elements=["mentioned_id", "type", "message_id"] 542 | ) 543 | ) 544 | txact.execute(ins) 545 | 546 | # Typing 547 | def typing(self, txact, channel, user, when): 548 | key = (when, user.id, channel.id) 549 | if self.typing_cache.get(key, False): 550 | self.logger.debug("Typing lookup is up-to-date") 551 | return 552 | 553 | is_in_thread = isinstance(channel, discord.Thread) 554 | 555 | if is_in_thread: 556 | self.upsert_thread(txact, channel) 557 | 558 | self.logger.debug(f"Inserting typing event for user {user.id}") 559 | ins = self.tb_typing.insert().values( 560 | { 561 | "timestamp": when, 562 | "int_user_id": int_hash(user.id), 563 | "channel_id": channel.id if not is_in_thread else None, 564 | "thread_id": channel.id if is_in_thread else None, 565 | "guild_id": channel.guild.id, 566 | } 567 | ) 568 | txact.execute(ins) 569 | self.typing_cache[key] = True 570 | 571 | # Reactions 572 | def add_reaction(self, txact, reaction, user): 573 | self.logger.debug( 574 | f"Inserting live reaction for user {user.id} on message {reaction.message.id}" 575 | ) 576 | self.upsert_emoji(txact, reaction.emoji) 577 | self.upsert_user(txact, user) 578 | values = reaction_values(reaction, user, True) 
579 | ins = self.tb_reactions.insert().values(values) 580 | txact.execute(ins) 581 | 582 | def remove_reaction(self, txact, reaction, user): 583 | self.logger.debug( 584 | f"Deleting reaction for user {user.id} on message {reaction.message.id}" 585 | ) 586 | data = EmojiData(reaction.emoji) 587 | upd = ( 588 | self.tb_reactions.update() 589 | .values(deleted_at=datetime.now()) 590 | .where(self.tb_reactions.c.message_id == reaction.message.id) 591 | .where(self.tb_reactions.c.emoji_id == data.id) 592 | .where(self.tb_reactions.c.emoji_unicode == data.unicode) 593 | .where(self.tb_reactions.c.int_user_id == int_hash(user.id)) 594 | ) 595 | txact.execute(upd) 596 | 597 | def insert_reaction(self, txact, reaction, users): 598 | self.logger.debug(f"Inserting past reactions for {reaction.message.id}") 599 | self.upsert_emoji(txact, reaction.emoji) 600 | data = EmojiData(reaction.emoji) 601 | for user in users: 602 | self.upsert_user(txact, user) 603 | values = reaction_values(reaction, user, False) 604 | self.logger.debug(f"Inserting single reaction {data} from {user.id}") 605 | ins = ( 606 | p_insert(self.tb_reactions) 607 | .values(values) 608 | .on_conflict_do_nothing( 609 | index_elements=[ 610 | "message_id", 611 | "emoji_id", 612 | "emoji_unicode", 613 | "int_user_id", 614 | "created_at", 615 | ] 616 | ) 617 | ) 618 | txact.execute(ins) 619 | 620 | def clear_reactions(self, txact, message): 621 | self.logger.debug(f"Deleting all reactions on message {message.id}") 622 | upd = ( 623 | self.tb_reactions.update() 624 | .values(deleted_at=datetime.now()) 625 | .where(self.tb_reactions.c.message_id == message.id) 626 | ) 627 | txact.execute(upd) 628 | 629 | # Pins (TODO) 630 | def add_pin(self, txact, announce, message): 631 | # pylint: disable=unreachable 632 | raise NotImplementedError 633 | 634 | self.logger.debug(f"Inserting pin for message {message.id}") 635 | ins = self.tb_pins.insert().values( 636 | { 637 | "pin_id": announce.id, 638 | "message_id": message.id, 639 | "pinner_id": announce.author.id, 640 | "int_user_id": int_hash(message.author.id), 641 | "channel_id": message.channel.id, 642 | "guild_id": message.guild.id, 643 | } 644 | ) 645 | txact.execute(ins) 646 | 647 | def remove_pin(self, txact, announce, message): 648 | # pylint: disable=unreachable 649 | raise NotImplementedError 650 | 651 | self.logger.debug(f"Deleting pin for message {message.id}") 652 | delet = ( 653 | self.tb_pins.delete() 654 | .where(self.tb_pins.c.pin_id == announce.id) 655 | .where(self.tb_pins.c.message_id == message.id) 656 | ) 657 | txact.execute(delet) 658 | 659 | # Roles 660 | def add_role(self, txact, role): 661 | if role.id in self.role_cache: 662 | self.logger.debug(f"Role {role.id} already inserted.") 663 | return 664 | 665 | self.logger.info(f"Inserting role {role.id}") 666 | values = role_values(role) 667 | ins = self.tb_roles.insert().values(values) 668 | txact.execute(ins) 669 | self.role_cache[role.id] = values 670 | 671 | def _update_role(self, txact, role): 672 | self.logger.info(f"Updating role {role.id} in guild {role.guild.id}") 673 | values = role_values(role) 674 | upd = ( 675 | self.tb_roles.update() 676 | .where(self.tb_roles.c.role_id == role.id) 677 | .values(values) 678 | ) 679 | txact.execute(upd) 680 | self.role_cache[role.id] = values 681 | 682 | def update_role(self, txact, role): 683 | if role.id in self.role_cache: 684 | self._update_role(txact, role) 685 | else: 686 | self.upsert_role(txact, role) 687 | 688 | def remove_role(self, txact, role): 689 | 
self.logger.info(f"Deleting role {role.id}") 690 | upd = ( 691 | self.tb_roles.update() 692 | .values(is_deleted=True) 693 | .where(self.tb_roles.c.role_id == role.id) 694 | ) 695 | txact.execute(upd) 696 | self.role_cache.pop(role.id, None) 697 | 698 | def upsert_role(self, txact, role): 699 | values = role_values(role) 700 | if self.role_cache.get(role.id) == values: 701 | self.logger.debug(f"Role lookup for {role.id} is already up-to-date") 702 | return 703 | 704 | self.logger.debug(f"Updating lookup data for role {role.name}") 705 | ups = ( 706 | p_insert(self.tb_roles) 707 | .values(values) 708 | .on_conflict_do_update( 709 | index_elements=["role_id"], 710 | index_where=(self.tb_roles.c.role_id == role.id), 711 | set_=values, 712 | ) 713 | ) 714 | txact.execute(ups) 715 | self.role_cache[role.id] = values 716 | 717 | # Channels 718 | def add_channel(self, txact, channel): 719 | if channel.id in self.channel_cache: 720 | self.logger.debug(f"Channel {channel.id} already inserted.") 721 | return 722 | 723 | self.logger.info( 724 | f"Inserting new channel {channel.id} for guild {channel.guild.id}" 725 | ) 726 | values = channel_values(channel) 727 | ins = self.tb_channels.insert().values(values) 728 | txact.execute(ins) 729 | self.channel_cache[channel.id] = values 730 | 731 | def _update_channel(self, txact, channel): 732 | self.logger.info(f"Updating channel {channel.id} in guild {channel.guild.id}") 733 | values = channel_values(channel) 734 | upd = ( 735 | self.tb_channels.update() 736 | .where(self.tb_channels.c.channel_id == channel.id) 737 | .values(values) 738 | ) 739 | txact.execute(upd) 740 | self.channel_cache[channel.id] = values 741 | 742 | def update_channel(self, txact, channel): 743 | if channel.id in self.channel_cache: 744 | self._update_channel(txact, channel) 745 | else: 746 | self.upsert_channel(txact, channel) 747 | 748 | def remove_channel(self, txact, channel): 749 | self.logger.info(f"Deleting channel {channel.id} in guild {channel.guild.id}") 750 | upd = ( 751 | self.tb_channels.update() 752 | .values(is_deleted=True) 753 | .where(self.tb_channels.c.channel_id == channel.id) 754 | ) 755 | txact.execute(upd) 756 | self.channel_cache.pop(channel.id, None) 757 | 758 | def upsert_channel(self, txact, channel): 759 | values = channel_values(channel) 760 | if self.channel_cache.get(channel.id) == values: 761 | self.logger.debug(f"Channel lookup for {channel.id} is already up-to-date") 762 | return 763 | 764 | self.logger.debug(f"Updating lookup data for channel #{channel.name}") 765 | ups = ( 766 | p_insert(self.tb_channels) 767 | .values(values) 768 | .on_conflict_do_update( 769 | index_elements=["channel_id"], 770 | index_where=(self.tb_channels.c.channel_id == channel.id), 771 | set_=values, 772 | ) 773 | ) 774 | txact.execute(ups) 775 | self.channel_cache[channel.id] = values 776 | 777 | # Voice Channels 778 | def add_voice_channel(self, txact, channel): 779 | if channel in self.voice_channel_cache: 780 | self.logger.debug(f"Voice channel {channel.id} already inserted") 781 | return 782 | 783 | self.logger.info( 784 | "Inserting new voice channel {channel.id} for guild {channel.guild.id}" 785 | ) 786 | values = voice_channel_values(channel) 787 | ins = self.tb_voice_channels.insert().values(values) 788 | txact.execute(ins) 789 | self.voice_channel_cache[channel.id] = values 790 | 791 | def _update_voice_channel(self, txact, channel): 792 | self.logger.info( 793 | f"Updating voice channel {channel.id} in guild {channel.guild.id}" 794 | ) 795 | values = 
voice_channel_values(channel) 796 | upd = ( 797 | self.tb_voice_channels.update() 798 | .where(self.tb_voice_channels.c.voice_channel_id == channel.id) 799 | .values(values) 800 | ) 801 | txact.execute(upd) 802 | self.voice_channel_cache[channel.id] = values 803 | 804 | def update_voice_channel(self, txact, channel): 805 | if channel.id in self.voice_channel_cache: 806 | self._update_voice_channel(txact, channel) 807 | else: 808 | self.upsert_channel(txact, channel) 809 | 810 | def remove_voice_channel(self, txact, channel): 811 | self.logger.info( 812 | f"Deleting voice channel {channel.id} in guild {channel.guild.id}" 813 | ) 814 | upd = ( 815 | self.tb_voice_channels.update() 816 | .values(is_deleted=True) 817 | .where(self.tb_voice_channels.c.voice_channel_id == channel.id) 818 | ) 819 | txact.execute(upd) 820 | self.voice_channel_cache.pop(channel.id, None) 821 | 822 | def upsert_voice_channel(self, txact, channel): 823 | values = voice_channel_values(channel) 824 | if self.voice_channel_cache.get(channel.id) == values: 825 | self.logger.debug( 826 | f"Voice channel lookup for {channel.id} is already up-to-date" 827 | ) 828 | return 829 | 830 | self.logger.debug(f"Updating lookup data for voice channel '{channel.name}'") 831 | ups = ( 832 | p_insert(self.tb_voice_channels) 833 | .values(values) 834 | .on_conflict_do_update( 835 | index_elements=["voice_channel_id"], 836 | index_where=(self.tb_voice_channels.c.voice_channel_id == channel.id), 837 | set_=values, 838 | ) 839 | ) 840 | txact.execute(ups) 841 | self.voice_channel_cache[channel.id] = values 842 | 843 | # Channel Categories 844 | def add_channel_category(self, txact, category): 845 | if category.id in self.channel_category_cache: 846 | self.logger.debug(f"Channel category {category.id} already inserted.") 847 | return 848 | 849 | self.logger.info( 850 | f"Inserting new category {category.id} for guild {category.guild.id}" 851 | ) 852 | values = channel_categories_values(category) 853 | ins = self.tb_channel_categories.insert().values(values) 854 | txact.execute(ins) 855 | self.channel_category_cache[category.id] = values 856 | 857 | def _update_channel_category(self, txact, category): 858 | self.logger.info( 859 | f"Updating channel category {category.id} in guild {category.guild.id}" 860 | ) 861 | values = channel_categories_values(category) 862 | upd = ( 863 | self.tb_channel_categories.update() 864 | .where(self.tb_channel_categories.c.category_id == category.id) 865 | .values(values) 866 | ) 867 | txact.execute(upd) 868 | self.channel_category_cache[category.id] = values 869 | 870 | def update_channel_category(self, txact, category): 871 | if category.id in self.channel_category_cache: 872 | self._update_channel_category(txact, category) 873 | else: 874 | self.upsert_channel_category(txact, category) 875 | 876 | def remove_channel_category(self, txact, category): 877 | self.logger.info( 878 | f"Deleting channel category {category.id} in guild {category.guild.id}" 879 | ) 880 | upd = ( 881 | self.tb_channel_categories.update() 882 | .values(is_deleted=True) 883 | .where(self.tb_channels.c.category_id == category.id) 884 | ) 885 | txact.execute(upd) 886 | self.channel_category_cache.pop(category.id, None) 887 | 888 | def upsert_channel_category(self, txact, category): 889 | values = channel_categories_values(category) 890 | if self.channel_cache.get(category.id) == values: 891 | self.logger.debug( 892 | f"Channel category lookup for {category.id} is already up-to-date" 893 | ) 894 | return 895 | 896 | 
self.logger.debug(f"Updating lookup data for channel category {category.name}") 897 | ups = ( 898 | p_insert(self.tb_channel_categories) 899 | .values(values) 900 | .on_conflict_do_update( 901 | index_elements=["category_id"], 902 | index_where=(self.tb_channel_categories.c.category_id == category.id), 903 | set_=values, 904 | ) 905 | ) 906 | txact.execute(ups) 907 | self.channel_category_cache[category.id] = values 908 | 909 | # Users 910 | def add_user(self, txact, user): 911 | if user.id in self.user_cache: 912 | self.logger.debug(f"User {user.id} already inserted.") 913 | return 914 | 915 | self.logger.debug(f"Inserting user {user.id}") 916 | values = user_values(user) 917 | ins = self.tb_users.insert().values(values) 918 | txact.execute(ins) 919 | self.user_cache[user.id] = values 920 | 921 | def _update_user(self, txact, user): 922 | self.logger.debug(f"Updating user {user.id}") 923 | values = user_values(user) 924 | upd = ( 925 | self.tb_users.update() 926 | .where(self.tb_users.c.int_user_id == int_hash(user.id)) 927 | .values(values) 928 | ) 929 | txact.execute(upd) 930 | self.user_cache[user.id] = values 931 | 932 | def update_user(self, txact, user): 933 | if user.id in self.user_cache: 934 | self._update_user(txact, user) 935 | else: 936 | self.upsert_user(txact, user) 937 | 938 | def remove_user(self, txact, user): 939 | self.logger.debug(f"Removing user {user.id}") 940 | upd = ( 941 | self.tb_users.update() 942 | .values(is_deleted=True) 943 | .where(self.tb_users.c.int_user_id == int_hash(user.id)) 944 | ) 945 | txact.execute(upd) 946 | self.user_cache.pop(user.id, None) 947 | 948 | def upsert_user(self, txact, user): 949 | self.logger.debug(f"Upserting user {user.id}") 950 | values = user_values(user) 951 | if self.user_cache.get(user.id) == values: 952 | self.logger.debug(f"User lookup for {user.id} is already up-to-date") 953 | return 954 | 955 | ups = ( 956 | p_insert(self.tb_users) 957 | .values(values) 958 | .on_conflict_do_update( 959 | index_elements=["int_user_id"], 960 | index_where=(self.tb_users.c.int_user_id == int_hash(user.id)), 961 | set_=values, 962 | ) 963 | ) 964 | txact.execute(ups) 965 | self.user_cache[user.id] = values 966 | 967 | # Members 968 | def update_member(self, txact, member): 969 | self.logger.debug(f"Updating member data for {member.id}") 970 | upd = ( 971 | self.tb_guild_membership.update() 972 | .where( 973 | and_( 974 | self.tb_guild_membership.c.int_user_id == int_hash(member.id), 975 | self.tb_guild_membership.c.guild_id == member.guild.id, 976 | ) 977 | ) 978 | .values(nick=member.nick) 979 | ) 980 | txact.execute(upd) 981 | 982 | self._delete_role_membership(txact, member) 983 | self._insert_role_membership(txact, member) 984 | 985 | def _delete_role_membership(self, txact, member): 986 | delet = self.tb_role_membership.delete().where( 987 | and_( 988 | self.tb_role_membership.c.int_user_id == int_hash(member.id), 989 | self.tb_role_membership.c.guild_id == member.guild.id, 990 | self.tb_role_membership.c.role_id not in member.roles, 991 | ) 992 | ) 993 | txact.execute(delet) 994 | 995 | def _insert_role_membership(self, txact, member): 996 | for role in member.roles: 997 | values = role_member_values(member, role) 998 | ins = self.tb_role_membership.insert().values(values) 999 | txact.execute(ins) 1000 | 1001 | def remove_member(self, txact, member): 1002 | self.logger.debug(f"Removing member {member.id} from guild {member.guild.id}") 1003 | upd = ( 1004 | self.tb_guild_membership.update() 1005 | .where( 1006 | and_( 1007 | 
self.tb_guild_membership.c.int_user_id == int_hash(member.id), 1008 | self.tb_guild_membership.c.guild_id == member.guild.id, 1009 | ) 1010 | ) 1011 | .values(is_member=False) 1012 | ) 1013 | txact.execute(upd) 1014 | 1015 | # Don't delete role membership 1016 | 1017 | def remove_old_members(self, txact, guild): 1018 | # Since pylint complains about == True. 1019 | # We need to do this otherwise silly comparison 1020 | # because it's not a comparison at all, it's actually 1021 | # creating a SQLAlchemy "equality" object that is used 1022 | # to generate the query. 1023 | # 1024 | # pylint: disable=singleton-comparison 1025 | 1026 | self.logger.debug(f"Deleting old members from guild {guild.name}") 1027 | sel = select([self.tb_guild_membership]).where( 1028 | and_( 1029 | self.tb_guild_membership.c.guild_id == guild.id, 1030 | self.tb_guild_membership.c.is_member == True, 1031 | ) 1032 | ) 1033 | result = txact.execute(sel) 1034 | 1035 | for row in result.fetchall(): 1036 | user_id = row[0] 1037 | member = guild.get_member(user_id) 1038 | if member is None: 1039 | self.remove_member(txact, FakeMember(id=int_hash(user_id), guild=guild)) 1040 | 1041 | def upsert_member(self, txact, member): 1042 | self.logger.debug(f"Upserting member data for {member.id}") 1043 | values = guild_member_values(member) 1044 | ups = ( 1045 | p_insert(self.tb_guild_membership) 1046 | .values(values) 1047 | .on_conflict_do_update( 1048 | constraint="uq_guild_membership", 1049 | set_=values, 1050 | ) 1051 | ) 1052 | txact.execute(ups) 1053 | 1054 | self._delete_role_membership(txact, member) 1055 | self._insert_role_membership(txact, member) 1056 | 1057 | # User alias information 1058 | def add_avatar(self, txact, user, timestamp, avatar: io.BytesIO, ext: str): 1059 | self.logger.debug("Adding user avatar update for '%s' (%d)", user.name, user.id) 1060 | ins = self.tb_avatar_history.insert().values( 1061 | user_id=user.id, 1062 | timestamp=timestamp, 1063 | avatar=avatar.getbuffer().tobytes(), 1064 | avatar_ext=ext, 1065 | ) 1066 | txact.execute(ins) 1067 | 1068 | def add_username(self, txact, user, timestamp, username): 1069 | self.logger.debug( 1070 | "Adding username update for '%s', now '%s' (%d)", 1071 | user.name, 1072 | username, 1073 | user.id, 1074 | ) 1075 | ins = self.tb_username_history.insert().values( 1076 | user_id=user.id, timestamp=timestamp, username=username 1077 | ) 1078 | txact.execute(ins) 1079 | 1080 | def add_nickname(self, txact, user, timestamp, nickname): 1081 | self.logger.debug( 1082 | "Adding nickname update for '%s', now '%s' (%d)", 1083 | user.display_name, 1084 | nickname, 1085 | user.id, 1086 | ) 1087 | ins = self.tb_nickname_history.insert().values( 1088 | user_id=user.id, timestamp=timestamp, nickname=nickname 1089 | ) 1090 | txact.execute(ins) 1091 | 1092 | # Emojis 1093 | def add_emoji(self, txact, emoji): 1094 | data = EmojiData(emoji) 1095 | if data.cache_id in self.emoji_cache: 1096 | self.logger.debug(f"Emoji {data} already inserted.") 1097 | return 1098 | 1099 | self.logger.info(f"Inserting emoji {data}") 1100 | values = emoji.values() 1101 | ins = self.tb_emojis.insert().values(values) 1102 | txact.execute(ins) 1103 | self.emoji_cache[data.cache_id] = values 1104 | 1105 | def remove_emoji(self, txact, emoji): 1106 | data = EmojiData(emoji) 1107 | self.logger.info(f"Deleting emoji {data}") 1108 | 1109 | upd = ( 1110 | self.tb_emojis.update() 1111 | .values(is_deleted=True) 1112 | .where(self.tb_emojis.c.emoji_id == data.id) 1113 | .where(self.tb_emojis.c.emoji_unicode == 
1114 |         )
1115 |         txact.execute(upd)
1116 |         self.emoji_cache.pop(data.cache_id, None)
1117 |
1118 |     def upsert_emoji(self, txact, emoji):
1119 |         data = EmojiData(emoji)
1120 |         values = data.values()
1121 |         if self.emoji_cache.get(data.cache_id) == values:
1122 |             self.logger.debug(f"Emoji lookup for {data} is already up-to-date")
1123 |             return
1124 |
1125 |         self.logger.debug(f"Upserting emoji {data}")
1126 |         ups = (
1127 |             p_insert(self.tb_emojis)
1128 |             .values(values)
1129 |             .on_conflict_do_update(
1130 |                 index_elements=["emoji_id", "emoji_unicode"],
1131 |                 index_where=and_(
1132 |                     self.tb_emojis.c.emoji_id == data.id,
1133 |                     self.tb_emojis.c.emoji_unicode == data.unicode,
1134 |                 ),
1135 |                 set_=values,
1136 |             )
1137 |         )
1138 |         txact.execute(ups)
1139 |         self.emoji_cache[data.cache_id] = values
1140 |
1141 |     # Audit log
1142 |     def insert_audit_log_entry(
1143 |         self, txact, guild: discord.Guild, entry: discord.AuditLogEntry
1144 |     ):
1145 |         self.logger.debug(f"Inserting audit log entry {entry.id} from {guild.name}")
1146 |         data = AuditLogData(entry, guild)
1147 |         values = data.values()
1148 |         ins = (
1149 |             p_insert(self.tb_audit_log)
1150 |             .values(values)
1151 |             .on_conflict_do_nothing(index_elements=["audit_entry_id"])
1152 |         )
1153 |         txact.execute(ins)
1154 |
1155 |     # Crawling history
1156 |     def lookup_channel_crawl(self, txact, channel):
1157 |         self.logger.info(
1158 |             f"Looking up channel crawl progress for {channel.guild.name} #{channel.name}"
1159 |         )
1160 |         sel = select([self.tb_channel_crawl]).where(
1161 |             self.tb_channel_crawl.c.channel_id == channel.id
1162 |         )
1163 |         result = txact.execute(sel)
1164 |
1165 |         if result.rowcount:
1166 |             _, last_id = result.fetchone()
1167 |             return last_id
1168 |         else:
1169 |             return None
1170 |
1171 |     def insert_channel_crawl(self, txact, channel, last_id):
1172 |         self.logger.info(
1173 |             f"Inserting new channel crawl progress for {channel.guild.name} #{channel.name}"
1174 |         )
1175 |
1176 |         ins = self.tb_channel_crawl.insert().values(
1177 |             {
1178 |                 "channel_id": channel.id,
1179 |                 "last_message_id": last_id,
1180 |             }
1181 |         )
1182 |         txact.execute(ins)
1183 |
1184 |     def update_channel_crawl(self, txact, channel, last_id):
1185 |         self.logger.info(
1186 |             f"Updating channel crawl progress for {channel.guild.name} #{channel.name}: {last_id}"
1187 |         )
1188 |
1189 |         upd = (
1190 |             self.tb_channel_crawl.update()
1191 |             .values(last_message_id=last_id)
1192 |             .where(self.tb_channel_crawl.c.channel_id == channel.id)
1193 |         )
1194 |         txact.execute(upd)
1195 |
1196 |     def delete_channel_crawl(self, txact, channel):
1197 |         self.logger.info(
1198 |             f"Deleting channel crawl progress for {channel.guild.name} #{channel.name}"
1199 |         )
1200 |
1201 |         delet = self.tb_channel_crawl.delete().where(
1202 |             self.tb_channel_crawl.c.channel_id == channel.id
1203 |         )
1204 |         txact.execute(delet)
1205 |
1206 |     def lookup_audit_log_crawl(self, txact, guild):
1207 |         self.logger.info(f"Looking for audit log crawl progress for {guild.name}")
1208 |         sel = select([self.tb_audit_log_crawl]).where(
1209 |             self.tb_audit_log_crawl.c.guild_id == guild.id
1210 |         )
1211 |         result = txact.execute(sel)
1212 |
1213 |         if result.rowcount:
1214 |             _, last_id = result.fetchone()
1215 |             return last_id
1216 |         else:
1217 |             return None
1218 |
1219 |     def insert_audit_log_crawl(self, txact, guild, last_id):
1220 |         self.logger.info(f"Inserting new audit log crawl progress for {guild.name}")
1221 |
1222 |         ins = self.tb_audit_log_crawl.insert().values(
1223 |             {
1224 |                 "guild_id": guild.id,
1225 |                 "last_audit_entry_id": last_id,
1226 |             }
1227 |         )
1228 |         txact.execute(ins)
1229 |
1230 |     def update_audit_log_crawl(self, txact, guild, last_id):
1231 |         self.logger.info(f"Updating audit log crawl progress for {guild.name}")
1232 |
1233 |         upd = (
1234 |             self.tb_audit_log_crawl.update()
1235 |             .values(last_audit_entry_id=last_id)
1236 |             .where(self.tb_audit_log_crawl.c.guild_id == guild.id)
1237 |         )
1238 |         txact.execute(upd)
1239 |
1240 |     def delete_audit_log_crawl(self, txact, guild):
1241 |         self.logger.info(f"Deleting audit log crawl progress for {guild.name}")
1242 |
1243 |         delet = self.tb_audit_log_crawl.delete().where(
1244 |             self.tb_audit_log_crawl.c.guild_id == guild.id
1245 |         )
1246 |         txact.execute(delet)
1247 |
1248 |     def lookup_thread_crawl(self, txact, thread: discord.Thread):
1249 |         self.logger.info(
1250 |             (
1251 |                 f"Looking up thread crawl progress for thread {thread.name} "
1252 |                 f"in guild {thread.guild.name}, channel #{thread.parent.name}"
1253 |             )
1254 |         )
1255 |
1256 |         sel = select([self.tb_thread_crawl]).where(
1257 |             self.tb_thread_crawl.c.thread_id == thread.id
1258 |         )
1259 |         result = txact.execute(sel)
1260 |
1261 |         if result.rowcount:
1262 |             _, last_id = result.fetchone()
1263 |             return last_id
1264 |         else:
1265 |             return None
1266 |
1267 |     def insert_thread_crawl(self, txact, thread: discord.Thread, last_id):
1268 |         self.logger.info(
1269 |             (
1270 |                 f"Inserting new thread crawl progress {last_id} for thread {thread.name} "
1271 |                 f"in guild {thread.guild.name}, channel #{thread.parent.name}"
1272 |             )
1273 |         )
1274 |
1275 |         self.update_thread(txact, thread)
1276 |
1277 |         ins = self.tb_thread_crawl.insert().values(
1278 |             {
1279 |                 "thread_id": thread.id,
1280 |                 "last_message_id": last_id,
1281 |             }
1282 |         )
1283 |         txact.execute(ins)
1284 |
1285 |     def update_thread_crawl(self, txact, thread: discord.Thread, last_id):
1286 |         self.logger.info(
1287 |             (
1288 |                 f"Updating thread crawl progress {last_id} for thread {thread.name} "
1289 |                 f"in guild {thread.guild.name}, channel #{thread.parent.name}"
1290 |             )
1291 |         )
1292 |
1293 |         upd = (
1294 |             self.tb_thread_crawl.update()
1295 |             .values(last_message_id=last_id)
1296 |             .where(self.tb_thread_crawl.c.thread_id == thread.id)
1297 |         )
1298 |         txact.execute(upd)
1299 |
1300 |     def delete_thread_crawl(self, txact, thread: discord.Thread):
1301 |         self.logger.info(
1302 |             (
1303 |                 f"Deleting thread crawl progress for thread {thread.name} "
1304 |                 f"in guild {thread.guild.name}, channel #{thread.parent.name}"
1305 |             )
1306 |         )
1307 |
1308 |         delet = self.tb_thread_crawl.delete().where(
1309 |             self.tb_thread_crawl.c.thread_id == thread.id
1310 |         )
1311 |         txact.execute(delet)
1312 |
1313 |     # Threads
1314 |     def add_thread(self, txact, thread: discord.Thread):
1315 |         if thread.id in self.thread_cache:
1316 |             self.logger.debug(f"Thread {thread.id} already inserted")
1317 |             return
1318 |
1319 |         self.logger.info(
1320 |             f"Inserting new thread {thread.id} for guild {thread.guild.id}"
1321 |         )
1322 |         values = thread_values(thread)
1323 |         ins = self.tb_threads.insert().values(values)
1324 |         txact.execute(ins)
1325 |         self.thread_cache[thread.id] = values
1326 |
1327 |     def _update_thread(self, txact, thread: discord.Thread):
1328 |         self.logger.info(f"Updating thread {thread.id} in guild {thread.guild.id}")
1329 |         values = thread_values(thread)
1330 |         upd = (
1331 |             self.tb_threads.update()
1332 |             .where(self.tb_threads.c.thread_id == thread.id)
1333 |             .values(values)
1334 |         )
1335 |         txact.execute(upd)
1336 |         self.thread_cache[thread.id] = values
1337 |
1338 |     def update_thread(self, txact, thread: discord.Thread):
1339 |         if thread.id in self.thread_cache:
1340 |             self._update_thread(txact, thread)
1341 |         else:
1342 |             self.upsert_thread(txact, thread)
1343 |
1344 |     def remove_thread(self, txact, thread: discord.Thread):
1345 |         self.logger.info(f"Deleting thread {thread.id} in guild {thread.guild.id}")
1346 |         upd = (
1347 |             self.tb_threads.update()
1348 |             .values(is_deleted=True)
1349 |             .where(self.tb_threads.c.thread_id == thread.id)
1350 |         )
1351 |         txact.execute(upd)
1352 |         self.thread_cache.pop(thread.id, None)
1353 |
1354 |     def upsert_thread(self, txact, thread: discord.Thread):
1355 |         values = thread_values(thread)
1356 |         if self.thread_cache.get(thread.id) == values:
1357 |             self.logger.debug(f"Thread lookup for {thread.id} is already up-to-date")
1358 |             return
1359 |
1360 |         self.logger.debug(f"Updating lookup data for thread #{thread.name}")
1361 |         ups = (
1362 |             p_insert(self.tb_threads)
1363 |             .values(values)
1364 |             .on_conflict_do_update(
1365 |                 index_elements=["thread_id"],
1366 |                 index_where=(self.tb_threads.c.thread_id == thread.id),
1367 |                 set_=values,
1368 |             )
1369 |         )
1370 |         txact.execute(ups)
1371 |         self.thread_cache[thread.id] = values
1372 |
1373 |     # Thread Members
1374 |     def add_thread_member(self, txact, member: discord.ThreadMember):
1375 |         self.logger.debug(
1376 |             f"Inserting thread member {member.id} for thread {member.thread_id}"
1377 |         )
1378 |         values = thread_member_values(member)
1379 |         ins = self.tb_thread_members.insert().values(values)
1380 |         txact.execute(ins)
1381 |
1382 |     def remove_thread_member(self, txact, member: discord.ThreadMember):
1383 |         self.logger.debug(
1384 |             f"Removing thread member {member.id} for thread {member.thread_id}"
1385 |         )
1386 |         upd = (
1387 |             self.tb_thread_members.update()
1388 |             .values(left_at=datetime.now())
1389 |             .where(self.tb_thread_members.c.int_member_id == member.id)
1390 |             .where(self.tb_thread_members.c.thread_id == member.thread_id)
1391 |             .where(self.tb_thread_members.c.left_at == None)
1392 |         )
1393 |         txact.execute(upd)
1394 |
1395 |     # Privacy operations
1396 |     def privacy_scrub(self, user):
1397 |         self.logger.info(f"Scrubbing user {user.name} for privacy reasons")
1398 |
1399 |         upd = (
1400 |             self.tb_users.update()
1401 |             .values(
1402 |                 real_user_id=0,
1403 |                 name=f"Removed for legal reasons - {random.getrandbits(24):06x}",
1404 |                 discriminator=0000,
1405 |                 avatar="00000000000000000000000000000000",
1406 |             )
1407 |             .where(self.tb_users.c.real_user_id == user.id)
1408 |         )
1409 |
1410 |         with self.transaction() as txact:
1411 |             txact.execute(upd)
1412 |
--------------------------------------------------------------------------------
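Illustrative sketch (not part of the repository): the upsert_* methods above all rely on PostgreSQL's
"INSERT ... ON CONFLICT DO UPDATE" support, exposed through SQLAlchemy's dialect-specific insert
construct (imported in sql.py as p_insert). The snippet below is a minimal, self-contained sketch of
that pattern under the SQLAlchemy 1.4 pin from requirements.txt; the table and column names
(example_lookup, item_id, name) are hypothetical and do not exist in schema.py. It only compiles the
statement, so it runs without a live PostgreSQL connection.

from sqlalchemy import BigInteger, Column, MetaData, String, Table
from sqlalchemy.dialects import postgresql
from sqlalchemy.dialects.postgresql import insert as p_insert

metadata = MetaData()

# Toy lookup table, loosely analogous to the tb_* lookup tables defined in schema.py.
tb_example = Table(
    "example_lookup",
    metadata,
    Column("item_id", BigInteger, primary_key=True),
    Column("name", String, nullable=False),
)

values = {"item_id": 1234, "name": "example"}

# Insert the row, or overwrite it in place if a row with this primary key already exists.
ups = (
    p_insert(tb_example)
    .values(values)
    .on_conflict_do_update(
        index_elements=["item_id"],
        set_=values,
    )
)

# Compile (rather than execute) the statement so the sketch needs no database.
print(ups.compile(dialect=postgresql.dialect()))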