├── tests ├── __init__.py ├── core │ ├── __init__.py │ ├── util │ │ ├── __init__.py │ │ ├── data │ │ │ └── demo.jpg │ │ ├── test_get_closest_image_match.py │ │ └── test_repost_helpers.py │ ├── celery │ │ ├── __init__.py │ │ └── task_logic │ │ │ ├── __init__.py │ │ │ └── test_ingest_task_logic.py │ ├── services │ │ ├── __init__.py │ │ └── test_reponse_handler.py │ ├── notification │ │ └── __init__.py │ ├── test_objectmapping.py │ ├── test_config.py │ └── test_duplicateImageService.py ├── adminsvc │ ├── __init__.py │ ├── test_subreddit_config_update.py │ └── test_deleted_post_monitor.py ├── common │ ├── __init__.py │ └── util │ │ └── __init__.py ├── summonssvc │ ├── __init__.py │ ├── commandparsing │ │ ├── __init__.py │ │ └── test_commandParser.py │ └── test_summonsHandler.py └── submonitorsvc │ └── __init__.py ├── redditrepostsleuth ├── core │ ├── db │ │ ├── __init__.py │ │ ├── uow │ │ │ ├── __init__.py │ │ │ └── unitofworkmanager.py │ │ ├── repository │ │ │ ├── __init__.py │ │ │ ├── config_settings_repo.py │ │ │ ├── indexbuildtimesrepository.py │ │ │ ├── post_type_repo.py │ │ │ ├── site_admin_repo.py │ │ │ ├── stat_top_repost_repo.py │ │ │ ├── post_hash_repo.py │ │ │ ├── meme_hash_repo.py │ │ │ ├── stat_daily_count_repo.py │ │ │ ├── monitored_sub_config_revision_repo.py │ │ │ ├── audiofingerprintrepo.py │ │ │ ├── banned_subreddit_repo.py │ │ │ ├── image_index_map_rep.py │ │ │ ├── user_review_repo.py │ │ │ ├── subreddit_repo.py │ │ │ ├── http_proxy_repo.py │ │ │ ├── monitored_sub_config_change_repo.py │ │ │ ├── banned_user_repo.py │ │ │ ├── videohashrepository.py │ │ │ ├── memetemplaterepository.py │ │ │ ├── config_message_template_repo.py │ │ │ ├── user_whitelist_repo.py │ │ │ ├── investigatepostrepo.py │ │ │ ├── meme_template_potential_votes_repo.py │ │ │ ├── stats_top_reposter_repo.py │ │ │ ├── monitoredsubcheckrepository.py │ │ │ ├── monitoredsubrepository.py │ │ │ ├── meme_template_potential_repo.py │ │ │ ├── bot_private_message_repo.py │ │ │ ├── 
user_report_repo.py │ │ │ ├── botcommentrepo.py │ │ │ ├── repost_watch_repo.py │ │ │ └── summonsrepository.py │ │ └── db_utils.py │ ├── model │ │ ├── __init__.py │ │ ├── events │ │ │ ├── __init__.py │ │ │ ├── ingestsubmissionevent.py │ │ │ ├── response_event.py │ │ │ ├── RedditAdminActionEvent.py │ │ │ ├── reddit_api_event.py │ │ │ ├── annoysearchevent.py │ │ │ ├── influxevent.py │ │ │ └── celerytask.py │ │ ├── search │ │ │ ├── __init__.py │ │ │ ├── text_search_match.py │ │ │ ├── search_match.py │ │ │ ├── link_search_results.py │ │ │ ├── search_results.py │ │ │ └── image_search_match.py │ │ ├── link_search_times.py │ │ ├── repostresponse.py │ │ ├── wiki_stats.py │ │ ├── repostmatch.py │ │ ├── image_index_api_result.py │ │ ├── dummy_comment.py │ │ ├── misc_models.py │ │ ├── link_search_settings.py │ │ ├── search_times.py │ │ ├── image_search_settings.py │ │ ├── image_search_times.py │ │ └── search_settings.py │ ├── celery │ │ ├── tasks │ │ │ ├── __init__.py │ │ │ └── maintenance_tasks.py │ │ ├── task_logic │ │ │ ├── __init__.py │ │ │ ├── repost_task_logic.py │ │ │ └── repost_image.py │ │ ├── response_tasks.py │ │ ├── __init__.py │ │ └── app.py │ ├── notification │ │ ├── __init__.py │ │ ├── agent_class_maps.py │ │ ├── notification_agent.py │ │ └── notification_service.py │ ├── util │ │ ├── repost │ │ │ ├── __init__.py │ │ │ └── text_repost.py │ │ ├── __init__.py │ │ ├── utils.py │ │ ├── constants.py │ │ ├── audiohelpers.py │ │ ├── default_bot_config.py │ │ └── objectmapping.py │ ├── __init__.py │ ├── services │ │ ├── __init__.py │ │ ├── managed_subreddit.py │ │ └── reddit_manager.py │ ├── jsonencoders.py │ ├── logfilters.py │ └── logging.py ├── adminsvc │ ├── __init__.py │ ├── scheduled_task_runner.py │ ├── requirements.txt │ ├── banned-subs.md │ ├── Dockerfile │ ├── stats.md │ └── bot_config.md ├── hotpostsvc │ ├── __init__.py │ ├── requirements.txt │ ├── Dockerfile │ └── hotpostsvc.py ├── ingestsvc │ ├── __init__.py │ └── requirements.txt ├── summonssvc │ ├── 
__init__.py │ ├── commandparsing │ │ ├── __init__.py │ │ ├── argumentparserthrow.py │ │ └── command_parser.py │ └── requirements.txt ├── queue_monitor_svc │ ├── __init__.py │ ├── requirements.txt │ └── queue_monitor.py ├── repostsleuthsiteapi │ ├── __init__.py │ ├── models │ │ └── __init__.py │ ├── util │ │ ├── __init__.py │ │ ├── image_store.py │ │ └── helpers.py │ ├── endpoints │ │ ├── __init__.py │ │ ├── admin │ │ │ ├── __init__.py │ │ │ └── general_admin.py │ │ ├── image_repost_endpoint.py │ │ ├── posts.py │ │ └── image_search_history.py │ ├── Dockerfile │ └── requirements.txt ├── __init__.py ├── submonitorsvc │ ├── __init__.py │ ├── requirements.txt │ └── Dockerfile ├── requirements.txt └── post_import │ └── import_posts_new_db.py ├── alembic ├── README ├── script.py.mako ├── versions-pre-refactor │ ├── cf449578fbf7_init.py │ ├── ae154e44a9b8_add_config_tables.py │ ├── 77a4e176572e_image_repost_index.py │ ├── 18c6ae18a160_index_to_link_repost.py │ ├── 5111c30c2895_voting_column_in_report.py │ ├── 700576ba9d88_subconfigchange.py │ ├── 8e501b1ac31c_edit_image_index_table.py │ ├── cf751ec0db2c_add_is_private_to_monitor_sub.py │ ├── e7b3e28cbe72_repost_modmail_to_monitored_sub.py │ ├── 279f1e8d64eb_notificaiton_to_monitored_sub.py │ ├── 7ecb5b67d5c9_add_filter_removed_matched_to_montiored_.py │ ├── f438cebc0e2e_nsfw_for_subs.py │ ├── e7d28bf7f564_reddit_comment.py │ ├── ee1c9310194b_add_new_indexes.py │ ├── c8f1e18b7ebc_comment_permalink.py │ ├── 319d257ae614_add_template_slug.py │ ├── 380abce0d196_new_monitored_sub_options.py │ ├── 26e2f11e1955_add_annoy_map.py │ ├── 8d3377b141a8_change_monitoredsub_column_names.py │ ├── 31d210ea9abe_add_proxy_table.py │ ├── b2b67bea8e6e_modify_image_post_current_table.py │ ├── 8b4d674b700b_add_site_admin_table.py │ ├── a4829f4a5121_subconfigchange.py │ ├── a53c1ffe8f99_change_meme_vote_f_key.py │ ├── 7332736c6ef4_create_author_index.py │ ├── 126af4529c2d_meme_voting.py │ ├── 505caf95a77e_iamge_search_update.py │ ├── 
0fac44af5a9c_add_config_tables.py │ └── cfffe117cd7b_add_stats_image_repost.py └── env.py ├── hero.png ├── .gitignore ├── .dockerignore ├── .travis.yml ├── docs └── dev_docs │ └── modifying_monitored_sub.md ├── wiki ├── banned-subs.md ├── stats.md ├── bot_config.md ├── bot_usage.md ├── add-your-sub.md ├── message_templates.md └── support-sleuth-bot.md ├── docker ├── WorkerDockerFile ├── IngestSvcDockerfile ├── SummonsHandlerDockerfile └── QueueMonitorDockerfile ├── worker-requirements.txt ├── requirements.txt ├── docker-compose-public.yml ├── utility_scripts ├── cleanup_watches.py ├── push_shift_backfill.py ├── push_shift_backfill_beta.py └── ingest_pushshift_archive_pipe.py ├── .github └── workflows │ └── pythonapp.yml ├── CONTRIBUTING.md └── docker-compose-infra.yml /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/adminsvc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/core/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/summonssvc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/common/util/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/core/celery/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/core/services/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/submonitorsvc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /redditrepostsleuth/core/db/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/core/notification/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /redditrepostsleuth/adminsvc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /redditrepostsleuth/core/db/uow/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /redditrepostsleuth/core/model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /redditrepostsleuth/hotpostsvc/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /redditrepostsleuth/ingestsvc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /redditrepostsleuth/summonssvc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/core/celery/task_logic/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/summonssvc/commandparsing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /redditrepostsleuth/core/celery/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /redditrepostsleuth/core/model/events/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /redditrepostsleuth/core/model/search/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /redditrepostsleuth/core/notification/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /redditrepostsleuth/core/util/repost/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /redditrepostsleuth/queue_monitor_svc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /alembic/README: -------------------------------------------------------------------------------- 1 | Generic single-database configuration. -------------------------------------------------------------------------------- /redditrepostsleuth/core/celery/task_logic/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /redditrepostsleuth/repostsleuthsiteapi/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /redditrepostsleuth/repostsleuthsiteapi/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /redditrepostsleuth/repostsleuthsiteapi/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /redditrepostsleuth/summonssvc/commandparsing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /redditrepostsleuth/core/celery/response_tasks.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /redditrepostsleuth/core/celery/task_logic/repost_task_logic.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /redditrepostsleuth/repostsleuthsiteapi/endpoints/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /redditrepostsleuth/repostsleuthsiteapi/endpoints/admin/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /redditrepostsleuth/core/celery/__init__.py: -------------------------------------------------------------------------------- 1 | from .app import celery 2 | 3 | -------------------------------------------------------------------------------- /hero.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/barrycarey/RedditRepostSleuth/HEAD/hero.png -------------------------------------------------------------------------------- /redditrepostsleuth/core/db/repository/__init__.py: -------------------------------------------------------------------------------- 1 | # TODO: Make repository super class -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | temp_scratch 2 | venv 3 | .idea 4 | scratch_files 5 | __pycache__ 6 | .ann 7 | .csv -------------------------------------------------------------------------------- /redditrepostsleuth/queue_monitor_svc/requirements.txt: -------------------------------------------------------------------------------- 1 | redis 2 | influxdb-client 3 | requests -------------------------------------------------------------------------------- /redditrepostsleuth/__init__.py: 
-------------------------------------------------------------------------------- 1 | import logging 2 | 3 | log = logging.getLogger(__name__) 4 | 5 | -------------------------------------------------------------------------------- /redditrepostsleuth/core/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | log = logging.getLogger(__name__) 4 | -------------------------------------------------------------------------------- /redditrepostsleuth/core/util/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | log = logging.getLogger(__name__) 4 | -------------------------------------------------------------------------------- /redditrepostsleuth/submonitorsvc/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | log = logging.getLogger(__name__) -------------------------------------------------------------------------------- /redditrepostsleuth/core/services/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | log = logging.getLogger(__name__) 4 | -------------------------------------------------------------------------------- /tests/core/test_objectmapping.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | 4 | class Test(TestCase): 5 | pass -------------------------------------------------------------------------------- /tests/core/util/data/demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/barrycarey/RedditRepostSleuth/HEAD/tests/core/util/data/demo.jpg -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | *.ann 2 | *.npy 3 | 
venv 4 | alembic 5 | logs 6 | temp 7 | tests 8 | video 9 | .git 10 | .idea 11 | *.log -------------------------------------------------------------------------------- /redditrepostsleuth/core/db/repository/config_settings_repo.py: -------------------------------------------------------------------------------- 1 | 2 | class ConfigSettingsRepo: 3 | def __init__(self, db_session): 4 | self.db_session = db_session 5 | 6 | -------------------------------------------------------------------------------- /redditrepostsleuth/hotpostsvc/requirements.txt: -------------------------------------------------------------------------------- 1 | praw 2 | sqlalchemy 3 | annoy 4 | pymysql 5 | influxdb 6 | imagehash 7 | distance 8 | redlock 9 | python-Levenshtein 10 | falcon 11 | redlock -------------------------------------------------------------------------------- /redditrepostsleuth/core/notification/agent_class_maps.py: -------------------------------------------------------------------------------- 1 | from redditrepostsleuth.core.notification.discord_notification_agent import DiscordAgent 2 | 3 | AGENT_MAP = { 4 | 'discord': DiscordAgent 5 | } -------------------------------------------------------------------------------- /redditrepostsleuth/adminsvc/scheduled_task_runner.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | 4 | def main(): 5 | pass 6 | 7 | if __name__ == '__main__': 8 | main() 9 | while True: 10 | time.sleep(60) -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.10" 4 | - "3.11" 5 | # command to install dependencies 6 | install: 7 | - pip install -r requirements.txt 8 | # command to run tests 9 | script: 10 | - pytest -------------------------------------------------------------------------------- 
/redditrepostsleuth/submonitorsvc/requirements.txt: -------------------------------------------------------------------------------- 1 | celery[redis]==4.4.7 2 | praw 3 | pymysql 4 | sqlalchemy 5 | redlock 6 | imagehash 7 | influxdb 8 | annoy 9 | distance 10 | python-Levenshtein 11 | falcon 12 | redlock -------------------------------------------------------------------------------- /redditrepostsleuth/adminsvc/requirements.txt: -------------------------------------------------------------------------------- 1 | pymysql 2 | praw 3 | sqlalchemy 4 | influxdb-client 5 | imagehash 6 | apscheduler 7 | redis 8 | celery[redis] 9 | falcon 10 | redlock 11 | aiohttp 12 | Brotli 13 | influxdb-client -------------------------------------------------------------------------------- /docs/dev_docs/modifying_monitored_sub.md: -------------------------------------------------------------------------------- 1 | 2 | ### Adding or Removing Config Values 3 | * Add / Remove config values in core/db/databasemodels.py 4 | * Add/Remove in core/util/default_bot_config.py 5 | * Update sub_monitor_exposed_config_options in the config json -------------------------------------------------------------------------------- /redditrepostsleuth/core/db/repository/indexbuildtimesrepository.py: -------------------------------------------------------------------------------- 1 | 2 | class IndexBuildTimesRepository: 3 | 4 | def __init__(self, db_session): 5 | self.db_session = db_session 6 | 7 | def add(self, item): 8 | self.db_session.add(item) -------------------------------------------------------------------------------- /redditrepostsleuth/core/model/link_search_times.py: -------------------------------------------------------------------------------- 1 | from redditrepostsleuth.core.model.search_times import SearchTimes 2 | 3 | 4 | class LinkSearchTimes(SearchTimes): 5 | def __init__(self): 6 | super().__init__() 7 | self.query_time: float = float(0) 8 | 9 | 
-------------------------------------------------------------------------------- /redditrepostsleuth/core/notification/notification_agent.py: -------------------------------------------------------------------------------- 1 | from typing import Text 2 | 3 | 4 | class NotificationAgent: 5 | def __init__(self, name: Text): 6 | self.name = name 7 | 8 | def send(self, message: Text, **kwargs): 9 | raise NotImplementedError -------------------------------------------------------------------------------- /redditrepostsleuth/ingestsvc/requirements.txt: -------------------------------------------------------------------------------- 1 | celery[redis]==5.3.1 2 | praw==7.7.1 3 | redlock==1.2.0 4 | sqlalchemy==2.0.20 5 | pymysql==1.1.0 6 | imagehash==4.3.1 7 | influxdb-client==1.37.0 8 | aiohttp==3.9.0 9 | sentry-sdk==1.29.2 10 | cryptography==41.0.6 11 | redgifs==1.9.1 -------------------------------------------------------------------------------- /redditrepostsleuth/core/jsonencoders.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from redditrepostsleuth.core.model.search.image_search_results import ImageSearchResults 4 | 5 | 6 | class ImageRepostWrapperEncoder(json.JSONEncoder): 7 | def default(self, o): 8 | if isinstance(o, ImageSearchResults): 9 | return o.to_dict() -------------------------------------------------------------------------------- /wiki/banned-subs.md: -------------------------------------------------------------------------------- 1 | ### Repost Sleuth Ban List 2 | 3 | Below is a list of subs that the bot is currently banned on. Generally it is because the mods do not want bots there. 4 | 5 | This is for information only, please don't harass the mods of any of the below subs. 
6 | 7 | --- 8 | Current Total: {total} 9 | --- 10 | {banned_subs} -------------------------------------------------------------------------------- /redditrepostsleuth/summonssvc/requirements.txt: -------------------------------------------------------------------------------- 1 | celery[redis]==5.3.1 2 | sqlalchemy==2.0.20 3 | pymysql==1.1.0 4 | praw==7.7.1 5 | requests==2.31.0 6 | redlock==1.2.0 7 | influxdb-client==1.37.0 8 | imagehash==4.3.1 9 | python-Levenshtein==0.21.1 10 | distance==0.1.3 11 | pydantic==1.10.9 12 | sentry-sdk==1.29.2 13 | cryptography==41.0.6 -------------------------------------------------------------------------------- /docker/WorkerDockerFile: -------------------------------------------------------------------------------- 1 | FROM python:3.11.3-buster 2 | MAINTAINER Barry Carey 3 | 4 | VOLUME /src 5 | COPY worker-requirements.txt /src/requirements.txt 6 | ADD sleuth_config_dev.json /src/sleuth_config.json 7 | ADD redditrepostsleuth /src/redditrepostsleuth 8 | WORKDIR /src 9 | 10 | RUN pip install -r requirements.txt 11 | -------------------------------------------------------------------------------- /worker-requirements.txt: -------------------------------------------------------------------------------- 1 | celery[redis]==5.3.1 2 | sqlalchemy==2.0.20 3 | pymysql==1.1.0 4 | praw==7.7.1 5 | requests==2.31.0 6 | redlock==1.2.0 7 | influxdb-client==1.37.0 8 | imagehash==4.3.1 9 | python-Levenshtein==0.21.1 10 | distance==0.1.3 11 | pydantic==1.10.9 12 | sentry-sdk==1.29.2 13 | pyjwt==2.8.0 14 | cryptography==41.0.6 15 | redgifs==1.9.0 -------------------------------------------------------------------------------- /redditrepostsleuth/adminsvc/banned-subs.md: -------------------------------------------------------------------------------- 1 | ### Repost Sleuth Ban List 2 | 3 | Below is a list of subs that the bot is currently banned on. Generally it is because the mods do not want bots there. 
4 | 5 | This is for information only, please don't harass the mods of any of the below subs. 6 | 7 | --- 8 | Current Total: {total} 9 | --- 10 | {banned_subs} -------------------------------------------------------------------------------- /redditrepostsleuth/core/model/repostresponse.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | from redditrepostsleuth.core.db.databasemodels import Post, Summons 4 | 5 | 6 | @dataclass 7 | class SummonsResponse: 8 | summons: Summons 9 | message: str = None 10 | comment_reply_id: int = None 11 | pm_reply_id: int = None 12 | reply_failure_reason: str = None 13 | -------------------------------------------------------------------------------- /redditrepostsleuth/summonssvc/commandparsing/argumentparserthrow.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | from typing import Text, NoReturn 3 | 4 | from redditrepostsleuth.core.exception import InvalidCommandException 5 | 6 | 7 | class ArgumentParserThrow(ArgumentParser): 8 | def error(self, message: Text) -> NoReturn: 9 | raise InvalidCommandException(message) 10 | -------------------------------------------------------------------------------- /redditrepostsleuth/core/db/uow/unitofworkmanager.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy.orm import sessionmaker 2 | 3 | from redditrepostsleuth.core.db.uow.unitofwork import UnitOfWork 4 | 5 | 6 | class UnitOfWorkManager: 7 | def __init__(self, db_engine): 8 | self.session_maker = sessionmaker(bind=db_engine, expire_on_commit=False) 9 | def start(self): 10 | return UnitOfWork(self.session_maker) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | celery[redis]==5.3.1 2 | 
sqlalchemy==2.0.20 3 | pymysql==1.1.0 4 | praw==7.7.1 5 | requests==2.31.0 6 | redlock==1.2.0 7 | influxdb-client==1.37.0 8 | imagehash==4.3.1 9 | python-Levenshtein==0.21.1 10 | distance==0.1.3 11 | pydantic==1.10.9 12 | sentry-sdk==1.29.2 13 | aiohttp==3.9.0 14 | pyjwt==2.8.0 15 | gunicorn==21.2.0 16 | falcon==3.1.1 17 | cryptography==41.0.6 18 | redgifs==1.9.0 -------------------------------------------------------------------------------- /docker/IngestSvcDockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11.3-buster 2 | MAINTAINER Barry Carey 3 | 4 | VOLUME /src/ 5 | ADD sleuth_config_dev.json /src/sleuth_config.json 6 | ADD redditrepostsleuth/ingestsvc/requirements.txt /src/ 7 | ADD redditrepostsleuth/ingestsvc/ingestsvc.py /src/ 8 | ADD redditrepostsleuth /src/redditrepostsleuth/ 9 | WORKDIR /src 10 | 11 | RUN pip install -r requirements.txt 12 | -------------------------------------------------------------------------------- /redditrepostsleuth/requirements.txt: -------------------------------------------------------------------------------- 1 | praw 2 | sqlalchemy 3 | alembic 4 | pymysql 5 | pillow 6 | requests 7 | distance 8 | celery[redis]==4.4.7 9 | numpy 10 | dataclasses 11 | flower 12 | eventlet 13 | ImageHash 14 | annoy 15 | RedLock 16 | 17 | requests 18 | gensim 19 | ffmpeg-python 20 | youtube-dl 21 | wavio 22 | pydub==0.20.0 23 | numpy 24 | matplotlib 25 | scipy 26 | numpy 27 | pytesseract 28 | falcon 29 | redlock -------------------------------------------------------------------------------- /docker/SummonsHandlerDockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11.3-buster 2 | MAINTAINER Barry Carey 3 | 4 | VOLUME /src/ 5 | ADD redditrepostsleuth /src/redditrepostsleuth/ 6 | ADD sleuth_config_dev.json /src/sleuth_config.json 7 | ADD /redditrepostsleuth/summonssvc/requirements.txt /src 8 | ADD 
redditrepostsleuth/summonssvc/summons_monitor.py /src 9 | WORKDIR /src 10 | 11 | RUN pip install -r requirements.txt 12 | -------------------------------------------------------------------------------- /docker/QueueMonitorDockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11.2-buster 2 | MAINTAINER Barry Carey 3 | 4 | VOLUME /src/ 5 | ADD sleuth_config_dev.json /src/sleuth_config.json 6 | ADD redditrepostsleuth/queue_monitor_svc/requirements.txt /src 7 | ADD redditrepostsleuth/queue_monitor_svc/queue_monitor.py /src 8 | ADD redditrepostsleuth /src/redditrepostsleuth/ 9 | WORKDIR /src 10 | 11 | RUN pip install -r requirements.txt 12 | -------------------------------------------------------------------------------- /redditrepostsleuth/core/db/repository/post_type_repo.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from redditrepostsleuth.core.db.databasemodels import PostType 4 | 5 | 6 | class PostTypeRepo: 7 | 8 | def __init__(self, db_session): 9 | self.db_session = db_session 10 | 11 | def get_by_name(self, name: str) -> Optional[PostType]: 12 | return self.db_session.query(PostType).filter(PostType.name == name).first() -------------------------------------------------------------------------------- /redditrepostsleuth/adminsvc/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11.2-buster 2 | MAINTAINER Barry Carey 3 | 4 | VOLUME /src/ 5 | COPY sleuth_config.json /src/ 6 | COPY /wiki/bot_config.md /src/ 7 | COPY /wiki/banned-subs.md /src/ 8 | COPY /wiki/stats.md /src/ 9 | COPY redditrepostsleuth/adminsvc/requirements.txt /src/ 10 | ADD redditrepostsleuth /src/redditrepostsleuth/ 11 | WORKDIR /src 12 | 13 | RUN pip install -r requirements.txt 14 | -------------------------------------------------------------------------------- 
/redditrepostsleuth/repostsleuthsiteapi/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11.3-buster 2 | MAINTAINER Barry Carey 3 | 4 | VOLUME /src/ 5 | ADD sleuth_config_dev.json /src/sleuth_config.json 6 | COPY /redditrepostsleuth/repostsleuthsiteapi/requirements.txt /src/ 7 | ADD redditrepostsleuth /src/redditrepostsleuth/ 8 | WORKDIR /src 9 | 10 | RUN apt-get update && apt-get install -y \ 11 | libgl1-mesa-glx \ 12 | && pip install -r requirements.txt -------------------------------------------------------------------------------- /redditrepostsleuth/hotpostsvc/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8.7-buster 2 | MAINTAINER Barry Carey 3 | 4 | VOLUME /src/ 5 | COPY sleuth_config.json /src/ 6 | COPY redditrepostsleuth/hotpostsvc/requirements.txt /src/ 7 | ADD redditrepostsleuth /src/redditrepostsleuth/ 8 | WORKDIR /src 9 | 10 | RUN apt-get update && apt-get install -y \ 11 | build-essential \ 12 | libssl-dev \ 13 | libffi-dev \ 14 | python-dev && pip install -r requirements.txt 15 | -------------------------------------------------------------------------------- /redditrepostsleuth/repostsleuthsiteapi/requirements.txt: -------------------------------------------------------------------------------- 1 | falcon==3.1.1 2 | gunicorn==21.2.0 3 | requests==2.31.0 4 | falcon_cors 5 | sqlalchemy==2.0.20 6 | pymysql==1.1.0 7 | python-Levenshtein==0.21.1 8 | distance==0.1.3 9 | influxdb-client==1.37.0 10 | imagehash==4.3.1 11 | praw==7.7.1 12 | opencv-python 13 | imutils 14 | matplotlib 15 | pytesseract 16 | falcon_multipart 17 | redlock==1.2.0 18 | celery[redis]==5.3.1 19 | pydantic==1.10.9 20 | sentry-sdk==1.29.2 21 | cryptography==41.0.6 -------------------------------------------------------------------------------- /redditrepostsleuth/submonitorsvc/Dockerfile: 
-------------------------------------------------------------------------------- 1 | FROM python:3.8.7-buster 2 | MAINTAINER Barry Carey 3 | 4 | VOLUME /src/ 5 | COPY sleuth_config.json /src/ 6 | COPY /redditrepostsleuth/submonitorsvc/requirements.txt /src/ 7 | ADD redditrepostsleuth /src/redditrepostsleuth/ 8 | WORKDIR /src 9 | 10 | RUN apt-get update && apt-get install -y \ 11 | build-essential \ 12 | libssl-dev \ 13 | libffi-dev \ 14 | python-dev && pip install -r requirements.txt 15 | 16 | -------------------------------------------------------------------------------- /redditrepostsleuth/core/model/search/text_search_match.py: -------------------------------------------------------------------------------- 1 | from redditrepostsleuth.core.db.databasemodels import Post 2 | from redditrepostsleuth.core.model.search.search_match import SearchMatch 3 | 4 | 5 | class TextSearchMatch(SearchMatch): 6 | 7 | def __init__( 8 | self, 9 | post: Post, 10 | distance: float, 11 | title_similarity: int = 0 12 | ): 13 | self.distance = distance 14 | super().__init__(post.url, post, title_similarity) -------------------------------------------------------------------------------- /redditrepostsleuth/core/db/repository/site_admin_repo.py: -------------------------------------------------------------------------------- 1 | from typing import Text 2 | 3 | from redditrepostsleuth.core.db.databasemodels import SiteAdmin 4 | 5 | 6 | class SiteAdminRepo: 7 | def __init__(self, db_session): 8 | self.db_session = db_session 9 | 10 | def get_by_id(self, id: int) -> SiteAdmin: 11 | return self.db_session.query(SiteAdmin).filter(SiteAdmin.id == id).first() 12 | 13 | def get_by_username(self, username: Text) -> SiteAdmin: 14 | return self.db_session.query(SiteAdmin).filter(SiteAdmin.user == username).first() -------------------------------------------------------------------------------- /redditrepostsleuth/core/model/wiki_stats.py: 
-------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Text 3 | 4 | 5 | @dataclass 6 | class WikiStats: 7 | summon_total: int = 0 8 | top_active_user: Text = None 9 | top_active_sub: Text = None 10 | total_image_repost: int = 0 11 | total_link_repost: int = 0 12 | total_posts: int = 0 13 | total_image_posts: int = 0 14 | total_link_posts: int = 0 15 | total_text_posts: int = 0 16 | total_video_posts: int = 0 17 | top_5_active_users: dict[Text, int] = None 18 | top_5_active_subs: dict[Text, int] = None -------------------------------------------------------------------------------- /redditrepostsleuth/core/db/db_utils.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import create_engine 2 | from sqlalchemy.engine.url import URL 3 | 4 | from redditrepostsleuth.core.config import Config 5 | from redditrepostsleuth.core.logging import log 6 | 7 | 8 | def get_db_engine(config: Config): 9 | connection_uri = URL.create( 10 | "mysql+pymysql", 11 | username=config.db_user, 12 | password=config.db_password, 13 | host=config.db_host, 14 | database=config.db_name, 15 | ) 16 | return create_engine(connection_uri, echo=False, pool_size=50, pool_pre_ping=True) 17 | 18 | -------------------------------------------------------------------------------- /redditrepostsleuth/core/model/repostmatch.py: -------------------------------------------------------------------------------- 1 | from redditrepostsleuth.core.db.databasemodels import Post 2 | 3 | 4 | # TODO - Remove 5 | class RepostMatch: 6 | def __init__(self): 7 | self.post: Post = None 8 | self.original_id: int = None 9 | self.match_id: int = None 10 | self.title_similarity: int = 0 11 | 12 | def to_dict(self): 13 | return { 14 | 'post': self.post.to_dict(), 15 | 'original_id': self.original_id, 16 | 'match_id': self.match_id, 17 | 'title_similarity': self.title_similarity 18 | } 
-------------------------------------------------------------------------------- /redditrepostsleuth/core/db/repository/stat_top_repost_repo.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from redditrepostsleuth.core.db.databasemodels import StatsTopRepost 4 | 5 | 6 | class StatTopRepostRepo: 7 | def __init__(self, db_session): 8 | self.db_session = db_session 9 | def add(self, item): 10 | self.db_session.add(item) 11 | 12 | def get_all(self, day_range: int, nsfw: bool = False) -> list[StatsTopRepost]: 13 | return self.db_session.query(StatsTopRepost).filter(StatsTopRepost.post_type_id == 2, StatsTopRepost.day_range == day_range, StatsTopRepost.nsfw == nsfw).all() -------------------------------------------------------------------------------- /alembic/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision | comma,n} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | ${imports if imports else ""} 11 | 12 | # revision identifiers, used by Alembic. 
13 | revision = ${repr(up_revision)} 14 | down_revision = ${repr(down_revision)} 15 | branch_labels = ${repr(branch_labels)} 16 | depends_on = ${repr(depends_on)} 17 | 18 | 19 | def upgrade(): 20 | ${upgrades if upgrades else "pass"} 21 | 22 | 23 | def downgrade(): 24 | ${downgrades if downgrades else "pass"} 25 | -------------------------------------------------------------------------------- /redditrepostsleuth/core/model/events/ingestsubmissionevent.py: -------------------------------------------------------------------------------- 1 | from redditrepostsleuth.core.model import InfluxEvent 2 | 3 | 4 | class IngestSubmissionEvent(InfluxEvent): 5 | def __init__(self, event_type: str = None, status: str = None, queue: str = None, post_type: str = None, post_id: str = None): 6 | super().__init__(event_type=event_type, status=status) 7 | self.post_id = post_id 8 | self.post_type = post_type 9 | self.queue = queue 10 | 11 | def get_influx_event(self): 12 | event = super().get_influx_event() 13 | event[0]['tags']['post_type'] = self.post_type 14 | return event -------------------------------------------------------------------------------- /redditrepostsleuth/core/model/image_index_api_result.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class ImageMatch(BaseModel): 7 | id: int 8 | distance: float 9 | 10 | class IndexSearchResult(BaseModel): 11 | index_name: str 12 | hamming_filtered: bool = False 13 | annoy_filtered: bool = False 14 | index_search_time: float = 0 15 | total_time: float = 0 16 | total_searched: int = 0 17 | matches: List[ImageMatch] = [] 18 | 19 | class APISearchResults(BaseModel): 20 | total_searched: int = 0 21 | total_search_time: float = 0 22 | results: List[IndexSearchResult] = [] -------------------------------------------------------------------------------- /redditrepostsleuth/core/util/utils.py: 
-------------------------------------------------------------------------------- 1 | from urllib.parse import urlparse 2 | 3 | 4 | def build_reddit_query_string(post_ids: list[str]) -> str: 5 | t3_ids = [f't3_{p}' for p in post_ids] 6 | return f'{",".join(t3_ids)}' 7 | 8 | 9 | def get_post_ids_from_reddit_req_url(url: str) -> list[str]: 10 | parsed_url = urlparse(url) 11 | t3_ids = parsed_url.query.replace('id=', '').split(',') 12 | return [id.replace('t3_', '') for id in t3_ids] 13 | 14 | 15 | def build_reddit_req_url(post_ids: list[str]) -> str: 16 | t3_ids = [f't3_{p}' for p in post_ids] 17 | return f'https://api.reddit.com/api/info?id={",".join(t3_ids)}' 18 | -------------------------------------------------------------------------------- /redditrepostsleuth/core/db/repository/post_hash_repo.py: -------------------------------------------------------------------------------- 1 | from redditrepostsleuth.core.db.databasemodels import PostHash 2 | 3 | 4 | class PostHashRepo: 5 | def __init__(self, db_session): 6 | self.db_session = db_session 7 | 8 | def find_by_hash_and_type(self, hash: str, hash_type_id: int): 9 | return self.db_session.query(PostHash).filter(PostHash.hash_type_id == hash_type_id, PostHash.hash == hash).all() 10 | 11 | def find_first_hash_by_post_and_type(self, post_id: int, hash_type_id: int): 12 | return self.db_session.query(PostHash).filter(PostHash.post_id == post_id, PostHash.hash_type_id == hash_type_id).first() -------------------------------------------------------------------------------- /redditrepostsleuth/core/model/dummy_comment.py: -------------------------------------------------------------------------------- 1 | from praw import Reddit 2 | 3 | 4 | class DummyComment: 5 | def __init__(self, body: str, subreddit: str, submission_id: str): 6 | self.id = 'hz3pblg' 7 | self.body = body 8 | self.permalink = '/r/mock/bot/comment' 9 | self.submission_id = submission_id 10 | 11 | class DummySubmission: 12 | id = self.submission_id 13 | 
14 | class DummySubreddit: 15 | def __init__(self, subreddit: str): 16 | self.display_name = subreddit 17 | 18 | self.submission = DummySubmission() 19 | self.subreddit = DummySubreddit(subreddit) -------------------------------------------------------------------------------- /docker-compose-public.yml: -------------------------------------------------------------------------------- 1 | version: "3.7" 2 | services: 3 | api: 4 | container_name: api 5 | user: "1001" 6 | build: 7 | context: . 8 | dockerfile: redditrepostsleuth/repostsleuthsiteapi/Dockerfile 9 | environment: 10 | LOG_LEVEL: INFO 11 | db_user: api 12 | restart: unless-stopped 13 | entrypoint: gunicorn redditrepostsleuth.repostsleuthsiteapi.app --bind 0.0.0.0:8443 --workers 10 --log-level DEBUG 14 | volumes: 15 | - /opt/letsencrypt/etc/letsencrypt/live/www.repostsleuth.com:/config/keys 16 | - /opt/imageuploads:/opt/imageuploads 17 | 18 | -------------------------------------------------------------------------------- /redditrepostsleuth/core/model/events/response_event.py: -------------------------------------------------------------------------------- 1 | from redditrepostsleuth.core.model.events.influxevent import InfluxEvent 2 | 3 | 4 | class ResponseEvent(InfluxEvent): 5 | def __init__(self, subreddit, source, event_type=None): 6 | super(ResponseEvent, self).__init__(event_type=event_type) 7 | self.subreddit = subreddit 8 | self.count = 1 9 | self.source = source 10 | 11 | def get_influx_event(self): 12 | event = super().get_influx_event() 13 | event[0]['fields']['count'] = self.count 14 | event[0]['tags']['subreddit'] = self.subreddit 15 | event[0]['tags']['source'] = self.source 16 | return event -------------------------------------------------------------------------------- /alembic/versions-pre-refactor/cf449578fbf7_init.py: -------------------------------------------------------------------------------- 1 | """init 2 | 3 | Revision ID: cf449578fbf7 4 | Revises: 5 | Create Date: 2020-09-08 
10:09:40.968872 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = 'cf449578fbf7' 14 | down_revision = None 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade(): 20 | # ### commands auto generated by Alembic - please adjust! ### 21 | pass 22 | # ### end Alembic commands ### 23 | 24 | 25 | def downgrade(): 26 | # ### commands auto generated by Alembic - please adjust! ### 27 | pass 28 | # ### end Alembic commands ### 29 | -------------------------------------------------------------------------------- /wiki/stats.md: -------------------------------------------------------------------------------- 1 | ###General Stats 2 | 3 | Last Updated at {last_updated} 4 | 5 | **Times Summoned:** {summon_total} 6 | 7 | **Most Active User:** {top_active_user} 8 | 9 | **Most Active Sub:** {top_active_sub} 10 | 11 | **Image Reposts Found:** {total_image_repost} 12 | 13 | **Link Reposts Found:** {total_link_repost} 14 | 15 | ### Archive Stats 16 | **Total Posts:** {total_posts} 17 | 18 | **Image Posts:** {total_image_posts} 19 | 20 | **Link Posts:** {total_link_posts} 21 | 22 | **Text Posts:** {total_text_posts} 23 | 24 | **Video Posts** {total_video_posts} 25 | 26 | ### Top 5 Most Active Subs 27 | {top_5_active_subs} 28 | 29 | ### Top 5 Most Active Users 30 | {top_5_active_users} -------------------------------------------------------------------------------- /redditrepostsleuth/core/db/repository/meme_hash_repo.py: -------------------------------------------------------------------------------- 1 | from typing import NoReturn 2 | 3 | from redditrepostsleuth.core.db.databasemodels import MemeHash 4 | 5 | 6 | class MemeHashRepo: 7 | def __init__(self, db_session): 8 | self.db_session = db_session 9 | 10 | def add(self, item: MemeHash) -> NoReturn: 11 | self.db_session.add(item) 12 | 13 | def get_by_post_id(self, post_id: str) -> MemeHash: 14 | return 
self.db_session.query(MemeHash).filter(MemeHash.post_id == post_id).first() 15 | 16 | def get_by_post_ids(self, post_ids: list[str]) -> list[MemeHash]: 17 | return self.db_session.query(MemeHash).filter(MemeHash.post_id.in_(post_ids)).all() -------------------------------------------------------------------------------- /redditrepostsleuth/core/db/repository/stat_daily_count_repo.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from redditrepostsleuth.core.db.databasemodels import StatsDailyCount 4 | 5 | 6 | class StatDailyCountRepo: 7 | def __init__(self, db_session): 8 | self.db_session = db_session 9 | 10 | def add(self, item): 11 | self.db_session.add(item) 12 | 13 | def get_all(self, limit: int = None) -> List[StatsDailyCount]: 14 | return self.db_session.query(StatsDailyCount).order_by(StatsDailyCount.date.desc()).limit(limit).all() 15 | 16 | def get_latest(self) -> StatsDailyCount: 17 | return self.db_session.query(StatsDailyCount).order_by(StatsDailyCount.id.desc()).first() -------------------------------------------------------------------------------- /redditrepostsleuth/core/model/search/search_match.py: -------------------------------------------------------------------------------- 1 | from typing import Text 2 | 3 | from redditrepostsleuth.core.db.databasemodels import Post 4 | 5 | 6 | class SearchMatch: 7 | def __init__( 8 | self, 9 | searched_url: str, 10 | post: Post, 11 | title_similarity: int = 0, 12 | ): 13 | self.title_similarity = title_similarity 14 | self.post = post 15 | self.searched_url = searched_url 16 | 17 | def to_dict(self): 18 | return { 19 | 'searched_url': self.searched_url, 20 | 'post': self.post.to_dict() if self.post else None, 21 | 'title_similarity': self.title_similarity 22 | } -------------------------------------------------------------------------------- /redditrepostsleuth/adminsvc/stats.md: 
-------------------------------------------------------------------------------- 1 | 2 | ###General Status 3 | 4 | Last Updated at {last_updated} 5 | 6 | **Times Summoned:** {summon_total} 7 | 8 | **Most Active User:** {top_active_user} 9 | 10 | **Most Active Sub:** {top_active_sub} 11 | 12 | **Image Reposts Found:** {total_image_repost} 13 | 14 | **Link Reposts Found:** {total_link_repost} 15 | 16 | ### Archive Stats 17 | **Total Posts:** {total_posts} 18 | 19 | **Image Posts:** {total_image_posts} 20 | 21 | **Link Posts:** {total_link_posts} 22 | 23 | **Text Posts:** {total_text_posts} 24 | 25 | **Video Posts** {total_video_posts} 26 | 27 | ### Top 5 Most Active Subs 28 | {top_5_active_subs} 29 | 30 | ### Top 5 Most Active Users 31 | {top_5_active_users} 32 | -------------------------------------------------------------------------------- /redditrepostsleuth/core/model/events/RedditAdminActionEvent.py: -------------------------------------------------------------------------------- 1 | from redditrepostsleuth.core.model.events.influxevent import InfluxEvent 2 | 3 | 4 | class RedditAdminActionEvent(InfluxEvent): 5 | def __init__(self, subreddit: str, action: str, event_type:str = None): 6 | super(RedditAdminActionEvent, self).__init__(event_type=event_type) 7 | self.subreddit = subreddit 8 | self.count = 1 9 | self.action = action 10 | 11 | def get_influx_event(self): 12 | event = super().get_influx_event() 13 | #event[0]['fields']['count'] = self.count 14 | event[0]['tags']['subreddit'] = self.subreddit 15 | event[0]['tags']['action'] = self.action 16 | return event -------------------------------------------------------------------------------- /redditrepostsleuth/repostsleuthsiteapi/endpoints/image_repost_endpoint.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from falcon import Request, Response, HTTPNotFound 4 | 5 | from redditrepostsleuth.core.db.uow.unitofworkmanager import UnitOfWorkManager 6 | 
7 | 8 | class ImageRepostEndpoint: 9 | def __init__(self, uowm: UnitOfWorkManager): 10 | self.uowm = uowm 11 | 12 | def on_get(self, req: Request, resp: Response): 13 | with self.uowm.start() as uow: 14 | post = uow.posts.get_by_post_id(req.get_param('post_id', required=True)) 15 | if not post: 16 | raise HTTPNotFound('Post not found', f'This post was not found in the Repost Sleuth Database') 17 | resp.body = json.dumps(post.to_dict()) 18 | -------------------------------------------------------------------------------- /utility_scripts/cleanup_watches.py: -------------------------------------------------------------------------------- 1 | from redditrepostsleuth.core.config import Config 2 | from redditrepostsleuth.core.db.db_utils import get_db_engine 3 | from redditrepostsleuth.core.db.uow.sqlalchemyunitofworkmanager import SqlAlchemyUnitOfWorkManager 4 | 5 | config = Config(r'/home/barry/PycharmProjects/RedditRepostSleuth/sleuth_config.json') 6 | uowm = SqlAlchemyUnitOfWorkManager(get_db_engine(config)) 7 | 8 | with uowm.start() as uow: 9 | watches = uow.repostwatch.get_all() 10 | for watch in watches: 11 | post = uow.posts.get_by_post_id(watch.post_id) 12 | if not post: 13 | print(f'Removing watch {watch.id} for post {watch.post_id}') 14 | uow.repostwatch.remove(watch) 15 | uow.commit() 16 | 17 | -------------------------------------------------------------------------------- /redditrepostsleuth/core/db/repository/monitored_sub_config_revision_repo.py: -------------------------------------------------------------------------------- 1 | from typing import Text 2 | 3 | from redditrepostsleuth.core.db.databasemodels import MonitoredSubConfigRevision 4 | 5 | 6 | class MonitoredSubConfigRevisionRepo: 7 | def __init__(self, db_session): 8 | self.db_session = db_session 9 | 10 | def add(self, revision: MonitoredSubConfigRevision): 11 | self.db_session.add(revision) 12 | 13 | def update(self, revision: MonitoredSubConfigRevision): 14 | self.db_session.update(revision) 
15 | 16 | def get_by_revision_id(self, revision_id: Text) -> MonitoredSubConfigRevision: 17 | return self.db_session.query(MonitoredSubConfigRevision).filter(MonitoredSubConfigRevision.revision_id == revision_id).first() -------------------------------------------------------------------------------- /redditrepostsleuth/core/db/repository/audiofingerprintrepo.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from redditrepostsleuth.core.logging import log 4 | from redditrepostsleuth.core.db.databasemodels import AudioFingerPrint 5 | 6 | 7 | class AudioFingerPrintRepository: 8 | def __init__(self, db_session): 9 | self.db_session = db_session 10 | 11 | def add(self, item: AudioFingerPrint): 12 | self.db_session.add(item) 13 | 14 | def get_by_post_id(self, post_id: str) -> AudioFingerPrint: 15 | return self.db_session.query(AudioFingerPrint).filter(AudioFingerPrint.post_id == post_id).first() 16 | 17 | def bulk_save(self, items: List[AudioFingerPrint]): 18 | log.info('Saving %s audio hashes', len(items)) 19 | self.db_session.bulk_save_objects(items) -------------------------------------------------------------------------------- /redditrepostsleuth/core/model/events/reddit_api_event.py: -------------------------------------------------------------------------------- 1 | from redditrepostsleuth.core.model.events.influxevent import InfluxEvent 2 | 3 | 4 | class RedditApiEvent(InfluxEvent): 5 | def __init__(self, request_type, response_time, remaining_limit=0, event_type=None): 6 | super(RedditApiEvent, self).__init__(event_type=event_type) 7 | self.request_type = request_type 8 | self.response_time = response_time 9 | self.remaining_limit = remaining_limit 10 | 11 | 12 | def get_influx_event(self): 13 | event = super().get_influx_event() 14 | event[0]['fields']['remaining_limit'] = self.remaining_limit 15 | event[0]['fields']['response_time'] = self.response_time 16 | 
event[0]['tags']['request_type'] = self.request_type 17 | return event -------------------------------------------------------------------------------- /redditrepostsleuth/core/db/repository/banned_subreddit_repo.py: -------------------------------------------------------------------------------- 1 | from typing import Text 2 | 3 | from redditrepostsleuth.core.db.databasemodels import BannedSubreddit 4 | 5 | 6 | class BannedSubredditRepo: 7 | def __init__(self, db_session): 8 | self.db_session = db_session 9 | 10 | def add(self, item): 11 | self.db_session.add(item) 12 | 13 | def get_by_subreddit(self, name: Text) -> BannedSubreddit: 14 | return self.db_session.query(BannedSubreddit).filter(BannedSubreddit.subreddit == name).first() 15 | 16 | def get_all(self, limit: int = None, offset: int = None): 17 | return self.db_session.query(BannedSubreddit).order_by(BannedSubreddit.subreddit).limit(limit).offset(offset).all() 18 | 19 | def remove(self, item): 20 | self.db_session.delete(item) -------------------------------------------------------------------------------- /redditrepostsleuth/core/db/repository/image_index_map_rep.py: -------------------------------------------------------------------------------- 1 | from typing import Text 2 | 3 | from redditrepostsleuth.core.db.databasemodels import ImageIndexMap 4 | 5 | 6 | class ImageIndexMapRepo: 7 | def __init__(self, db_session): 8 | self.db_session = db_session 9 | 10 | def get_by_id_and_index(self, id: int, index: Text) -> ImageIndexMap: 11 | return self.db_session.query(ImageIndexMap).filter(ImageIndexMap.annoy_index_id == id, ImageIndexMap.index_name == index).first() 12 | 13 | def get_all_in_by_ids_and_index(self, ids: list[int], index: str) -> list[ImageIndexMap]: 14 | return self.db_session.query(ImageIndexMap).filter(ImageIndexMap.annoy_index_id.in_(ids), ImageIndexMap.index_name == index).all() 15 | def add(self, item): 16 | self.db_session.add(item) 
-------------------------------------------------------------------------------- /redditrepostsleuth/core/db/repository/user_review_repo.py: -------------------------------------------------------------------------------- 1 | from redditrepostsleuth.core.db.databasemodels import UserReview 2 | 3 | 4 | class UserReviewRepo: 5 | 6 | def __init__(self, db_session): 7 | self.db_session = db_session 8 | 9 | def add(self, item: UserReview): 10 | self.db_session.add(item) 11 | 12 | def get_all(self, limit: int = None) -> list[UserReview]: 13 | return self.db_session.query(UserReview).limit(limit).all() 14 | 15 | def get_all_unchecked(self, limit: int = None) -> list[UserReview]: 16 | return self.db_session.query(UserReview).filter(UserReview.last_checked == None).limit(limit).all() 17 | 18 | def get_by_username(self, username: str) -> UserReview: 19 | return self.db_session.query(UserReview).filter(UserReview.username == username).first() -------------------------------------------------------------------------------- /redditrepostsleuth/core/model/search/link_search_results.py: -------------------------------------------------------------------------------- 1 | from typing import Text 2 | 3 | from redditrepostsleuth.core.db.databasemodels import Post 4 | from redditrepostsleuth.core.model.link_search_times import LinkSearchTimes 5 | from redditrepostsleuth.core.model.search.search_results import SearchResults 6 | from redditrepostsleuth.core.model.search_settings import SearchSettings 7 | 8 | 9 | class LinkSearchResults(SearchResults): 10 | def __init__( 11 | self, 12 | checked_url: Text, 13 | search_settings: SearchSettings, 14 | checked_post: Post = None, 15 | search_times: LinkSearchTimes = None 16 | ): 17 | super().__init__(checked_url, search_settings, checked_post=checked_post) 18 | self.search_times = search_times or LinkSearchTimes() 19 | -------------------------------------------------------------------------------- 
/redditrepostsleuth/core/db/repository/subreddit_repo.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | from sqlalchemy import or_ 4 | 5 | from redditrepostsleuth.core.db.databasemodels import Subreddit 6 | 7 | 8 | class SubredditRepo: 9 | def __init__(self, db_session): 10 | self.db_session = db_session 11 | 12 | def add(self, item): 13 | self.db_session.add(item) 14 | 15 | def get_by_name(self, name: str): 16 | return self.db_session.query(Subreddit).filter(Subreddit.name == name).first() 17 | 18 | def get_subreddits_to_update(self, limit: int = None, offset: int = None) -> list[Subreddit]: 19 | delta = datetime.datetime.now(datetime.UTC) - datetime.timedelta(days=3) 20 | return self.db_session.query(Subreddit).filter(or_(Subreddit.added_at < delta, Subreddit.last_checked == None)).limit(limit).offset(offset).all() -------------------------------------------------------------------------------- /redditrepostsleuth/core/db/repository/http_proxy_repo.py: -------------------------------------------------------------------------------- 1 | from redditrepostsleuth.core.db.databasemodels import HttpProxy 2 | 3 | 4 | class HttpProxyRepo: 5 | 6 | def __init__(self, db_session): 7 | self.db_session = db_session 8 | 9 | def add(self, item: HttpProxy): 10 | self.db_session.add(item) 11 | 12 | def get_by_id(self, id: int) -> HttpProxy: 13 | return self.db_session.query(HttpProxy).filter(HttpProxy.id == id).first() 14 | 15 | def get_all_enabled(self) -> list[HttpProxy]: 16 | return self.db_session.query(HttpProxy).filter(HttpProxy.enabled).all() 17 | 18 | def get_all_disabled(self) -> list[HttpProxy]: 19 | return self.db_session.query(HttpProxy).filter(HttpProxy.enabled == False).all() 20 | 21 | def delete_all(self) -> None: 22 | self.db_session.query(HttpProxy).delete() -------------------------------------------------------------------------------- /tests/summonssvc/commandparsing/test_commandParser.py: 
-------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from redditrepostsleuth.core.config import Config 4 | from redditrepostsleuth.core.exception import InvalidCommandException 5 | from redditrepostsleuth.summonssvc.commandparsing.command_parser import CommandParser 6 | 7 | 8 | class TestCommandParser(TestCase): 9 | 10 | def test_parse_root_command__valid_command(self): 11 | parser = CommandParser(config=Config(redis_host='dummy')) 12 | r = parser.parse_root_command('repost -meme -matching tight') 13 | self.assertEqual('repost', r) 14 | 15 | def test_parse_root_command__invalid_command(self): 16 | parser = CommandParser(config=Config(redis_host='dummy')) 17 | self.assertRaises(InvalidCommandException, parser.parse_root_command, 'junk -meme -matching tight') 18 | 19 | -------------------------------------------------------------------------------- /alembic/versions-pre-refactor/ae154e44a9b8_add_config_tables.py: -------------------------------------------------------------------------------- 1 | """Add config tables 2 | 3 | Revision ID: ae154e44a9b8 4 | Revises: 0fac44af5a9c 5 | Create Date: 2020-10-17 16:34:16.328787 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = 'ae154e44a9b8' 14 | down_revision = '0fac44af5a9c' 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade(): 20 | # ### commands auto generated by Alembic - please adjust! ### 21 | op.create_unique_constraint(None, 'config_message_templates', ['template_name']) 22 | # ### end Alembic commands ### 23 | 24 | 25 | def downgrade(): 26 | # ### commands auto generated by Alembic - please adjust! 
### 27 | op.drop_constraint(None, 'config_message_templates', type_='unique') 28 | # ### end Alembic commands ### 29 | -------------------------------------------------------------------------------- /alembic/versions-pre-refactor/77a4e176572e_image_repost_index.py: -------------------------------------------------------------------------------- 1 | """image repost index 2 | 3 | Revision ID: 77a4e176572e 4 | Revises: ee1c9310194b 5 | Create Date: 2020-10-13 22:08:09.234664 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = '77a4e176572e' 14 | down_revision = 'ee1c9310194b' 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade(): 20 | # ### commands auto generated by Alembic - please adjust! ### 21 | op.create_index('idx_repost_of_date', 'image_reposts', ['detected_at', 'author'], unique=False) 22 | # ### end Alembic commands ### 23 | 24 | 25 | def downgrade(): 26 | # ### commands auto generated by Alembic - please adjust! ### 27 | op.drop_index('idx_repost_of_date', table_name='image_reposts') 28 | # ### end Alembic commands ### 29 | -------------------------------------------------------------------------------- /alembic/versions-pre-refactor/18c6ae18a160_index_to_link_repost.py: -------------------------------------------------------------------------------- 1 | """index to link repost 2 | 3 | Revision ID: 18c6ae18a160 4 | Revises: cfffe117cd7b 5 | Create Date: 2020-10-14 11:06:58.799449 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = '18c6ae18a160' 14 | down_revision = 'cfffe117cd7b' 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade(): 20 | # ### commands auto generated by Alembic - please adjust! 
### 21 | op.create_index('idx_repost_of_date', 'link_reposts', ['detected_at', 'author'], unique=False) 22 | # ### end Alembic commands ### 23 | 24 | 25 | def downgrade(): 26 | # ### commands auto generated by Alembic - please adjust! ### 27 | op.drop_index('idx_repost_of_date', table_name='link_reposts') 28 | # ### end Alembic commands ### 29 | -------------------------------------------------------------------------------- /alembic/versions-pre-refactor/5111c30c2895_voting_column_in_report.py: -------------------------------------------------------------------------------- 1 | """voting column in report 2 | 3 | Revision ID: 5111c30c2895 4 | Revises: 126af4529c2d 5 | Create Date: 2020-11-11 21:48:11.094851 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = '5111c30c2895' 14 | down_revision = '126af4529c2d' 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade(): 20 | # ### commands auto generated by Alembic - please adjust! ### 21 | op.add_column('reddit_user_report', sa.Column('sent_for_voting', sa.Boolean(), nullable=True)) 22 | # ### end Alembic commands ### 23 | 24 | 25 | def downgrade(): 26 | # ### commands auto generated by Alembic - please adjust! ### 27 | op.drop_column('reddit_user_report', 'sent_for_voting') 28 | # ### end Alembic commands ### 29 | -------------------------------------------------------------------------------- /alembic/versions-pre-refactor/700576ba9d88_subconfigchange.py: -------------------------------------------------------------------------------- 1 | """subconfigchange 2 | 3 | Revision ID: 700576ba9d88 4 | Revises: a4829f4a5121 5 | Create Date: 2020-10-10 13:33:35.001655 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 
# revision identifiers, used by Alembic.
revision = '700576ba9d88'
down_revision = 'a4829f4a5121'
branch_labels = None
depends_on = None


def upgrade():
    """Drop the unique 'subreddit' index on reddit_monitored_sub_config_change.

    A subreddit can have many config-change rows, so a unique constraint on
    the subreddit column is incorrect for this table.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_index('subreddit', table_name='reddit_monitored_sub_config_change')
    # ### end Alembic commands ###


def downgrade():
    """Recreate the unique 'subreddit' index removed by upgrade().

    NOTE(review): this will fail if the table now contains multiple rows for
    the same subreddit — data must be deduplicated before downgrading.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_index('subreddit', 'reddit_monitored_sub_config_change', ['subreddit'], unique=True)
    # ### end Alembic commands ###
### 27 | op.drop_column('image_index_map', 'reddit_image_post_db_id') 28 | # ### end Alembic commands ### 29 | -------------------------------------------------------------------------------- /alembic/versions-pre-refactor/cf751ec0db2c_add_is_private_to_monitor_sub.py: -------------------------------------------------------------------------------- 1 | """Add is_private to monitor sub 2 | 3 | Revision ID: cf751ec0db2c 4 | Revises: f438cebc0e2e 5 | Create Date: 2021-09-05 22:40:45.347424 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = 'cf751ec0db2c' 14 | down_revision = 'f438cebc0e2e' 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade(): 20 | # ### commands auto generated by Alembic - please adjust! ### 21 | op.add_column('reddit_monitored_sub', sa.Column('is_private', sa.Boolean(), nullable=True)) 22 | # ### end Alembic commands ### 23 | 24 | 25 | def downgrade(): 26 | # ### commands auto generated by Alembic - please adjust! 
class MonitoredSubConfigChangeRepo:
    """Data-access helper for MonitoredSubConfigChange audit rows."""

    def __init__(self, db_session):
        self.db_session = db_session

    def add(self, revision: MonitoredSubConfigChange):
        """Stage a new config-change record for insertion."""
        self.db_session.add(revision)

    def update(self, revision: MonitoredSubConfigChange):
        """Persist changes to an existing config-change record.

        Bug fix: SQLAlchemy's Session exposes no ``update()`` method, so the
        previous ``self.db_session.update(revision)`` raised AttributeError at
        runtime.  ``merge()`` is the call the sibling repos (e.g.
        MemeTemplateRepository.update) use for the same purpose.
        """
        self.db_session.merge(revision)

    def get_all_by_subreddit(self, subreddit: Text, limit: int = None, offset: int = None):
        """Return a page of config changes for a subreddit, newest first.

        :param subreddit: subreddit name to filter on
        :param limit: max rows to return (None = no limit)
        :param offset: rows to skip for pagination (None = start at 0)
        """
        return self.db_session.query(MonitoredSubConfigChange).filter(
            MonitoredSubConfigChange.subreddit == subreddit).order_by(
            MonitoredSubConfigChange.updated_at.desc()).limit(limit).offset(offset).all()
class JobStatus(Enum):
    """Lifecycle states for a batched post-retrieval job."""
    STARTED = auto()
    SUCCESS = auto()
    DELETED = auto()
    TIMEOUT = auto()
    PROXYERROR = auto()
    ERROR = auto()
    RATELIMIT = auto()


@dataclass
class BatchedPostRequestJob:
    """A single batched HTTP request covering a group of posts.

    :param url: endpoint the batch is sent to
    :param posts: posts included in this batch
    :param status: current JobStatus of the request
    :param proxy: HTTP proxy used for the request, if any
    :param resp_data: raw response body once the request completes
    """
    url: str
    posts: list[Post]
    status: JobStatus
    proxy: HttpProxy = None
    resp_data: str = None


@dataclass
class DeleteCheckResult:
    """Buckets of post IDs produced by a deleted-post check."""
    # Idiom fix: pass the list constructor directly as the factory instead of
    # wrapping it in a needless ``lambda: []``.
    to_update: list[int] = field(default_factory=list)
    to_delete: list[str] = field(default_factory=list)
    to_recheck: list[str] = field(default_factory=list)

    @property
    def count(self) -> int:
        """Total number of IDs across all three buckets."""
        return len(self.to_update) + len(self.to_delete) + len(self.to_recheck)
class BannedUserRepo:
    """Data-access helper for BannedUser rows."""

    def __init__(self, db_session):
        self.db_session = db_session

    def add(self, item):
        """Stage a new banned-user row for insertion."""
        self.db_session.add(item)

    def get_by_user(self, name: Text) -> BannedUser:
        """Return the ban record for the given username, or None."""
        query = self.db_session.query(BannedUser)
        return query.filter(BannedUser.name == name).first()

    def get_all(self, limit: int = None, offset: int = None) -> List[BannedUser]:
        """Return a page of banned users."""
        query = self.db_session.query(BannedUser)
        return query.limit(limit).offset(offset).all()

    def get_expired_bans(self):
        """Return every ban whose expiration time is already in the past."""
        cutoff = datetime.utcnow()
        return self.db_session.query(BannedUser).filter(BannedUser.expires_at < cutoff).all()

    def remove(self, item):
        """Stage a banned-user row for deletion."""
        self.db_session.delete(item)
class AnnoySearchEvent(InfluxEvent):
    """Influx event carrying timing data from an annoy image search."""

    def __init__(
            self,
            search_times: ImageSearchTimes,
            source=None,
            event_type=None,
    ):
        super().__init__(event_type=event_type)
        self.search_times = search_times
        self.source = source
        # Captured once at construction so the event records where it ran.
        self.hostname = platform.node()

    def get_influx_event(self):
        """Extend the base event with per-phase search timings and host tags."""
        event = super().get_influx_event()
        # Fold every timing value into the fields dict in one call.
        event[0]['fields'].update(self.search_times.to_dict())
        tags = event[0]['tags']
        tags['hostname'] = self.hostname
        tags['source'] = self.source
        return event
class VideoHashRepository:
    """Data-access helper for VideoHash rows."""

    def __init__(self, db_session):
        self.db_session = db_session

    def get_all(self, limit: int = None, offset: int = None) -> List[VideoHash]:
        """Return a page of video hashes."""
        query = self.db_session.query(VideoHash)
        return query.offset(offset).limit(limit).all()

    def get_by_id(self, id: int) -> VideoHash:
        """Look up a video hash by primary key, or None."""
        return self.db_session.query(VideoHash).filter(VideoHash.id == id).first()

    def get_by_post_id(self, id: str) -> VideoHash:
        """Look up a video hash by its Reddit post ID, or None."""
        return self.db_session.query(VideoHash).filter(VideoHash.post_id == id).first()

    def add(self, item):
        """Stage a new video hash for insertion."""
        log.debug('Inserting: %s', item)
        self.db_session.add(item)
/wiki/bot_config.md: -------------------------------------------------------------------------------- 1 | { 2 | "active": false, 3 | "only_comment_on_repost": true, 4 | "report_reposts": false, 5 | "report_msg": "RepostSleuthBot-Repost", 6 | "same_sub_only": true, 7 | "sticky_comment": false, 8 | "target_days_old": 180, 9 | "meme_filter": false, 10 | "oc_response_template": null, 11 | "repost_response_template": null, 12 | "lock_post": false, 13 | "mark_as_oc": false, 14 | "remove_repost": false, 15 | "removal_reason": null, 16 | "title_ignore_keywords": null, 17 | "disable_summons_after_auto_response": false, 18 | "only_allow_one_summons": false, 19 | "remove_additional_summons": false, 20 | "check_all_submissions": true, 21 | "check_title_similarity": false, 22 | "target_title_match": 50, 23 | "filter_crossposts": true, 24 | "filter_same_author": true, 25 | "wiki_managed": true, 26 | "check_image_posts": true, 27 | "check_link_posts": true, 28 | "target_image_match": 92, 29 | "target_image_meme_match": 97 30 | } -------------------------------------------------------------------------------- /.github/workflows/pythonapp.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - uses: actions/checkout@v1 12 | - name: Set up Python 3.11 13 | uses: actions/setup-python@v1 14 | with: 15 | python-version: 3.11 16 | - name: Install dependencies 17 | run: | 18 | python -m pip install --upgrade pip 19 | pip install -r requirements.txt 20 | - name: Lint with flake8 21 | run: | 22 | pip install flake8 23 | # stop the build if there are Python syntax errors or undefined names 24 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 25 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 26 | flake8 . 
class InfluxEvent:
    """Base shape for statistics events written to InfluxDB.

    Subclasses override get_influx_event() and extend the returned point's
    fields/tags with their own data.
    """

    def __init__(self, event_type=None, status: str = None, queue: str = None, rate_limit: int = None, env: str = None):
        self.event_type = event_type
        self.status = status
        # Timestamp captured at construction, not at write time.
        self.event_time = datetime.utcnow()
        self.queue = queue
        self.rate_limit = rate_limit
        self.env = env

    def get_influx_event(self):
        """Return a single-point InfluxDB payload for this event."""
        fields = {
            'event_time': str(self.event_time),
            'rate_limit': self.rate_limit,
        }
        tags = {
            'event_type': self.event_type,
            'status': self.status,
            'queue': self.queue,
            'env': self.env,
        }
        point = {
            'measurement': 'repost_sleuth_stats',
            'fields': fields,
            'tags': tags,
        }
        return [point]
# revision identifiers, used by Alembic.
revision = 'ee1c9310194b'
down_revision = '7332736c6ef4'
branch_labels = None
depends_on = None


def upgrade():
    """Add detected_at indexes to both repost tables.

    The two indexes share a name ('idx_detected_at') but live on different
    tables, which MySQL permits.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_index('idx_detected_at', 'image_reposts', ['detected_at'], unique=False)
    op.create_index('idx_detected_at', 'link_reposts', ['detected_at'], unique=False)
    # ### end Alembic commands ###


def downgrade():
    """Drop the detected_at indexes, in reverse creation order."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_index('idx_detected_at', table_name='link_reposts')
    op.drop_index('idx_detected_at', table_name='image_reposts')
    # ### end Alembic commands ###
class MemeTemplateRepository:
    """Data-access helper for MemeTemplate rows."""

    def __init__(self, db_session):
        self.db_session = db_session

    def add(self, item):
        """Stage a new meme template for insertion."""
        self.db_session.add(item)

    def get_by_id(self, id: int) -> MemeTemplate:
        """Look up a template by primary key, or None."""
        query = self.db_session.query(MemeTemplate)
        return query.filter(MemeTemplate.id == id).first()

    def get_by_post_id(self, id: int) -> MemeTemplate:
        """Look up a template by its source post ID, or None."""
        query = self.db_session.query(MemeTemplate)
        return query.filter(MemeTemplate.post_id == id).first()

    def get_all(self, limit: int = 100, offset: int = 0) -> List[MemeTemplate]:
        """Return a page of templates (defaults: first 100)."""
        return self.db_session.query(MemeTemplate).limit(limit).offset(offset).all()

    def update(self, item: MemeTemplate):
        """Merge changes to an existing template into the session."""
        self.db_session.merge(item)

    def remove(self, item: MemeTemplate):
        """Stage a template for deletion."""
        self.db_session.delete(item)
-> ConfigMessageTemplate: 14 | return self.db_session.query(ConfigMessageTemplate).filter(ConfigMessageTemplate.id == id).first() 15 | 16 | def get_all(self) -> List[ConfigMessageTemplate]: 17 | return self.db_session.query(ConfigMessageTemplate).all() 18 | 19 | def add(self, template: ConfigMessageTemplate): 20 | self.db_session.add(template) 21 | 22 | def remove(self, template: ConfigMessageTemplate) -> NoReturn: 23 | self.db_session.delete(template) 24 | -------------------------------------------------------------------------------- /alembic/versions-pre-refactor/319d257ae614_add_template_slug.py: -------------------------------------------------------------------------------- 1 | """add template slug 2 | 3 | Revision ID: 319d257ae614 4 | Revises: ae154e44a9b8 5 | Create Date: 2020-10-25 22:37:12.362852 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = '319d257ae614' 14 | down_revision = 'ae154e44a9b8' 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade(): 20 | # ### commands auto generated by Alembic - please adjust! ### 21 | op.add_column('config_message_templates', sa.Column('template_slug', sa.String(length=100), nullable=False)) 22 | op.create_unique_constraint(None, 'config_message_templates', ['template_slug']) 23 | # ### end Alembic commands ### 24 | 25 | 26 | def downgrade(): 27 | # ### commands auto generated by Alembic - please adjust! 
class UserWhitelistRepo:
    """Data-access helper for UserWhitelist rows."""

    def __init__(self, db_session):
        self.db_session = db_session

    def add(self, item):
        """Stage a whitelist entry for insertion."""
        self.db_session.add(item)

    def remove(self, item: UserWhitelist):
        """Stage a whitelist entry for deletion."""
        self.db_session.delete(item)

    def get_by_id(self, id: int) -> Optional[UserWhitelist]:
        """Look up an entry by primary key, or None."""
        query = self.db_session.query(UserWhitelist)
        return query.filter(UserWhitelist.id == id).first()

    def get_by_username_and_subreddit(self, username: str, monitored_sub_id: int) -> Optional[UserWhitelist]:
        """Return the entry for a username scoped to one monitored sub, or None."""
        return (
            self.db_session.query(UserWhitelist)
            .filter(UserWhitelist.username == username,
                    UserWhitelist.monitored_sub_id == monitored_sub_id)
            .first()
        )

    def get_by_username(self, username: str) -> Optional[UserWhitelist]:
        """Return the first entry matching a username in any sub, or None."""
        query = self.db_session.query(UserWhitelist)
        return query.filter(UserWhitelist.username == username).first()
class GeneralAdmin:
    """Falcon resource that resolves a site-admin record from a Reddit OAuth token."""

    def __init__(self, uowm: UnitOfWorkManager):
        # Unit-of-work manager used to open short-lived DB sessions per request.
        self.uowm = uowm

    def on_get(self, req: Request, resp: Response):
        """Return the site-admin record owning the supplied token as JSON.

        Raises HTTPNotFound when the token does not resolve to a Reddit user,
        or when that user has no site-admin row.  The same generic message is
        used for both cases so callers cannot probe which usernames are admins.
        """
        token = req.get_param('token', required=True)
        user_data = get_user_data(token)
        if not user_data:
            raise HTTPNotFound(title=f'No admin found for provided token',
                               description=f'No admin found for provided token')
        with self.uowm.start() as uow:
            admin = uow.site_admin.get_by_username(user_data['name'])

            if not admin:
                raise HTTPNotFound(title=f'No admin found for provided token',
                                   description=f'No admin found for provided token')

            # NOTE(review): resp.body is deprecated in Falcon 3.x in favor of
            # resp.text — confirm the pinned falcon version before changing.
            resp.body = json.dumps(admin.to_dict())
class InvestigatePostRepo:
    """Data-access helper for InvestigatePost rows."""

    def __init__(self, db_session):
        self.db_session = db_session

    def add(self, item: InvestigatePost):
        """Stage a new investigation entry for insertion."""
        self.db_session.add(item)

    def get_by_id(self, id: int):
        """Look up an entry by primary key, or None."""
        query = self.db_session.query(InvestigatePost)
        return query.filter(InvestigatePost.id == id).first()

    def get_by_post_id(self, id: Text) -> InvestigatePost:
        """Look up an entry by Reddit post ID, or None."""
        query = self.db_session.query(InvestigatePost)
        return query.filter(InvestigatePost.post_id == id).first()

    def get_all(self):
        """Return up to 100 'High match meme' entries, most matches first."""
        query = self.db_session.query(InvestigatePost)
        query = query.filter(InvestigatePost.flag_reason == 'High match meme')
        return query.order_by(InvestigatePost.matches.desc()).limit(100).all()

    def remove(self, item: InvestigatePost):
        """Stage an entry for deletion."""
        self.db_session.delete(item)

    def remove_by_post_id(self, post_id: str) -> None:
        """Delete any entry matching the given post ID directly via the query."""
        self.db_session.query(InvestigatePost).filter(InvestigatePost.post_id == post_id).delete()
**Examples:**

*Search for matches no more than 60 days old in this sub*

```u/repostsleuthbot -age 60 -samesub```

*Find all matches using the tightest matching threshold*

```u/repostsleuthbot -matching tight -all```
class TextSearchSettings(SearchSettings):
    """
    Wrapper that contains all settings to be used when searching for a repost
    Initial values will be set to sensible defaults if none are provided
    """
    # NOTE(review): this class is named TextSearchSettings but lives in
    # link_search_settings.py — confirm which name callers use before renaming.
    def __init__(
            self,
            target_distance: float,
            **kwargs
    ):
        """
        Settings to use when performing a text search.
        When values are not provided sensible defaults are used
        :param target_distance: distance threshold a candidate must satisfy to
            count as a match — presumably an upper bound; TODO confirm direction
        :param kwargs: remaining settings forwarded to SearchSettings
        """
        super().__init__(**kwargs)
        self.target_distance = target_distance

    def to_dict(self):
        # Merge the base-class settings dict with this instance's attributes;
        # instance attributes win on key collisions.
        return {**super().to_dict(), **self.__dict__}
# revision identifiers, used by Alembic.
revision = '380abce0d196'
down_revision = '279f1e8d64eb'
branch_labels = None
depends_on = None

# The three new MonitoredSub boolean toggles introduced by this revision.
_NEW_COLUMNS = ('comment_on_oc', 'comment_on_repost', 'lock_response_comment')


def upgrade():
    """Add the new nullable boolean toggles to reddit_monitored_sub."""
    for column_name in _NEW_COLUMNS:
        op.add_column('reddit_monitored_sub', sa.Column(column_name, sa.Boolean(), nullable=True))


def downgrade():
    """Drop the toggles again, newest first."""
    for column_name in reversed(_NEW_COLUMNS):
        op.drop_column('reddit_monitored_sub', column_name)
class SearchTimes:
    """Collects named perf_counter timings gathered during a repost search.

    A timer name passed to start_timer()/stop_timer() must match one of the
    float attributes defined in __init__; stop_timer() stores the elapsed
    seconds on that attribute.
    """

    def __init__(self):
        # Pending timers: list of {'name': <attr name>, 'start': perf_counter()}
        self._timers = []
        self.total_search_time: float = float(0)
        self.total_filter_time: float = float(0)
        self.set_title_similarity_time: float = float(0)

    def __repr__(self):
        return json.dumps(self.to_dict())

    def start_timer(self, name: Text):
        """Begin timing the named phase."""
        self._timers.append({
            'name': name,
            'start': perf_counter()
        })

    def stop_timer(self, name: Text):
        """Stop the named timer and record the elapsed seconds on the attribute.

        If the timer was never started the error is logged and the call is a
        no-op.  Previously the code fell through after logging and raised
        TypeError by subscripting the None timer.
        """
        timer = next((x for x in self._timers if x['name'] == name), None)
        if timer is None:
            log.error('Failed to find timer %s', name)
            return  # bug fix: avoid TypeError on timer['start'] below
        if hasattr(self, name):
            setattr(self, name, round(perf_counter() - timer['start'], 5))

    def to_dict(self):
        """Return only the public timing fields (not the pending timer list)."""
        return {
            'total_search_time': self.total_search_time,
            'total_filter_time': self.total_filter_time,
            'set_title_similarity_time': self.set_title_similarity_time
        }
"""change MonitoredSub column names

Revision ID: 8d3377b141a8
Revises: a53c1ffe8f99
Create Date: 2020-12-16 18:32:30.175761

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = '8d3377b141a8'
down_revision = 'a53c1ffe8f99'
branch_labels = None
depends_on = None

# NOTE(review): despite the revision message, this migration only adds
# columns; nothing is renamed here — confirm intent.
_ADDED = (
    ('failed_admin_check_count', sa.Integer),
    ('only_comment_on_repost', sa.Boolean),
    ('report_reposts', sa.Boolean),
)


def upgrade():
    """Add the new nullable columns to reddit_monitored_sub."""
    for column_name, column_type in _ADDED:
        op.add_column('reddit_monitored_sub', sa.Column(column_name, column_type(), nullable=True))


def downgrade():
    """Remove the columns added by upgrade(), newest first."""
    for column_name, _ in reversed(_ADDED):
        op.drop_column('reddit_monitored_sub', column_name)
from typing import Text

from sqlalchemy.exc import IntegrityError

from redditrepostsleuth.core.db.databasemodels import MonitoredSub
from redditrepostsleuth.core.db.uow.unitofwork import UnitOfWork
from redditrepostsleuth.core.logging import log
from redditrepostsleuth.core.util.default_bot_config import DEFAULT_CONFIG_VALUES


def create_monitored_sub_in_db(subreddit_name: Text, uow: UnitOfWork, wiki_managed: bool = False) -> MonitoredSub:
    """Create and persist a MonitoredSub seeded with the default bot config.

    :param subreddit_name: Name of the subreddit to monitor
    :param uow: Unit of work used to persist the new row
    :param wiki_managed: Whether the sub's config is managed via its wiki page
    :return: The new MonitoredSub.  NOTE: if the sub already exists the
        IntegrityError is swallowed and the returned instance is NOT persisted.
    :raises Exception: Any non-integrity error raised while committing
    """
    monitored_sub = MonitoredSub(name=subreddit_name)
    # Seed every known default onto the model; skip defaults that don't map
    # to a column on MonitoredSub.
    for k, v in DEFAULT_CONFIG_VALUES.items():
        if hasattr(monitored_sub, k):
            setattr(monitored_sub, k, v)
    monitored_sub.wiki_managed = wiki_managed
    uow.monitored_sub.add(monitored_sub)
    try:
        uow.commit()
        log.info('Sub %s added as monitored sub', subreddit_name)
    except IntegrityError:
        # TODO - This can be pulled since we're checking during activation
        log.error('Failed to create monitored sub for %s. It already exists', subreddit_name, exc_info=True)
    except Exception:
        # log.exception already records the traceback; the redundant
        # exc_info=True flag was removed.  Unused `as e` bindings dropped.
        log.exception('Unknown exception saving monitored sub')
        raise

    return monitored_sub
import io
import mimetypes
import os
import re
import uuid


class ImageStore:
    """Stores uploaded images on disk under random UUID-based file names."""

    _CHUNK_SIZE_BYTES = 4096
    # Accepted names: a UUID-style hex name followed by a short extension.
    # Raw string added: '\.' in a normal string is a deprecated escape.
    _IMAGE_NAME_PATTERN = re.compile(
        r'[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\.[a-z]{2,4}$'
    )

    def __init__(self, storage_path, uuidgen=uuid.uuid4, fopen=io.open):
        """
        :param storage_path: Directory images are written to
        :param uuidgen: Injectable UUID factory (for tests)
        :param fopen: Injectable file opener (for tests)
        """
        self._storage_path = storage_path
        self._uuidgen = uuidgen
        self._fopen = fopen

    def save(self, image_stream, image_content_type):
        """Write the stream to disk and return the generated file name.

        The extension is derived from the content type.  Previously the
        guessed extension was computed but ignored and '.png' was always
        used; now '.png' is only the fallback for unknown content types.
        """
        ext = mimetypes.guess_extension(image_content_type) or '.png'
        name = '{uuid}{ext}'.format(uuid=self._uuidgen(), ext=ext)
        image_path = os.path.join(self._storage_path, name)

        with self._fopen(image_path, 'wb') as image_file:
            image_file.write(image_stream.read())

        return name

    def open(self, name):
        """Open a previously-saved image.

        :param name: File name returned by save()
        :return: (stream, content_length) tuple
        :raises IOError: If the name is not a valid generated image name
        """
        # Always validate untrusted input!
        if not self._IMAGE_NAME_PATTERN.match(name):
            raise IOError('File not found')

        image_path = os.path.join(self._storage_path, name)
        stream = self._fopen(image_path, 'rb')
        content_length = os.path.getsize(image_path)

        return stream, content_length
    def __init__(self, db_session):
        # Session is owned by the caller; this repo never commits.
        self.db_session = db_session

    def add(self, item):
        """Stage a new StatsTopReposter row on the session."""
        self.db_session.add(item)

    # Annotation corrected: the query returns SUM(repost_count), not a row
    # object.  NOTE(review): may be None when no rows match, and the exact
    # numeric type depends on the DB backend — confirm against callers.
    def get_total_reposts_by_author_and_day_range(self, author: str, day_range: int) -> int:
        res = self.db_session.query(func.sum(StatsTopReposter.repost_count)).filter(StatsTopReposter.author == author, StatsTopReposter.day_range == day_range).one()
        return res[0]

    # Annotation corrected: .first() returns a single row (or None), not a list.
    def get_by_author_post_type_and_range(self, author: str, post_type_id: int, day_range: int) -> StatsTopReposter:
        return self.db_session.query(StatsTopReposter).filter(StatsTopReposter.post_type_id == post_type_id,
                                                              StatsTopReposter.day_range == day_range,
                                                              StatsTopReposter.author == author).first()

    def get_by_post_type_and_range(self, post_type_id: int, day_range: int):
        """Return all reposter stat rows for a post type within a day range."""
        return self.db_session.query(StatsTopReposter).filter(StatsTopReposter.day_range == day_range, StatsTopReposter.post_type_id == post_type_id).all()
import os

from billiard.exceptions import WorkerLostError
from celery import Celery, signals
from celery.signals import after_setup_logger
from kombu.serialization import registry
from prawcore import TooManyRequests

from redditrepostsleuth.core.exception import IngestHighMatchMeme, ImageConversionException

# Tasks are serialized with pickle (see celeryconfig); enable it explicitly.
registry.enable('pickle')
celery = Celery('tasks')
celery.config_from_object('redditrepostsleuth.core.celery.celeryconfig')



# NOTE(review): the env var is named SENTRY_DNS — presumably a typo for
# SENTRY_DSN.  Renaming it would break existing deployments, so it is kept.
if os.getenv('SENTRY_DNS', None):
    print('Sentry DNS set, loading Sentry module')

    @signals.celeryd_init.connect
    def init_sentry(**_kwargs):
        # Imported lazily so sentry_sdk is only required when Sentry is enabled.
        from sentry_sdk.integrations.celery import CeleryIntegration
        import sentry_sdk
        sentry_sdk.init(
            dsn=os.getenv('SENTRY_DNS', None),
            environment=os.getenv('RUN_ENV', 'dev'),
            integrations=[
                CeleryIntegration(
                    monitor_beat_tasks=True,
                ),
            ],
            # Expected/transient failures that should not create Sentry events.
            ignore_errors=[IngestHighMatchMeme, ImageConversionException, WorkerLostError, TooManyRequests]
        )

# Strip Celery's default handlers after it configures logging — presumably so
# the application's own logging setup is the only output path; confirm.
@after_setup_logger.connect
def setup_loggers(logger, *args, **kwargs):
    logger.handlers = []

if __name__ == '__main__':
    celery.start()
from datetime import datetime, timedelta
from typing import Text

from sqlalchemy import func

from redditrepostsleuth.core.db.databasemodels import MonitoredSubChecks


class MonitoredSubCheckRepository:
    """Read/write access to MonitoredSubChecks rows."""

    def __init__(self, db_session):
        self.db_session = db_session

    def add(self, item):
        """Stage a new check row on the session."""
        self.db_session.add(item)

    def get_by_id(self, id: int) -> MonitoredSubChecks:
        """Return the first check for the given post.

        NOTE(review): despite the name, this filters on post_id rather than
        the row's primary key — confirm against callers before renaming.
        """
        return self.db_session.query(MonitoredSubChecks).filter(MonitoredSubChecks.post_id == id).first()

    def get_by_subreddit(self, monitored_sub_id: int, limit: int = 20, offset: int = None):
        """Return a page of checks for a monitored sub, newest first."""
        recent_first = (
            self.db_session.query(MonitoredSubChecks)
            .filter(MonitoredSubChecks.monitored_sub_id == monitored_sub_id)
            .order_by(MonitoredSubChecks.checked_at.desc())
        )
        return recent_first.limit(limit).offset(offset).all()

    def remove(self, item: MonitoredSubChecks):
        """Stage deletion of the given check row."""
        self.db_session.delete(item)

    def get_count_by_subreddit(self, monitored_sub_id: int, hours: int = None):
        """Count checks for a sub, optionally only within the last `hours`."""
        count_query = self.db_session.query(func.count(MonitoredSubChecks.id)).filter(
            MonitoredSubChecks.monitored_sub_id == monitored_sub_id)
        if hours:
            cutoff = datetime.now() - timedelta(hours=hours)
            count_query = count_query.filter(MonitoredSubChecks.checked_at > cutoff)
        return count_query.first()
from typing import List

from sqlalchemy import func

from redditrepostsleuth.core.db.databasemodels import MonitoredSub


class MonitoredSubRepository:
    """Read/write access to MonitoredSub rows."""

    def __init__(self, db_session):
        self.db_session = db_session

    def add(self, item):
        """Stage a new monitored sub on the session."""
        self.db_session.add(item)

    def get_all(self, limit: int = None) -> List[MonitoredSub]:
        """All monitored subs, smallest subscriber count first."""
        query = self.db_session.query(MonitoredSub).order_by(MonitoredSub.subscribers.asc())
        return query.limit(limit).all()

    def get_all_active(self, limit: int = None) -> List[MonitoredSub]:
        """Active monitored subs, largest subscriber count first."""
        query = (
            self.db_session.query(MonitoredSub)
            .filter(MonitoredSub.active == True)
            .order_by(MonitoredSub.subscribers.desc())
        )
        return query.limit(limit).all()

    def get_by_id(self, id: int) -> MonitoredSub:
        """Look up a monitored sub by primary key."""
        return self.db_session.query(MonitoredSub).filter(MonitoredSub.id == id).first()

    def get_by_sub(self, sub: str) -> MonitoredSub:
        """Look up a monitored sub by subreddit name."""
        return self.db_session.query(MonitoredSub).filter(MonitoredSub.name == sub).first()

    def get_count(self):
        """Total number of monitored subs, or None if the query returns no row."""
        row = self.db_session.query(func.count(MonitoredSub.id)).first()
        return row[0] if row else None

    def update(self, item: MonitoredSub):
        """Merge a (possibly detached) monitored sub into the session."""
        self.db_session.merge(item)

    def remove(self, item: MonitoredSub):
        """Stage deletion of the given monitored sub."""
        self.db_session.delete(item)

    def refresh(self, item: MonitoredSub):
        """Re-load the row's state from the database."""
        self.db_session.refresh(item)
from unittest import TestCase
from unittest.mock import Mock

from redditrepostsleuth.core.services.response_handler import ResponseHandler


class TestResponseHandler(TestCase):

    def test_send_mod_mail_invalid_subreddit(self):
        """The subreddit lookup is attempted even when Reddit returns nothing."""
        fetch_subreddit = Mock(return_value=None)
        fake_reddit = Mock(subreddit=fetch_subreddit)
        handler = ResponseHandler(
            fake_reddit,
            Mock(),
            Mock(),
            Mock()
        )
        handler.send_mod_mail('test', 'test', 'test')
        fetch_subreddit.assert_called()

    def test_send_mod_mail_valid_subreddit(self):
        """A valid subreddit gets messaged and the private message is saved."""
        send_message = Mock(return_value=None)
        fake_subreddit = Mock(message=send_message)
        fetch_subreddit = Mock(return_value=fake_subreddit)
        fake_reddit = Mock(subreddit=fetch_subreddit)
        handler = ResponseHandler(
            fake_reddit,
            Mock(),
            Mock(),
            live_response=True
        )
        handler._save_private_message = Mock(return_value=None)
        handler.send_mod_mail('test subreddit', 'test body', 'test subject')
        fetch_subreddit.assert_called_with('test subreddit')
        send_message.assert_called_with('test subject', 'test body')
        handler._save_private_message.assert_called()
"""subconfigchange

Revision ID: a4829f4a5121
Revises: cf449578fbf7
Create Date: 2020-10-10 13:31:17.602780

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = 'a4829f4a5121'
down_revision = 'cf449578fbf7'
branch_labels = None
depends_on = None

_TABLE = 'reddit_monitored_sub_config_change'


def upgrade():
    """Create the config-change audit table and its subreddit/date index."""
    op.create_table(
        _TABLE,
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('updated_at', sa.DateTime(), nullable=False),
        sa.Column('updated_by', sa.String(length=100), nullable=False),
        sa.Column('source', sa.String(length=10), nullable=True),
        sa.Column('subreddit', sa.String(length=200), nullable=False),
        sa.Column('old_value', sa.String(length=2000), nullable=True),
        sa.Column('new_value', sa.String(length=2000), nullable=True),
        sa.PrimaryKeyConstraint('id'),
        # NOTE(review): a UNIQUE constraint on subreddit limits this audit
        # table to one change row per subreddit — verify this is intentional.
        sa.UniqueConstraint('subreddit')
    )
    op.create_index('idx_subreddit', _TABLE, ['subreddit', 'updated_at'], unique=False)


def downgrade():
    """Drop the index and table created by upgrade()."""
    op.drop_index('idx_subreddit', table_name=_TABLE)
    op.drop_table(_TABLE)
class ImageSearchSettings(SearchSettings):
    """
    Wrapper that contains all settings to be used when searching for a repost
    Initial values will be set to sensible defaults if none are provided
    """
    def __init__(
            self,
            target_match_percent: float,
            target_annoy_distance: float,
            target_meme_match_percent: float = None,
            meme_filter: bool = False,
            max_depth: int = 4000,
            **kwargs
    ):
        """
        Settings to use when performing an image search.
        When values are not provided sensible defaults are used
        :param target_match_percent: Percent threshold a match must meet to be considered
        :param target_annoy_distance: Minimum distance from the annoy index
        :param target_meme_match_percent: Percent threshold an identified meme must match to be considered
        :param meme_filter: enable the meme filter when searching
        :param max_depth: Maximum number of candidate matches pulled from the index
        :param kwargs: Remaining settings forwarded to SearchSettings
        """
        super().__init__(**kwargs)
        self.max_depth = max_depth
        self.meme_filter = meme_filter
        self.target_annoy_distance = target_annoy_distance
        self.target_meme_match_percent = target_meme_match_percent
        self.target_match_percent = target_match_percent

    def to_dict(self):
        # Merge the base-class dict with every instance attribute; instance
        # keys win on collision.
        return {**super().to_dict(), **self.__dict__}
return self.db_session.query(MemeTemplatePotential).filter(MemeTemplatePotential.vote_total >= total).all() 24 | 25 | def get_with_less_votes_than(self, total: int) -> List[MemeTemplatePotential]: 26 | return self.db_session.query(MemeTemplatePotential).filter(MemeTemplatePotential.vote_total <= total).all() 27 | 28 | def remove(self, item: MemeTemplatePotential): 29 | self.db_session.delete(item) 30 | 31 | def update(self, item: MemeTemplatePotential): 32 | self.db_session.merge(item) -------------------------------------------------------------------------------- /alembic/versions-pre-refactor/7332736c6ef4_create_author_index.py: -------------------------------------------------------------------------------- 1 | """create author index 2 | 3 | Revision ID: 7332736c6ef4 4 | Revises: 700576ba9d88 5 | Create Date: 2020-10-12 19:20:36.418076 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | from sqlalchemy.dialects import mysql 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = '7332736c6ef4' 14 | down_revision = '700576ba9d88' 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade(): 20 | # ### commands auto generated by Alembic - please adjust! ### 21 | op.add_column('bot_stat', sa.Column('comments_left', sa.Integer(), nullable=True)) 22 | op.drop_column('bot_stat', 'comments') 23 | op.create_index('idx_author', 'image_reposts', ['author'], unique=False) 24 | op.create_index('idx_author', 'link_reposts', ['author'], unique=False) 25 | op.create_index('idx_source', 'reddit_image_search', ['source'], unique=False) 26 | op.drop_index('idx _source', table_name='reddit_image_search') 27 | # ### end Alembic commands ### 28 | 29 | 30 | def downgrade(): 31 | # ### commands auto generated by Alembic - please adjust! 
### 32 | op.create_index('idx _source', 'reddit_image_search', ['source'], unique=False) 33 | op.drop_index('idx_source', table_name='reddit_image_search') 34 | op.drop_index('idx_author', table_name='link_reposts') 35 | op.drop_index('idx_author', table_name='image_reposts') 36 | op.add_column('bot_stat', sa.Column('comments', mysql.INTEGER(), autoincrement=False, nullable=True)) 37 | op.drop_column('bot_stat', 'comments_left') 38 | # ### end Alembic commands ### 39 | -------------------------------------------------------------------------------- /redditrepostsleuth/core/db/repository/bot_private_message_repo.py: -------------------------------------------------------------------------------- 1 | from typing import Text, List 2 | 3 | from redditrepostsleuth.core.db.databasemodels import BotPrivateMessage 4 | 5 | 6 | class BotPrivateMessageRepo: 7 | def __init__(self, db_session): 8 | self.db_session = db_session 9 | 10 | def get_all(self, limit: int = None) -> List[BotPrivateMessage]: 11 | return self.db_session.query(BotPrivateMessage).limit(limit).all() 12 | 13 | def get_by_post_id(self, post_id: Text) -> List[BotPrivateMessage]: 14 | return self.db_session.query(BotPrivateMessage).filter(BotPrivateMessage.in_response_to_post == post_id).all() 15 | 16 | def get_by_comment_id(self, post_id: Text) -> List[BotPrivateMessage]: 17 | return self.db_session.query(BotPrivateMessage).filter(BotPrivateMessage.in_response_to_comment == post_id).all() 18 | 19 | def get_first_by_recipient(self, recipient: Text) -> BotPrivateMessage: 20 | return self.db_session.query(BotPrivateMessage).filter(BotPrivateMessage.recipient == recipient).order_by(BotPrivateMessage.id.desc()).first() 21 | 22 | def get_by_user_source_and_post(self, user: Text, source: Text, post: Text) -> List[BotPrivateMessage]: 23 | return self.db_session.query(BotPrivateMessage).filter( 24 | BotPrivateMessage.in_response_to_post == post, 25 | BotPrivateMessage.triggered_from == source, 26 | 
def read_audio_file(filename, limit=None):
    """
    Source: https://github.com/worldveil/dejavu/blob/7f53f2ab6896b38cfd54cc396e2326a98b957d07/dejavu/decoder.py#L37
    Reads any file supported by pydub (ffmpeg) and returns the data contained
    within. If file reading fails due to input being a 24-bit wav file,
    wavio is used as a backup.

    Can be optionally limited to a certain amount of seconds from the start
    of the file by specifying the `limit` parameter. This is the amount of
    seconds from the start of the file.

    :param filename: path of the audio file to decode
    :param limit: optional number of seconds to keep from the start of the file
    :return: tuple of (channels, samplerate); channels is a list of per-channel
        int16 numpy arrays
    """
    # pydub does not support 24-bit wav files, use wavio when this occurs
    try:
        audiofile = AudioSegment.from_file(filename)

        if limit:
            # pydub slices by milliseconds
            audiofile = audiofile[:limit * 1000]

        # np.fromstring is deprecated (removed in modern numpy); frombuffer is
        # the supported zero-copy replacement for raw int16 sample data
        data = np.frombuffer(audiofile._data, np.int16)

        # De-interleave samples into one array per channel
        channels = []
        for chn in range(audiofile.channels):
            channels.append(data[chn::audiofile.channels])

        fs = audiofile.frame_rate
    except audioop.error:
        fs, _, audiofile = wavio.readwav(filename)

        if limit:
            # wavio returns raw frames, so N seconds == N * fs frames
            # (the previous limit * 1000 only matched at a 1 kHz sample rate)
            audiofile = audiofile[:limit * fs]

        audiofile = audiofile.T
        audiofile = audiofile.astype(np.int16)

        channels = []
        for chn in audiofile:
            channels.append(chn)

    # Return fs rather than audiofile.frame_rate: in the wavio fallback branch
    # audiofile is a numpy array and has no frame_rate attribute, so the old
    # return raised AttributeError for every 24-bit wav input
    return channels, fs
class SearchResults:
    """Container for the outcome of a single repost search."""

    def __init__(
            self,
            checked_url: str,
            search_settings: SearchSettings,
            checked_post: Post = None,
            search_times: SearchTimes = None
    ):
        self.checked_url = checked_url
        self.search_settings = search_settings
        self.checked_post = checked_post
        # Fall back to a fresh timing container when the caller does not supply one
        self.search_times: SearchTimes = search_times if search_times else SearchTimes()
        self.total_searched: int = 0
        self.matches: List[SearchMatch] = []
        # Populated once the search has been persisted to the database
        self.logged_search: Optional[RepostSearch] = None

    @property
    def report_data(self) -> Optional[Text]:
        """
        Return a JSON dump to use in the report message for this search
        :return: dumped JSON, or None when there is no checked post
        """
        if self.checked_post:
            return json.dumps({'post_id': self.checked_post.post_id})
        return None

    def to_dict(self):
        """Serialize the results, including nested objects, to a plain dict."""
        checked_post = None
        if self.checked_post:
            checked_post = self.checked_post.to_dict()
        return {
            'checked_url': self.checked_url,
            'checked_post': checked_post,
            'search_settings': self.search_settings.to_dict(),
            'search_times': self.search_times.to_dict(),
            'matches': [m.to_dict() for m in self.matches]
        }
class NotificationService:
    """Builds notification agents from config and fans messages out to them."""

    def __init__(self, config: Config):
        self.config = config
        self.notification_agents: List[NotificationAgent] = []
        self._load_config_agents()

    def send_notification(self, msg: Text, **kwargs) -> NoReturn:
        """Send msg to every registered agent; per-agent failures are logged, not raised."""
        for agent in self.notification_agents:
            log.info('Sending notification to %s', agent.name)
            log.debug(msg)
            try:
                agent.send(msg, **kwargs)
            except Exception:
                log.exception('Failed to send notification', exc_info=True)

    def _load_config_agents(self):
        """Instantiate each agent declared under 'notification_agents' in the config."""
        if 'notification_agents' not in self.config.CONFIG:
            log.error('No agents to create in config')
            return

        for agent_config in self.config.CONFIG['notification_agents']:
            agent_name = agent_config['name'].lower()
            if agent_name not in AGENT_MAP:
                # Previously fell through to AGENT_MAP[agent_name] and raised
                # KeyError; skip unknown agents instead of crashing startup
                log.error('Unable to locate agent %s in class map', agent_config['name'])
                continue
            agent = AGENT_MAP[agent_name](**agent_config)
            log.info('Created %s agent', agent.name)
            self.notification_agents.append(agent)

    def register_agent(self, agent: NotificationAgent) -> NoReturn:
        """Add an externally constructed agent to the dispatch list."""
        log.info('Registered notification agent %s', agent.name)
        self.notification_agents.append(agent)
Within 20 minutes the bot will see the moderator invite, accept it and create a wiki page with its configuration.
22 | 23 | * Report Reposts 24 | * Lock Reposts 25 | * Remove Reposts 26 | * Mark a post as OC 27 | * Blocking people from mentioning the bot 28 | * Limit mentions to a single time 29 | 30 | # All Features 31 | --- 32 | * Pick if the bot searches all of Reddit or just your Subreddit 33 | * Pick custom matching threshold for what is considered a repost 34 | * Limit how many days back the bot will search for reposts 35 | * Comment on reposts and sticky the comment 36 | * Enable the meme filter for better meme matching 37 | * Mark a post as OC 38 | * Lock reposts 39 | * Remove Reposts 40 | * Report Reposts with custom report message 41 | * Create custom repost comment templates 42 | * Limit user tags to 1 per submission 43 | * Disable user tags of the bot 44 | * Check title similarity 45 | * Check all new submissions 46 | * Limit checks by post type -------------------------------------------------------------------------------- /redditrepostsleuth/core/db/repository/user_report_repo.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta, datetime 2 | from typing import Text, List 3 | 4 | from redditrepostsleuth.core.db.databasemodels import UserReport 5 | from redditrepostsleuth.core.logging import log 6 | 7 | 8 | class UserReportRepo: 9 | def __init__(self, db_session): 10 | self.db_session = db_session 11 | 12 | def add(self, item): 13 | log.debug('Inserting: %s', item) 14 | self.db_session.add(item) 15 | 16 | def get_reports_for_voting(self, days: int) -> List[UserReport]: 17 | since = datetime.now() - timedelta(days=days) 18 | return self.db_session.query(UserReport).filter(UserReport.reported_at > since, 19 | UserReport.report_type == 'False Positive', 20 | UserReport.sent_for_voting == False).order_by( 21 | UserReport.reported_at.desc()).all() 22 | 23 | def get_all(self, limit: int = None, offset: int = None) -> List[UserReport]: 24 | return 
class ImageSearchMatch(SearchMatch):
    """A single candidate match returned from a reverse image search."""

    def __init__(
            self,
            searched_url: Text,
            match_id: int,
            post: Post,
            hamming_distance: int,
            annoy_distance: float,
            hash_size: int,
            title_similarity: int = 0,
    ):
        """
        :param searched_url: URL of the searched image
        :param match_id: ID of the matched post
        :param post: Post obj of this match
        :param hamming_distance: Hamming distance between match and searched image
        :param annoy_distance: Annoy distance between match and searched image
        :param hash_size: Hash size used in search
        :param title_similarity: % similarity of title
        """
        super().__init__(searched_url, post, title_similarity)
        # TODO - Don't need to set attrbs used in super
        self.searched_url = searched_url
        self.match_id = match_id
        self.post = post
        self.hamming_distance = hamming_distance
        self.annoy_distance = annoy_distance
        self.hash_size = hash_size
        self.title_similarity = title_similarity

    @property
    def hamming_match_percent(self):
        """Hamming distance expressed as a match percentage, rounded to 2 places."""
        miss_ratio = self.hamming_distance / self.hash_size
        return round(100 - miss_ratio * 100, 2)

    def to_dict(self):
        """Serialize image-specific fields merged with the base match fields."""
        data = {
            'hamming_distance': self.hamming_distance,
            'annoy_distance': self.annoy_distance,
            'hamming_match_percent': self.hamming_match_percent,
            'hash_size': self.hash_size,
        }
        data.update(super().to_dict())
        return data
class PostsEndpoint:
    """Falcon endpoints for post lookup: by ID, newest image posts, and live Reddit data."""

    def __init__(self, uowm: UnitOfWorkManager, reddit: RedditManager):
        self.uowm = uowm
        self.reddit = reddit

    def on_get(self, req: Request, resp: Response):
        """Return a single post from the database by its Reddit post ID."""
        with self.uowm.start() as uow:
            post = uow.posts.get_by_post_id(req.get_param('post_id', required=True))
            if not post:
                raise HTTPNotFound(title='Post not found',
                                   description='This post was not found in the Repost Sleuth Database')
            resp.body = json.dumps(post.to_dict())

    def on_get_all(self, req: Request, resp: Response):
        """Return the newest posts of type 2 (image), paginated via limit/offset."""
        with self.uowm.start() as uow:
            image_posts = uow.posts.get_newest_by_type(
                2,
                req.get_param_as_int('limit', default=20, required=False),
                req.get_param_as_int('offset', default=None, required=False)
            )
            resp.body = json.dumps([p.to_dict() for p in image_posts])

    def on_get_reddit(self, req: Request, resp: Response):
        """Fetch live post data for a post ID directly from Reddit."""
        post = self.reddit.submission(req.get_param('post_id', required=True))
        if not post:
            raise HTTPNotFound()

        resp.body = json.dumps({
            'post_id': post.id,
            # author is None for deleted accounts; the old unconditional
            # post.author.name raised AttributeError on those posts
            'author': post.author.name if post.author else None,
            'title': post.title,
            'url': post.url,
            'created_at': post.created_utc,
            'score': post.score
        })
class RepostWatchRepo:
    """Data access layer for RepostWatch rows."""

    def __init__(self, db_session):
        self.db_session = db_session

    def add(self, item):
        """Stage a new watch for insert."""
        log.debug('Inserting: %s', item)
        self.db_session.add(item)

    def get_all(self, limit: int = None, offset: int = None) -> Optional[List[RepostWatch]]:
        """Return all watches, optionally paginated."""
        query = self.db_session.query(RepostWatch)
        return query.limit(limit).offset(offset).all()

    def get_all_by_user(self, user: Text, limit: int = None, offset: int = None) -> Optional[List[RepostWatch]]:
        """Return all watches created by the given user, optionally paginated."""
        query = self.db_session.query(RepostWatch).filter(RepostWatch.user == user)
        return query.limit(limit).offset(offset).all()

    def get_by_id(self, id: Text) -> RepostWatch:
        """Fetch a watch by primary key, or None."""
        query = self.db_session.query(RepostWatch)
        return query.filter(RepostWatch.id == id).first()

    def get_all_by_post_id(self, id: str) -> RepostWatch:
        """Return every watch tied to the given post ID."""
        query = self.db_session.query(RepostWatch)
        return query.filter(RepostWatch.post_id == id).all()

    def get_all_active_by_post_id(self, id: int) -> list[RepostWatch]:
        """Return only the enabled watches tied to the given post ID."""
        query = self.db_session.query(RepostWatch)
        return query.filter(RepostWatch.post_id == id, RepostWatch.enabled == True).all()

    def find_existing_watch(self, user: Text, post_id: Text):
        """Fetch the watch a user already has on a post, or None."""
        query = self.db_session.query(RepostWatch)
        return query.filter(RepostWatch.user == user, RepostWatch.post_id == post_id).first()

    def remove(self, item: RepostWatch):
        """Stage a watch for deletion."""
        log.debug('Deleting post %s', item.id)
        self.db_session.delete(item)

    def remove_by_post_id(self, post_id: str) -> None:
        """Bulk-delete every watch tied to the given post ID."""
        query = self.db_session.query(RepostWatch)
        query.filter(RepostWatch.post_id == post_id).delete()

    def update(self, item: RepostWatch):
        """Merge changes on a detached watch into the session."""
        self.db_session.merge(item)
def check_for_post_watch(matches: list[ImageSearchMatch], uow: UnitOfWork) -> list[dict]:
    """Pair each search match with any active repost watches on the matched post.

    :param matches: image search matches to inspect
    :param uow: unit of work used for database access
    :return: list of {'match': ImageSearchMatch, 'watch': RepostWatch} dicts
    """
    results = []
    for match in matches:
        active_watches = uow.repostwatch.get_all_active_by_post_id(match.match_id)
        if not active_watches:
            continue
        log.info('Found %s active watch requests for post %s', len(active_watches), match.post.post_id)
        results.extend({'match': match, 'watch': watch} for watch in active_watches)
    return results


def repost_watch_notify(watches: List[dict[SearchMatch, RepostWatch]], reddit: RedditManager, response_handler: ResponseHandler, repost: Post):
    """Send a private message to each user whose watched post was reposted.

    :param watches: {'match', 'watch'} pairs produced by check_for_post_watch
    :param reddit: Reddit client used to resolve watch owners to redditors
    :param response_handler: service that delivers the private message
    :param repost: the newly detected repost
    """
    for entry in watches:
        # TODO - What happens if we don't get redditor back?
        redditor = reddit.redditor(entry['watch'].user)
        msg = WATCH_NOTIFY_OF_MATCH.format(
            watch_shortlink=f"https://redd.it/{entry['watch'].post_id}",
            repost_shortlink=f"https://redd.it/{repost.post_id}",
            percent_match=entry['match'].hamming_match_percent
        )
        log.info('Sending repost watch PM to %s', redditor.name)
        response_handler.send_private_message(
            redditor,
            msg,
            'A post you are watching has been reposted',
            'watch',
        )
class ImageSearchTimes(SearchTimes):
    """
    Class to dynamically start and stop perf_counts with variable names

    Extends SearchTimes with the image-specific timing buckets; each attribute
    holds the elapsed time for one stage of an image search and defaults to 0.
    """
    def __init__(self):
        super().__init__()
        self.pre_annoy_filter_time: float = 0.0
        self.index_search_time: float = 0.0
        self.meme_filter_time: float = 0.0
        self.meme_detection_time: float = 0.0
        self.set_match_post_time: float = 0.0
        self.remove_duplicate_time: float = 0.0
        self.set_match_hamming: float = 0.0
        self.image_search_api_time: float = 0.0
        self.filter_removed_posts_time: float = 0.0
        self.filter_deleted_posts_time: float = 0.0
        self.set_meme_hash_time: float = 0.0
        self.set_closest_meme_hash_time: float = 0.0
        self.distance_filter_time: float = 0.0
        self.get_closest_match_time: float = 0.0

    def to_dict(self):
        """Merge the image-specific timings with the base-class timings."""
        return {**{
            'pre_annoy_filter_time': self.pre_annoy_filter_time,
            'index_search_time': self.index_search_time,
            'meme_filter_time': self.meme_filter_time,
            'meme_detection_time': self.meme_detection_time,
            'set_match_post_time': self.set_match_post_time,
            'remove_duplicate_time': self.remove_duplicate_time,
            'set_match_hamming': self.set_match_hamming,
            # 'image_search_api_time' was previously listed twice in this
            # literal; the duplicate key silently overwrote the first entry
            'image_search_api_time': self.image_search_api_time,
            'filter_removed_posts_time': self.filter_removed_posts_time,
            'filter_deleted_posts_time': self.filter_deleted_posts_time,
            'set_meme_hash_time': self.set_meme_hash_time,
            'set_closest_meme_hash_time': self.set_closest_meme_hash_time,
            'distance_filter_time': self.distance_filter_time,
            'get_closest_match_time': self.get_closest_match_time
        }, **super().to_dict()}
search_settings.max_matches = 500 35 | 36 | post = None 37 | if post_id: 38 | with uowm.start() as uow: 39 | post = uow.posts.get_by_post_id(post_id) 40 | 41 | try: 42 | return image_svc.check_image( 43 | url, 44 | post=post, 45 | search_settings=search_settings, 46 | source='api' 47 | ) 48 | except NoIndexException: 49 | log.error('No available index for image repost check. Trying again later') 50 | raise HTTPServiceUnavailable('Search API is not available.', 'The search API is not currently available') 51 | except ImageConversionException as e: 52 | log.warning('Problem hashing the provided url: %s', str(e)) 53 | raise HTTPBadRequest('Invalid URL', 'The provided URL is not a valid image') -------------------------------------------------------------------------------- /alembic/versions-pre-refactor/505caf95a77e_iamge_search_update.py: -------------------------------------------------------------------------------- 1 | """iamge search update 2 | 3 | Revision ID: 505caf95a77e 4 | Revises: c8f1e18b7ebc 5 | Create Date: 2021-02-21 11:25:47.481725 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | from sqlalchemy.dialects import mysql 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = '505caf95a77e' 14 | down_revision = 'c8f1e18b7ebc' 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade(): 20 | # ### commands auto generated by Alembic - please adjust! 
### 21 | op.drop_index('comment_id', table_name='reddit_comments') 22 | op.drop_index('idx_comment_hash', table_name='reddit_comments') 23 | op.drop_index('idx_comment_id', table_name='reddit_comments') 24 | op.drop_table('reddit_comments') 25 | op.add_column('reddit_image_search', sa.Column('filter_crossposts', sa.Boolean(), nullable=True)) 26 | op.add_column('reddit_image_search', sa.Column('filter_same_author', sa.Boolean(), nullable=True)) 27 | # ### end Alembic commands ### 28 | 29 | 30 | def downgrade(): 31 | # ### commands auto generated by Alembic - please adjust! ### 32 | op.drop_column('reddit_image_search', 'filter_same_author') 33 | op.drop_column('reddit_image_search', 'filter_crossposts') 34 | op.create_table('reddit_comments', 35 | sa.Column('id', mysql.INTEGER(), autoincrement=True, nullable=False), 36 | sa.Column('comment_id', mysql.VARCHAR(length=100), nullable=False), 37 | sa.Column('body', mysql.TEXT(charset='utf8mb4', collation='utf8mb4_general_ci'), nullable=True), 38 | sa.Column('ingested_at', mysql.DATETIME(), nullable=True), 39 | sa.Column('text_hash', mysql.VARCHAR(length=32), nullable=True), 40 | sa.Column('perma_link', mysql.VARCHAR(length=300), nullable=True), 41 | sa.PrimaryKeyConstraint('id'), 42 | mysql_default_charset='utf8', 43 | mysql_engine='InnoDB', 44 | mysql_row_format='COMPRESSED' 45 | ) 46 | op.create_index('idx_comment_id', 'reddit_comments', ['comment_id'], unique=False) 47 | op.create_index('idx_comment_hash', 'reddit_comments', ['text_hash'], unique=False) 48 | op.create_index('comment_id', 'reddit_comments', ['comment_id'], unique=True) 49 | # ### end Alembic commands ### 50 | -------------------------------------------------------------------------------- /utility_scripts/push_shift_backfill.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | from datetime import datetime 4 | from typing import Text 5 | 6 | import requests 7 | 8 | from 
def parse_and_submit_to_queue(data: list) -> None:
    """
    Send a batch of Pushshift submissions to the 'pushshift' Celery queue.

    :param data: List of submission dicts, sorted newest first
    """
    # data[-1] is the oldest submission, data[0] the newest (sorted desc)
    log.debug('Oldest: %s | Newest: %s', datetime.utcfromtimestamp(data[-1]['created_utc']),
              datetime.utcfromtimestamp(data[0]['created_utc']))

    try:
        save_pushshift_results.apply_async((data,), queue='pushshift')
    except Exception:
        # Fixed: original declared '-> Text' but never returned a value, and
        # passed exc_info=False to log.exception, suppressing the traceback
        log.exception('Failed to send to celery')
def configure_logger(name: Text = None, format: Text = None, filters: list[logging.Filter] = None) -> logging.Logger:
    """
    Configure and return a logger that splits output between stdout and stderr.

    INFO and below go to stdout, WARNING and above to stderr. Any handlers
    already attached to the logger are discarded first.

    :param name: Logger name; defaults to the root logger
    :param format: Log record format string; defaults to default_format
    :param filters: Extra filters applied to both handlers
    """
    # None instead of a mutable [] default so the list is not shared
    # between calls (classic mutable-default-argument bug)
    filters = filters or []
    log = logging.getLogger(name or '')
    log.setLevel(os.getenv('LOG_LEVEL', 'DEBUG'))
    log.handlers = []  # drop handlers from any previous configuration
    formatter = logging.Formatter(format or default_format)

    general_handler = logging.StreamHandler(sys.stdout)
    general_handler.setFormatter(formatter)
    general_handler.setLevel(os.getenv('LOG_LEVEL', 'DEBUG'))

    error_handler = logging.StreamHandler(sys.stderr)
    error_handler.setFormatter(formatter)

    for fltr in filters:
        general_handler.addFilter(fltr)
        error_handler.addFilter(fltr)

    # Route INFO and below to stdout only, WARNING and above to stderr only.
    # (Original also created an unused 'error_filter' variable; removed.)
    general_handler.addFilter(SingleLevelFilter(logging.INFO, False))
    error_handler.addFilter(SingleLevelFilter(logging.WARNING))

    log.addHandler(general_handler)
    log.addHandler(error_handler)
    return log
### 21 | op.create_table('config_message_templates', 22 | sa.Column('id', sa.Integer(), nullable=False), 23 | sa.Column('template_name', sa.String(length=100), nullable=False), 24 | sa.Column('template', sa.String(length=2000), nullable=False), 25 | sa.Column('created_at', sa.DateTime(), nullable=False), 26 | sa.Column('updated_at', sa.DateTime(), nullable=True), 27 | sa.PrimaryKeyConstraint('id') 28 | ) 29 | op.create_table('config_settings', 30 | sa.Column('id', sa.Integer(), nullable=False), 31 | sa.Column('comment_karma_flag_threshold', sa.Integer(), nullable=True), 32 | sa.Column('comment_karma_remove_threshold', sa.Integer(), nullable=True), 33 | sa.Column('index_api', sa.String(length=150), nullable=True), 34 | sa.Column('util_api', sa.String(length=150), nullable=True), 35 | sa.Column('top_post_offer_watch', sa.Boolean(), nullable=True), 36 | sa.Column('repost_watch_enabled', sa.Boolean(), nullable=True), 37 | sa.Column('ingest_repost_check_image', sa.Boolean(), nullable=True), 38 | sa.Column('ingest_repost_check_link', sa.Boolean(), nullable=True), 39 | sa.Column('ingest_repost_check_text', sa.Boolean(), nullable=True), 40 | sa.Column('ingest_repost_check_video', sa.Boolean(), nullable=True), 41 | sa.Column('image_repost_target_image_match', sa.Integer(), nullable=True), 42 | sa.Column('image_repost_target_image_meme_match', sa.Integer(), nullable=True), 43 | sa.Column('image_repost_target_annoy_distance', sa.Float(), nullable=True), 44 | sa.PrimaryKeyConstraint('id') 45 | ) 46 | # ### end Alembic commands ### 47 | 48 | 49 | def downgrade(): 50 | # ### commands auto generated by Alembic - please adjust! 
def log_queue_size(event_logger):
    """
    Poll Redis forever and emit Celery queue length metrics.

    Every 2 seconds the length of each queue key is saved via event_logger,
    along with the session count read from Redis DB 2. On a connection
    failure the loop sleeps 30 seconds and retries.

    :param event_logger: EventLogging instance used to persist the metrics
    """
    skip_keys = {'unacked_index', 'unacked_mutex', 'unacked', 'prof_token'}
    while True:
        try:
            # Clients are (re)created inside the loop so a dropped connection
            # is re-established on the next pass
            client = redis.Redis(host=config.redis_host, port=config.redis_port, db=config.redis_database, password=config.redis_password)
            session_client = redis.Redis(host=config.redis_host, port=config.redis_port, db=2, password=config.redis_password)
            for queue in client.scan_iter():
                queue_name = queue.decode('utf-8').replace('_kombu.binding.', '')
                if len(queue_name) > 30 or queue_name in skip_keys or 'celery' in queue_name:
                    continue
                try:
                    queue_length = client.llen(queue_name)
                except ResponseError:
                    # Key exists but is not a list type; skip it
                    continue
                event_logger.save_event(
                    CeleryQueueSize(queue_name, queue_length, event_type='queue_update', env=os.getenv('RUN_ENV', 'dev')))

            session_event = {
                'measurement': 'Session_Count',
                'fields': {
                    'count': session_client.dbsize()
                },
            }
            event_logger.write_raw_points([session_event])
            time.sleep(2)
        except (redis.exceptions.ConnectionError, ConnectionError):
            # Fixed: redis-py raises redis.exceptions.ConnectionError, which is
            # NOT a subclass of the builtin ConnectionError the original caught,
            # so connection failures previously escaped this handler entirely
            log.error('Failed to connect to Redis')
            time.sleep(30)
def load_posts(start_date: datetime, end_date: datetime):
    """
    Stream posts from the legacy database and queue them for import.

    Rows are read with a server-side cursor and sent to the 'post_import'
    Celery queue in batches of ~200.

    :param start_date: Unused; retained for the commented-out date-range query
    :param end_date: Unused; retained for the commented-out date-range query
    """
    with conn.cursor() as cur:
        # was at 650gb last schema
        #query = f"SELECT * FROM reddit_post WHERE (created_at BETWEEN '{start_date.year}-{start_date.month}-{start_date.day}' AND '{end_date.year}-{end_date.month}-{end_date.day}')"

        # Last real import 1694610999
        query = f"SELECT * FROM reddit_post WHERE id > {int(os.getenv('START_ID', 1713041070))}"
        cur.execute(query)
        batch = []
        for row in cur:
            batch.append(row)
            if len(batch) > 200:
                print('sending batch')
                print(f'{batch[-1]["id"]} - {batch[-1]["created_at"]}')
                import_post.apply_async((batch,), queue='post_import')
                batch = []

        # Fixed: flush the final partial batch; the original silently dropped
        # up to 200 trailing rows when the cursor was exhausted
        if batch:
            import_post.apply_async((batch,), queue='post_import')
# Fixed typo: original passed 'ignore_reseults', which Celery silently ignored;
# the real option name is 'ignore_result'
@celery.task(bind=True, base=SqlAlchemyTask, ignore_result=True, serializer='pickle')
def save_subreddit(self, subreddit_name: str) -> None:
    """
    Create a Subreddit row if one does not already exist.

    On a successful insert, an update_subreddit_data task is queued to
    populate subscriber count and NSFW status.

    :param subreddit_name: Name of the subreddit to save
    """
    try:
        with self.uowm.start() as uow:
            existing = uow.subreddit.get_by_name(subreddit_name)
            if existing:
                log.debug('Subreddit %s already exists', subreddit_name)
                return
            uow.subreddit.add(Subreddit(name=subreddit_name))
            uow.commit()
            log.debug('Saved Subreddit %s', subreddit_name)
            update_subreddit_data.apply_async((subreddit_name,))
    except Exception:
        # Give the exception log a real message instead of ''
        log.exception('Failed to save subreddit %s', subreddit_name)
def parse_and_submit_to_queue(data) -> Text:
    """
    Queue a batch of beta-Pushshift submissions for ingest and return the oldest SID.

    :param data: List of submission dicts, sorted newest first (the caller
        passes results['data'], i.e. the list itself)
    :return: The 'sid' of the oldest submission, or None on failure
    """
    # Fixed: the caller passes the submission list (results['data']), so it is
    # indexed directly; the original indexed data['data'], which raised
    # TypeError on every call
    log.debug('Oldest: %s | Newest: %s', datetime.utcfromtimestamp(data[-1]['created_utc']),
              datetime.utcfromtimestamp(data[0]['created_utc']))

    try:
        save_pushshift_results.apply_async((data,), queue='pushshift')
    except Exception:
        log.exception('Failed to send to pushshift')

    try:
        return data[-1]['sid']
    except (IndexError, KeyError):
        # Fixed: an empty list raises IndexError and a missing field raises
        # KeyError; the original caught ValueError, which neither raises
        log.error('Failed to get oldest SID')
class ImageSearchHistory:
    """Falcon resource exposing image repost search history endpoints."""

    def __init__(self, uowm: UnitOfWorkManager):
        self.uowm = uowm

    def on_get_search_history(self, req: Request, resp: Response):
        """Return all repost searches recorded for a given Reddit post ID."""
        with self.uowm.start() as uow:
            # Fixed: uow.posts is the repository name used elsewhere in this
            # class (and in the rest of the codebase); the original referenced
            # the non-existent attribute uow.post
            post = uow.posts.get_by_post_id(req.get_param('post_id'))
            if not post:
                raise HTTPBadRequest(title='Unable to find post', description=f'Cannot locate post with ID {req.get_param("post_id")}')
            results = uow.repost_search.get_by_post_id(post.id)
            resp.body = json.dumps([r.to_dict() for r in results])

    def on_get_monitored_sub_with_history(self, req: Request, resp: Response):
        """Return repost searches for a subreddit, each paired with the checked post."""
        results = []
        limit = req.get_param_as_int('limit', required=False, default=20)
        if limit == -1:
            # -1 is the client's "no limit" sentinel; cap it at 1000
            limit = 1000
        with self.uowm.start() as uow:
            checked = uow.repost_search.get_by_subreddit(
                req.get_param('subreddit', required=True),
                limit=limit,
                offset=req.get_param_as_int('offset', required=False, default=None),
                only_reposts=req.get_param_as_bool('repost_only', required=False, default=False)
            )
            for search in checked:
                post = uow.posts.get_by_id(search.post_id)
                results.append({
                    # Guard against a missing post so a deleted/unknown row
                    # does not raise AttributeError (original built a dead
                    # placeholder dict 'r' then unconditionally dereferenced
                    # post.to_dict())
                    'checked_post': post.to_dict() if post else None,
                    'search': search.to_dict(),
                })
        resp.body = json.dumps(results)

    def on_get_monitored_sub_checked(self, req: Request, resp: Response):
        """Return monitored-sub check records for a subreddit."""
        with self.uowm.start() as uow:
            results = uow.monitored_sub_checked.get_by_subreddit(
                req.get_param('subreddit', required=True),
                limit=req.get_param_as_int('limit', required=False, default=20),
                offset=req.get_param_as_int('offset', required=False, default=None)
            )
            resp.body = json.dumps([r.to_dict() for r in results])
class TestSubredditConfigUpdater(TestCase):
    """Unit tests for SubredditConfigUpdater's wiki-config mapping helpers.

    All collaborators (UoW manager, Reddit client, response handler) are
    MagicMocks; only the pure config-mapping logic is exercised.
    """

    def test__create_wiki_config_from_database_mapped_value(self):
        # 'only_comment_on_repost' is exposed under a different name than the
        # DB column (repost_only); verify the mapping is applied
        config = Config(sub_monitor_exposed_config_options=['only_comment_on_repost'])
        monitored_sub = MonitoredSub(name='test', repost_only=False)
        config_updater = self.get_config_updater(config)
        r = config_updater._create_wiki_config_from_database(monitored_sub)
        self.assertTrue('only_comment_on_repost' in r)
        self.assertFalse(r['only_comment_on_repost'])

    def test__create_wiki_config_from_database_unmapped_value(self):
        # 'remove_repost' maps 1:1 to the DB column of the same name
        config = Config(sub_monitor_exposed_config_options=['remove_repost'])
        monitored_sub = MonitoredSub(name='test', remove_repost=True)
        config_updater = self.get_config_updater(config)
        r = config_updater._create_wiki_config_from_database(monitored_sub)
        self.assertTrue('remove_repost' in r)
        self.assertTrue(r['remove_repost'])

    def test__update_monitored_sub_from_wiki_unmapped_value(self):
        # A wiki value should be written back onto the MonitoredSub in place
        config = Config(sub_monitor_exposed_config_options=['remove_repost'])
        monitored_sub = MonitoredSub(name='test', remove_repost=False)
        config_updater = self.get_config_updater(config)
        config_updater._update_monitored_sub_from_wiki(monitored_sub, {'remove_repost': True})
        self.assertTrue(monitored_sub.remove_repost)

    def test__get_missing_config_values_one_missing(self):
        # Two options exposed, the wiki dict supplies only one; expect the
        # other to be reported missing. (monitored_sub is unused here.)
        config = Config(sub_monitor_exposed_config_options=['only_comment_on_repost', 'repost_only'])
        monitored_sub = MonitoredSub(name='test', repost_only=False)
        config_updater = self.get_config_updater(config)
        r = config_updater._get_missing_config_values( {'repost_only': True})
        self.assertTrue(len(r) == 1)
        self.assertTrue('only_comment_on_repost' in r)


    def get_config_updater(self, config: Config):
        # Helper, not a test: build an updater with all collaborators mocked
        uowm = MagicMock()
        reddit = MagicMock()
        res_handler = MagicMock()
        return SubredditConfigUpdater(uowm, reddit, res_handler, config)
def reddit_submission_to_post(submission: dict, post_type: str = None) -> Post:
    """
    Convert a raw Reddit API submission dict into a Post model.

    :param submission: Submission data as returned by the Reddit JSON API
    :param post_type: Optional post type; detected from the submission if omitted
    :return: Populated (unsaved) Post instance
    """
    post = Post()
    post.post_id = submission.get('id', None)
    post.url = submission.get('url', None)
    post.perma_link = submission.get('permalink', None)
    post.author = submission.get('author', None)
    post.selftext = submission.get('selftext', None)
    post.created_at = datetime.utcfromtimestamp(submission.get('created_utc', None))
    post.subreddit = submission.get('subreddit', None)
    post.title = submission.get('title', None)
    crosspost_parent = submission.get('crosspost_parent', None)
    if crosspost_parent:
        # Fixed: the original chained assignment
        # (post.crosspost_parent = post.is_crosspost = True) stored the
        # boolean True instead of the parent reference; the sibling
        # submission_to_post() stores the actual crosspost_parent value
        post.crosspost_parent = crosspost_parent
        post.is_crosspost = True

    if not post_type:
        post_type = get_post_type(submission)

    post.post_type_id = get_post_type_id(post_type)
    post.nsfw = submission.get('over_18', None)

    return post
-------------------------------------------------------------------------------- 1 | from unittest import TestCase, mock 2 | from unittest.mock import Mock 3 | 4 | from redditrepostsleuth.core.db.databasemodels import Post 5 | from redditrepostsleuth.core.model.repostmatch import RepostMatch 6 | from datetime import datetime 7 | 8 | from redditrepostsleuth.core.model.search.search_match import SearchMatch 9 | from redditrepostsleuth.core.util.repost.repost_helpers import sort_reposts, get_first_active_match, get_closest_image_match 10 | from tests.core.helpers import get_image_search_results_multi_match 11 | 12 | 13 | class TestHelpers(TestCase): 14 | 15 | def test_sort_reposts_correct_order(self): 16 | match1 = RepostMatch() 17 | match2 = RepostMatch() 18 | match3 = RepostMatch() 19 | post1 = Post(id=1, created_at=datetime.fromtimestamp(1575508228)) 20 | post2 = Post(id=2, created_at=datetime.fromtimestamp(1572916228)) 21 | post3 = Post(id=3, created_at=datetime.fromtimestamp(1570237828)) 22 | match1.post = post1 23 | match2.post = post2 24 | match3.post = post3 25 | matches = [match1, match2, match3] 26 | 27 | result = sort_reposts(matches) 28 | 29 | self.assertEqual(3, result[0].post.id) 30 | 31 | def test_get_first_active_match(self): 32 | def get_dummy_res(url, **kwargs): 33 | if url == 'www.bad.com': 34 | return Mock(status_code=400) 35 | else: 36 | return Mock(status_code=200) 37 | with mock.patch('redditrepostsleuth.core.util.repost.repost_helpers.requests.head') as mock_head: 38 | mock_head.side_effect = get_dummy_res 39 | matches = [ 40 | SearchMatch('www.dummy.com', Post(id=1, url='www.bad.com')), 41 | SearchMatch('www.dummy.com', Post(id=2, url='www.bad.com')), 42 | SearchMatch('www.dummy.com', Post(id=3, url='www.good.com')), 43 | SearchMatch('www.dummy.com', Post(id=4, url='www.good.com')), 44 | ] 45 | r = get_first_active_match(matches) 46 | self.assertEqual(3, r.post.id) 47 | 48 | def test_get_closest_image_match(self): 49 | search_results = 
class SummonsRepository:
    """Data access layer for Summons rows (bot mentions/commands received from users)."""

    def __init__(self, db_session):
        # db_session: an active SQLAlchemy session.  The repository never
        # commits; flushing/committing is left to the owning unit of work.
        self.db_session = db_session
    def add(self, item):
        """Stage a new Summons for insertion on the session."""
        log.debug('Inserting: %s', item)
        self.db_session.add(item)

    def get_all(self) -> List[Summons]:
        """Return every summons in the table."""
        return self.db_session.query(Summons).all()

    def get_by_post_id(self, post_id) -> List[Summons]:
        """Return all summonses attached to the given Reddit post ID."""
        return self.db_session.query(Summons).filter(Summons.post_id == post_id).all()

    def get_by_id(self, id: int) -> Summons:
        """Return the summons with the given primary key, or None."""
        result = self.db_session.query(Summons).filter(Summons.id == id).first()
        return result

    def get_by_user_interval(self, user: Text, interval_hours: int = 1) -> Optional[List[Summons]]:
        """Return summonses made by *user* within the last *interval_hours* hours."""
        since = datetime.now() - timedelta(hours=interval_hours)
        return self.db_session.query(Summons).filter(Summons.requestor == user, Summons.summons_received_at > since).all()

    def get_by_comment_id(self, id: str) -> Summons:
        """Return the summons originating from the given comment ID, or None."""
        return self.db_session.query(Summons).filter(Summons.comment_id == id).first()

    def get_unreplied(self, limit: int = 10) -> List[Summons]:
        """Return up to *limit* summonses with no reply yet, newest first.

        The related post is eagerly loaded via joinedload.  ``== None`` is
        intentional: SQLAlchemy translates it to ``IS NULL``.
        """
        return self.db_session.query(Summons).options(joinedload(Summons.post)).filter(Summons.summons_replied_at == None).order_by(Summons.summons_received_at.desc()).limit(limit).all()

    def get_count(self, hours: int = None) -> Optional[int]:
        """Return total summons count, optionally restricted to the last *hours* hours."""
        query = self.db_session.query(func.count(Summons.id))
        if hours:
            query = query.filter(Summons.summons_received_at > (datetime.now() - timedelta(hours=hours)))
        r = query.first()
        # first() on a count query yields a 1-tuple; unpack to the scalar
        return r[0] if r else None

    def get_count_by_subreddit(self, subreddit: Text, hours: int = None):
        """Return the summons count for a subreddit, optionally time-restricted.

        NOTE(review): unlike get_count, this returns the raw result row
        (a 1-tuple) instead of the unpacked scalar — confirm callers expect
        the tuple before changing it.
        """
        query = self.db_session.query(func.count(Summons.id)).filter(Summons.subreddit == subreddit)
        if hours:
            query = query.filter(Summons.summons_received_at > (datetime.now() - timedelta(hours=hours)))
        return query.first()

    def remove(self, item: Summons):
        """Stage deletion of a single summons."""
        self.db_session.delete(item)
    def remove_by_post_id(self, post_id: str) -> None:
        """Bulk-delete all summonses attached to the given Reddit post ID."""
        self.db_session.query(Summons).filter(Summons.post_id == post_id).delete()
'https://api.reddit.com/api/info?id=t3_1216baz,t3_1216baw,t3_1216bao' 18 | result = get_post_ids_from_reddit_req_url(url) 19 | self.assertEqual(expected, result) 20 | 21 | def test_db_ids_from_post_ids_all_valid_return_all(self): 22 | post_ids = ['abc123', 'abc345', 'abc456'] 23 | expected = [12345,22345,32345] 24 | posts = [ 25 | Post(id=12345, post_id='abc123'), 26 | Post(id=22345, post_id='abc345'), 27 | Post(id=32345, post_id='abc456'), 28 | ] 29 | result = db_ids_from_post_ids(post_ids, posts) 30 | 31 | self.assertEqual(expected, result) 32 | 33 | def test_db_ids_from_post_ids_missing_one_return_two(self): 34 | post_ids = ['abc123', 'abc345',] 35 | expected = [12345,22345] 36 | posts = [ 37 | Post(id=12345, post_id='abc123'), 38 | Post(id=22345, post_id='abc345'), 39 | Post(id=32345, post_id='abc456'), 40 | ] 41 | result = db_ids_from_post_ids(post_ids, posts) 42 | 43 | self.assertEqual(expected, result) 44 | 45 | def test_merge_results(self): 46 | results_one = DeleteCheckResult( 47 | to_delete=['sdfsdf', 'asdfsdf'], to_update=['sahdf', 'kjelikj'], to_recheck=['sadfd'] 48 | ) 49 | results_two = DeleteCheckResult( 50 | to_delete=['klkl;',], to_update=['safhdf', 'kjsdikj', 'eflodf'], to_recheck=['weree'] 51 | ) 52 | 53 | merged = merge_results([results_one, results_two]) 54 | self.assertEqual(5, len(merged.to_update)) 55 | self.assertEqual(3, len(merged.to_delete)) 56 | self.assertEqual(2, len(merged.to_recheck)) 57 | self.assertEqual(10, merged.count) 58 | -------------------------------------------------------------------------------- /alembic/versions-pre-refactor/cfffe117cd7b_add_stats_image_repost.py: -------------------------------------------------------------------------------- 1 | """add stats image repost 2 | 3 | Revision ID: cfffe117cd7b 4 | Revises: 77a4e176572e 5 | Create Date: 2020-10-14 07:42:26.262883 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | from sqlalchemy.dialects import mysql 11 | 12 | # revision identifiers, 
def upgrade():
    """Replace the legacy bot_stat table with stats_general and stats_top_image_repost."""
    # ### commands auto generated by Alembic - please adjust! ###
    # Aggregate bot-wide counters (one row expected)
    op.create_table('stats_general',
    sa.Column('id', sa.Integer(), nullable=False),
    sa.Column('image_reposts_detected', sa.Integer(), nullable=True),
    sa.Column('link_reposts_detected', sa.Integer(), nullable=True),
    sa.Column('private_messages_sent', sa.Integer(), nullable=True),
    sa.Column('comments_left', sa.Integer(), nullable=True),
    sa.Column('summons_received', sa.Integer(), nullable=True),
    sa.Column('karma_gained', sa.Integer(), nullable=True),
    sa.PrimaryKeyConstraint('id')
    )
    # Leaderboard of most-reposted images over a rolling window of `days`
    op.create_table('stats_top_image_repost',
    sa.Column('id', sa.Integer(), nullable=False),
    sa.Column('post_id', sa.String(length=100), nullable=False),
    sa.Column('repost_count', sa.Integer(), nullable=False),
    sa.Column('days', sa.Integer(), nullable=False),
    sa.Column('nsfw', sa.Boolean(), nullable=False),
    sa.PrimaryKeyConstraint('id')
    )
    # bot_stat is superseded by stats_general; its data is NOT migrated here
    op.drop_table('bot_stat')
    # ### end Alembic commands ###
### 45 | op.create_table('bot_stat', 46 | sa.Column('id', mysql.INTEGER(), autoincrement=True, nullable=False), 47 | sa.Column('image_reposts_detected', mysql.INTEGER(), autoincrement=False, nullable=True), 48 | sa.Column('link_reposts_detected', mysql.INTEGER(), autoincrement=False, nullable=True), 49 | sa.Column('private_messages_sent', mysql.INTEGER(), autoincrement=False, nullable=True), 50 | sa.Column('summons_received', mysql.INTEGER(), autoincrement=False, nullable=True), 51 | sa.Column('karma_gained', mysql.INTEGER(), autoincrement=False, nullable=True), 52 | sa.Column('comments_left', mysql.INTEGER(), autoincrement=False, nullable=True), 53 | sa.PrimaryKeyConstraint('id'), 54 | mysql_default_charset='utf8', 55 | mysql_engine='InnoDB' 56 | ) 57 | op.drop_table('stats_top_image_repost') 58 | op.drop_table('stats_general') 59 | # ### end Alembic commands ### 60 | -------------------------------------------------------------------------------- /tests/core/test_duplicateImageService.py: -------------------------------------------------------------------------------- 1 | import json 2 | from types import SimpleNamespace 3 | from unittest import TestCase, mock 4 | from unittest.mock import MagicMock, Mock 5 | from requests.exceptions import ConnectionError 6 | 7 | from redditrepostsleuth.core.db.databasemodels import Post 8 | from redditrepostsleuth.core.services.duplicateimageservice import DuplicateImageService 9 | from redditrepostsleuth.core.exception import NoIndexException 10 | from redditrepostsleuth.core.model.search.image_search_match import ImageSearchMatch 11 | 12 | 13 | 14 | class TestDuplicateImageService(TestCase): 15 | 16 | 17 | def test__get_matches_connection_error(self): 18 | with mock.patch('redditrepostsleuth.core.services.duplicateimageservice.requests.get') as mock_get: 19 | dup_svc = DuplicateImageService(Mock(), Mock(), Mock(), config=MagicMock(index_api='http://test.com')) 20 | mock_get.side_effect = ConnectionError('ouch!') 21 | 
    def test__build_search_results(self):
        # NOTE(review): this test patches the method under test
        # (_build_search_results) and contains no assertions, so it passes
        # vacuously and verifies nothing.  It should either assert on the
        # mapped results or be removed — TODO confirm intended behavior.
        search_results = [
            {'id': 123, 'distance': .123}
        ]
        with mock.patch.object(DuplicateImageService, '_build_search_results') as dup:
            dup._set_match_posts.return_value = {}
    def test__strip_summons_flags__junk_input_commandtag(self):
        # NOTE(review): a method with this exact name is defined twice in this
        # class with identical bodies; Python keeps only the last definition,
        # so the earlier copy never runs.  One of the two should be deleted
        # (or renamed if a different scenario was intended) — TODO confirm.
        config = Config(redis_host='dummy')
        sum_handler = SummonsHandler(MagicMock(), MagicMock(), MagicMock(), MagicMock(), MagicMock(), config=config)
        summons = 'This test ?repost some junk'
        self.assertEqual(sum_handler._strip_summons_flags(summons), 'some junk')
def get_memeory():
    """Return the combined Redis memory (in GiB) used by the ingest queue keys.

    Prints the rounded total as a side effect.
    NOTE(review): the name looks like a typo of "get_memory"; left unchanged
    because callers may reference it by this spelling.
    """
    keys = ['pushshift_intake', 'pushshift_ingest', 'postingest', 'repost_image']
    used_bytes = 0
    for key in keys:
        # MEMORY USAGE returns None for keys that do not exist
        used = client.memory_usage(key)
        if used:
            used_bytes = used_bytes + used
    # bytes -> GiB
    total = used_bytes / 1024 / 1024 / 1024
    print(str(round(total, 2)))
    return total
save_pushshift_results_archive.apply_async((batch,), queue='pushshift_intake') 50 | except Exception: 51 | continue 52 | print('Sent batch to celery: ' + str(datetime.utcfromtimestamp(batch[0]['created_utc'])) + ' (' + str(object['created_utc']) + ')' ) 53 | batch = [] 54 | 55 | while True: 56 | r = influx.query( 57 | 'SELECT mean("value") FROM "redis_value" WHERE ("type" = \'memory\') AND time >= now() - 15m GROUP BY time(10s) fill(null)') 58 | total = r.raw['series'][0]['values'][-2][1] 59 | if total and total >= 12884901888: 60 | print('Waiting for memory to lower: ' + str(total / 1024 / 1024 / 1024)) 61 | time.sleep(20) 62 | else: 63 | break 64 | 65 | 66 | save_pushshift_results_archive.apply_async((batch,), queue='pushshift_intake') 67 | batch = [] 68 | print('sent last batch') 69 | 70 | -------------------------------------------------------------------------------- /wiki/message_templates.md: -------------------------------------------------------------------------------- 1 | # Custom Message Templates 2 | 3 | 4 | The bot exposes several variables that can be used in the bot's comments as well as report messages. These allow you to define a custom message that includes values unique to the search results. 
5 | 6 | ## Bot Comments 7 | 8 | * Total Posts Searched: {total_searched} 9 | * Search Execute Time: {search_time} 10 | * Total Matches: {match_count} 11 | * Post Type: {post_type} 12 | * Name of Current Subreddit: {this_subreddit} 13 | * Plural or Singular Time/Times word based on result count: {times_word} 14 | * Short link to this post: {post_shortlink} 15 | * Subreddit of closest match: {closest_sub} 16 | * URL of closest match: {closest_url} 17 | * Shortlink of closest match: {closest_shortlink} 18 | * Matching % of closest match: {closest_percent_match} 19 | * Closest match created date: {closest_created_at} 20 | * Meme filter used: {meme_filter} 21 | * Oldest Match Created: {oldest_created_at} 22 | * Oldest Match Shortlink: {oldest_shortlink} 23 | * Oldest Percent Match: {oldest_percent_match} 24 | * Oldest Sub: {oldest_sub} 25 | * Newest Match Created: {newest_created_at} 26 | * Newest Match Shortlink: {newest_shortlink} 27 | * Newest Percent Match: {newest_percent_match} 28 | * Newest Sub: {newest_sub}closest_shortlink 29 | * List of All Matches: {match_list} 30 | * Post Author: {post_author} 31 | * Search URL: {search_url} 32 | * Meme Filter Used: {meme_filter_used} 33 | * Search URL: {search_url} - URL to repostsleuth.com with exact search settings 34 | * Checked Title: {check_title} 35 | * Report Link: {report_post_link} 36 | * Target % used on final matching: {effective_target_match_percent} 37 | * Max match age: {max_age} 38 | 39 | ## Report Message 40 | 41 | * Total Matches: {match_count} 42 | * Post Type: {post_type} 43 | * Name of Current Subreddit: {this_subreddit} 44 | * Subreddit of closest match: {closest_sub} 45 | * URL of closest match: {closest_url} 46 | * Shortlink of closest match: {closest_shortlink} 47 | * Matching % of closest match: {closest_percent_match} 48 | * Closest match created date: {closest_created_at} 49 | * Oldest Match Created: {oldest_created_at} 50 | * Oldest Match Shortlink: {oldest_shortlink} 51 | * Oldest Percent Match: 
{oldest_percent_match} 52 | * Oldest Sub: {oldest_sub} 53 | * Newest Match Created: {newest_created_at} 54 | * Newest Match Shortlink: {newest_shortlink} 55 | * Newest Percent Match: {newest_percent_match} 56 | * Newest Sub: {newest_sub} 57 | * Post Author: {post_author} 58 | 59 | ## Example 60 | 61 | I searched {total_searched} and found {match_count} matching posts. The oldest is {oldest_shortlink} and is a {oldest_percent_match}% match 62 | 63 | -------------------------------------------------------------------------------- /redditrepostsleuth/core/util/repost/text_repost.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | from typing import Optional 4 | 5 | import requests 6 | from requests.exceptions import ConnectionError 7 | from redditrepostsleuth.core.config import Config 8 | from redditrepostsleuth.core.db.databasemodels import Post 9 | from redditrepostsleuth.core.db.db_utils import get_db_engine 10 | from redditrepostsleuth.core.db.uow.unitofwork import UnitOfWork 11 | from redditrepostsleuth.core.db.uow.unitofworkmanager import UnitOfWorkManager 12 | from redditrepostsleuth.core.exception import IndexApiException 13 | from redditrepostsleuth.core.model.image_index_api_result import APISearchResults 14 | from redditrepostsleuth.core.model.search.search_match import SearchMatch 15 | from redditrepostsleuth.core.model.search.search_results import SearchResults 16 | from redditrepostsleuth.core.model.search.text_search_match import TextSearchMatch 17 | from redditrepostsleuth.core.model.search_settings import SearchSettings 18 | from redditrepostsleuth.core.util.helpers import get_default_link_search_settings, get_default_text_search_settings 19 | 20 | config = Config() 21 | log = logging.getLogger(__name__) 22 | 23 | def get_text_matches(text: str) -> APISearchResults: 24 | 25 | try: 26 | res = requests.post(f'{config.index_api}/text', json={'text': text}) 27 | except 
def get_text_post_matches(
        post: Post,
        uow: UnitOfWork,  # TODO - Start passing UOW instead of UOWM
        search_settings: SearchSettings
) -> Optional[SearchResults]:
    """Search the text index for posts similar to *post*'s selftext.

    :param post: Post whose selftext is searched
    :param uow: Unit of work used to resolve index match IDs to Post rows
    :param search_settings: Settings recorded on the returned SearchResults
    :return: SearchResults containing a TextSearchMatch per resolvable index hit
    :raises IndexApiException: propagated from get_text_matches on bad status
    """
    search_results = SearchResults(post.url, checked_post=post, search_settings=search_settings)
    api_results = get_text_matches(post.selftext)
    for index_results in api_results.results:
        for match in index_results.matches:
            # Use a distinct name — the original rebound the 'post' parameter
            # here, clobbering the searched post for the rest of the function.
            matched_post = uow.posts.get_by_id(match.id)
            if not matched_post:
                log.warning('Failed to find post for index match with ID %s', match.id)
                continue
            search_results.matches.append(TextSearchMatch(matched_post, match.distance))

    search_results.search_times.total_search_time = api_results.total_search_time

    return search_results
def run_migrations_offline():
    """Run migrations in 'offline' mode.

    This configures the context with just a URL
    and not an Engine, though an Engine is acceptable
    here as well. By skipping the Engine creation
    we don't even need a DBAPI to be available.

    Calls to context.execute() here emit the given string to the
    script output.

    """
    # The sqlalchemy.url ini value is ignored; the connection URL is always
    # built from the bot config so credentials stay out of alembic.ini.
    # (Removed an unused local that read config.get_main_option("sqlalchemy.url").)
    context.configure(
        url=get_conn_string(), target_metadata=target_metadata, literal_binds=True
    )

    with context.begin_transaction():
        context.run_migrations()
class SearchSettings:
    """
    Wrapper that contains all settings to be used when searching for a repost.
    Any setting not supplied falls back to a sensible default.
    """
    def __init__(
            self,
            target_title_match: Optional[int] = None,
            max_matches: int = 75,
            same_sub: bool = False,
            max_days_old: Optional[int] = None,
            filter_dead_matches: bool = False,
            filter_removed_matches: bool = False,
            only_older_matches: bool = True,
            filter_same_author: bool = True,
            filter_crossposts: bool = True
    ):
        """
        :param target_title_match: Threshold a title must meet to be considered a match
        :param max_matches: Max matches to fetch from search
        :param same_sub: Only keep matches from same subreddit
        :param max_days_old: Drop all matches older than X days
        :param filter_dead_matches: Remove matches that return a 404
        :param filter_removed_matches: Remove matches that have been removed from Reddit
        :param only_older_matches: Only include matches older than the searched post
        :param filter_same_author: Remove matches by the same author as the searched post
        :param filter_crossposts: Remove matches that are crossposts
        """
        # Assignment order is deliberate: __repr__ iterates __dict__ and so
        # reflects this order.
        self.filter_crossposts = filter_crossposts
        self.filter_same_author = filter_same_author
        self.only_older_matches = only_older_matches
        self.filter_removed_matches = filter_removed_matches
        self.filter_dead_matches = filter_dead_matches
        self.max_days_old = max_days_old
        self.same_sub = same_sub
        self.max_matches = max_matches
        self.target_title_match = target_title_match

    @property
    def search_scope(self):
        """Human-readable scope label derived from the same_sub flag."""
        if self.same_sub:
            return 'This Sub'
        return 'Reddit'

    @property
    def check_title(self):
        """True when a title-similarity threshold has been set."""
        return self.target_title_match is not None

    def __repr__(self):
        return ''.join(f'{key}: {value} | ' for key, value in self.__dict__.items())

    def to_dict(self):
        """Serialize all settings, including the derived properties."""
        payload = dict(self.__dict__)
        payload['search_scope'] = self.search_scope
        payload['check_title'] = self.check_title
        return payload
in self._subreddits: 25 | if sub.display_name == sub_name: 26 | log.debug('Returning cached sub %s', sub_name) 27 | return sub 28 | new_sub = self.reddit.subreddit(sub_name) 29 | if new_sub: 30 | log.debug('Returning new subreddit %s', sub_name) 31 | self._subreddits.append(new_sub) 32 | return new_sub 33 | 34 | def comment(self, comment_id: Text) -> Comment: 35 | return self._return_comment(comment_id) 36 | 37 | def _return_comment(self, comment_id: Text) -> Comment: 38 | for comment in self._comments: 39 | if comment.id == comment_id: 40 | log.debug('Returning cached comment %s', comment_id) 41 | return comment 42 | new_comment = self.reddit.comment(comment_id) 43 | log.debug('Returning new comment %s', comment_id) 44 | if new_comment: 45 | self._comments.append(new_comment) 46 | return new_comment 47 | 48 | def submission(self, submission_id: Text) -> Submission: 49 | return self._return_submission(submission_id) 50 | 51 | def _return_submission(self, submission_id: Text) -> Submission: 52 | for submission in self._submissions: 53 | if submission.id == submission_id: 54 | log.debug('Returning cached submission %s', submission_id) 55 | return submission 56 | new_submission = self.reddit.submission(submission_id) 57 | if new_submission: 58 | self._submissions.append(new_submission) 59 | log.debug('Returning new submission %s', submission_id) 60 | return new_submission 61 | 62 | def redditor(self, username: Text) -> Redditor: 63 | return self._return_redditor(username) 64 | 65 | def _return_redditor(self, username: Text) -> Redditor: 66 | for redditor in self._redditors: 67 | if redditor.name == username: 68 | log.debug('Returning cached redditor %s', redditor.name) 69 | return redditor 70 | new_redditor = self.reddit.redditor(username) 71 | if new_redditor: 72 | self._redditors.append(new_redditor) 73 | log.debug('Returning new redditor %s', username) 74 | return new_redditor -------------------------------------------------------------------------------- 
/wiki/support-sleuth-bot.md: -------------------------------------------------------------------------------- 1 | ## Supporting Repost Sleuth 2 | 3 | --- 4 | 5 | ### TLDR 6 | 7 | --- 8 | 9 | Repost Sleuth is a passion project that is expensive to run. It costs about $40 a month in electricity. It was just moved to a new $800 server and will soon be getting a $300 RAM upgrade. 10 | 11 | No obligation to donate but I would sincerely appreciate any donations. 12 | 13 | [Click Here To Support Repost Sleuth With a Donation](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=DXTH32CHAK334&source=url) 14 | 15 | ### What's The Deal? 16 | 17 | --- 18 | 19 | When I started this project I could never imagine how popular it would become. I put this bot together as a fun side project thinking it might help me land a job someday. I never thought it would get any traction. Boy was I wrong. 20 | 21 | The daily amount of interaction the bot gets, the amount of subs that have signed up, and the general positive support has been mind blowing. 22 | 23 | I am immensely grateful to the Reddit community for the love and support shown to Repost Sleuth. 24 | 25 | To put some numbers to it. 26 | 27 | * Average of 3000 summons a day 28 | * Over 180 subs signed up for monitoring, covering millions of members 29 | * 340,000 Comments Left 30 | * Over 40 million image searches performed 31 | * 20 new submissions a second are checked for reposts 32 | * Over 400 million Reddit submissions indexed 33 | 34 | ### All of these numbers have a big downside 35 | 36 | The bot requires a ton of CPU power and electricity to work. On the surface it seems simple. You tag the bot and it responds. Under the covers there is a lot going on. 37 | 38 | At any given moment there are 20 to 40 containers running to do all of the processing required. 
These cover things like 39 | 40 | * Monitoring for new mentions 41 | * Responding to new mentions 42 | * Ingesting every single new submission Reddit receives 43 | * Performing repost checks on every new link and image submission (can be over 20,000+ an hour) 44 | * Performing repost checks with custom settings on subs that are signed up for monitoring 45 | * Collecting stats about bot performance 46 | * Building search indexes 47 | * And many other tasks 48 | 49 | Most of this runs on a Dell enterprise-grade server in my house. What the server can't handle is currently being picked up by my gaming PC. 50 | 51 | I'd like to add new features like OC theft detection and video repost checking (and get my gaming PC back). To make this possible I had to buy another server to pick up the slack. 52 | 53 | The new server cost $765 out of pocket. IMO it's money well spent since it will enable the bot to have a much greater capacity. 54 | 55 | I don't have a problem spending the money because I love working on the bot. However, I'm not opposed to community support if the desire is there. 
from unittest import TestCase

from redditrepostsleuth.core.celery.task_logic.ingest_task_logic import image_links_from_gallery_meta_data
from redditrepostsleuth.core.exception import GalleryNotProcessed


class TestIngestTasks(TestCase):
    """Exercises gallery metadata -> i.redd.it link extraction."""

    def test_image_links_from_gallery_meta_data_return_jpg_links(self):
        """A single valid jpg entry yields one .jpg link."""
        gallery = {'abcd123': {'status': 'valid', 'm': 'image/jpg'}}
        self.assertListEqual(
            ['https://i.redd.it/abcd123.jpg'],
            image_links_from_gallery_meta_data(gallery)
        )

    def test_image_links_from_gallery_meta_data_return_png_links(self):
        """A single valid png entry yields one .png link."""
        gallery = {'abcd456': {'status': 'valid', 'm': 'image/png'}}
        self.assertListEqual(
            ['https://i.redd.it/abcd456.png'],
            image_links_from_gallery_meta_data(gallery)
        )

    def test_image_links_from_gallery_meta_data_return_gif_links(self):
        """A single valid gif entry yields one .gif link."""
        gallery = {'abcd456': {'status': 'valid', 'm': 'image/gif'}}
        self.assertListEqual(
            ['https://i.redd.it/abcd456.gif'],
            image_links_from_gallery_meta_data(gallery)
        )

    def test_image_links_from_gallery_meta_data_return_mixed_links(self):
        """Mixed mime types produce one correctly-suffixed link per entry."""
        gallery = {
            'abcd123': {'status': 'valid', 'm': 'image/jpg'},
            'abcd456': {'status': 'valid', 'm': 'image/png'},
        }
        self.assertListEqual(
            ['https://i.redd.it/abcd123.jpg', 'https://i.redd.it/abcd456.png'],
            image_links_from_gallery_meta_data(gallery)
        )

    def test_image_links_from_gallery_meta_data_no_valid_type_raises_key_error(self):
        """An unrecognized mime type raises KeyError."""
        gallery = {'abcd123': {'status': 'valid', 'm': 'image/test'}}
        with self.assertRaises(KeyError):
            image_links_from_gallery_meta_data(gallery)

    def test_image_links_from_gallery_meta_data_image_still_processing_raises(self):
        """An entry still marked unprocessed raises GalleryNotProcessed."""
        gallery = {'abcd123': {'status': 'unprocessed', 'm': 'image/test'}}
        with self.assertRaises(GalleryNotProcessed):
            image_links_from_gallery_meta_data(gallery)

    def test_image_links_from_gallery_meta_data_unknown_status_processing_throws(self):
        """Any other status value raises ValueError."""
        gallery = {'abcd123': {'status': 'unknown', 'm': 'image/test'}}
        with self.assertRaises(ValueError):
            image_links_from_gallery_meta_data(gallery)