├── src ├── Controller │ └── .gitignore ├── Enum │ └── BotMode.php ├── Exception │ └── ContentFetchingFailedException.php ├── Service │ ├── PermissionChecker.php │ ├── SiteHandlerCollection.php │ ├── SummaryTextWrapper.php │ ├── RatioFormatter.php │ ├── PostService.php │ ├── HardcodedPermissionChecker.php │ └── LinkResolver.php ├── SummaryProvider │ ├── SummaryProvider.php │ └── SumySummaryProvider.php ├── SiteHandler │ ├── AxiosSiteHandler.php │ ├── PoliticoSiteHandler.php │ ├── DallasNewsSiteHandler.php │ ├── GamerkickSiteHandler.php │ ├── TheInterceptSiteHandler.php │ ├── TheGlobeAndMailSiteHandler.php │ ├── SiteHandler.php │ ├── NewsSkySiteHandler.php │ ├── VoxSiteHandler.php │ ├── PhoronixSiteHandler.php │ ├── TheHillSiteHandler.php │ ├── TheWireInSiteHandler.php │ ├── TrainsSiteHandler.php │ ├── CbsNewsSiteHandler.php │ ├── BoingBoingSiteHandler.php │ ├── EngadgetSiteHandler.php │ ├── GizmodoSiteHandler.php │ ├── JPostSiteHandler.php │ ├── NewsweekSiteHandler.php │ ├── TheRegisterSiteHandler.php │ ├── ApNewsSiteHandler.php │ ├── ArsTechnicaSiteHandler.php │ ├── CnnSiteHandler.php │ ├── KsatSiteHandler.php │ ├── MichaelWestSiteHandler.php │ ├── NbcNewsSiteHandler.php │ ├── NewslaundrySiteHandler.php │ ├── NyTimesSiteHandler.php │ ├── RollingStoneSiteHandler.php │ ├── ScienceDailySiteHandler.php │ ├── TechCrunchSiteHandler.php │ ├── BlockClubChicagoSiteHandler.php │ ├── CtvNewsSiteHandler.php │ ├── IndependentCoUkSiteHandler.php │ ├── TexasMonthlySiteHandler.php │ ├── DailymailSiteHandler.php │ ├── TvLineSiteHandler.php │ ├── WindowsCentralSiteHandler.php │ ├── BusinessInsiderSiteHandler.php │ ├── GamingOnLinuxSiteHandler.php │ ├── HollywoodReporterSiteHandler.php │ ├── TheGuardianSiteHandler.php │ ├── TheVergeSiteHandler.php │ ├── MagazineScienceForThePeople.php │ ├── StarTelegramSiteHandler.php │ ├── EuronewsSiteHandler.php │ ├── TomsHardwareSiteHandler.php │ ├── Abc7SiteHandler.php │ ├── AbcAustraliaSiteHandler.php │ ├── BellingcatSiteHandler.php │ ├── 
SpaceComSiteHandler.php │ ├── ElPasoTimesSiteHandler.php │ ├── CbcSiteHandler.php │ ├── NprSiteHandler.php │ ├── TheBigNewsletterSiteHandler.php │ ├── AlSiteHandler.php │ ├── EnglishNvUa.php │ ├── BbcUkBlogSiteHandler.php │ ├── BbcComHandler.php │ ├── AdguardBlogSiteHandler.php │ ├── FuturismSiteHandler.php │ ├── AustralianGeographicSiteHandler.php │ ├── AxiomSpaceSiteHandler.php │ ├── BalkanInsightSiteHandler.php │ ├── TechnologyReviewSiteHandler.php │ ├── FortuneSiteHandler.php │ ├── TheQuintSiteHandler.php │ ├── DwSiteHandler.php │ ├── VancouverSunSiteHandler.php │ ├── BbcUkNewsSiteHandler.php │ ├── PvMagazineSiteHandler.php │ ├── WashingtonPostSiteHandler.php │ ├── DallasObserverSiteHandler.php │ ├── TexasTribuneSiteHandler.php │ ├── TimeSiteHandler.php │ ├── TheBulwarkSiteHandler.php │ ├── NikkeiAsiaSiteHandler.php │ └── AbstractSiteHandler.php ├── Dto │ └── RatioFormatterResult.php ├── SummaryTextWrapper │ ├── SummaryTextWrapperProvider.php │ ├── DefaultSummaryTextWrapperProvider.php │ └── CondensedSummaryTextWrapperProvider.php ├── Listener │ └── ExceptionLogger.php ├── Kernel.php └── Command │ ├── ManualRunCommand.php │ ├── ReplyToDirectMessagesCommand.php │ ├── ReplyToPostsCommand.php │ ├── ReplyToMentionsCommand.php │ └── GetStatsCommand.php ├── .dockerignore ├── package.json ├── config ├── packages │ ├── prod │ │ ├── rikudou_memoize.yaml │ │ └── rikudou_dynamo_db_cache.yaml │ ├── routing.yaml │ ├── rikudou_memoize.yaml │ ├── nyholm_psr7.yaml │ ├── cache.yaml │ └── framework.yaml ├── routes.yaml ├── routes │ └── framework.yaml ├── preload.php ├── bundles.php └── services.yaml ├── phpstan.neon.dist ├── public └── index.php ├── tests └── console-application.php ├── python └── source │ ├── requirements.txt │ ├── summarizer.py │ └── data │ └── english.py ├── .gitignore ├── bin └── console ├── Dockerfile ├── yarn.lock ├── .github └── workflows │ ├── docker.yaml │ └── tests.yaml ├── LICENSE ├── README.md ├── lambda └── bootstrap.php ├── .env ├── base.Dockerfile 
├── composer.json ├── symfony.lock ├── serverless.yaml └── .php-cs-fixer.dist.php /src/Controller/.gitignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | /serverless.yaml 2 | /Dockerfile 3 | /.idea 4 | /.serverless 5 | /.github 6 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "serverless-plugin-log-retention": "^2.0.0" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /config/packages/prod/rikudou_memoize.yaml: -------------------------------------------------------------------------------- 1 | rikudou_memoize: 2 | cache_service: rikudou.dynamo_cache.adapter 3 | -------------------------------------------------------------------------------- /config/routes.yaml: -------------------------------------------------------------------------------- 1 | controllers: 2 | resource: 3 | path: ../src/Controller/ 4 | namespace: App\Controller 5 | type: attribute 6 | -------------------------------------------------------------------------------- /config/routes/framework.yaml: -------------------------------------------------------------------------------- 1 | when@dev: 2 | _errors: 3 | resource: '@FrameworkBundle/Resources/config/routing/errors.xml' 4 | prefix: /_error 5 | -------------------------------------------------------------------------------- /src/Enum/BotMode.php: -------------------------------------------------------------------------------- 1 | ['all' => true], 5 | Rikudou\MemoizeBundle\RikudouMemoizeBundle::class => ['all' => true], 6 | Rikudou\DynamoDbCacheBundle\RikudouDynamoDbCacheBundle::class => ['all' => true], 7 | ]; 8 | 
-------------------------------------------------------------------------------- /src/SummaryProvider/SummaryProvider.php: -------------------------------------------------------------------------------- 1 | 9 | */ 10 | public function getSummary(string $text, int $sentences): array; 11 | 12 | public function getId(): string; 13 | } 14 | -------------------------------------------------------------------------------- /tests/console-application.php: -------------------------------------------------------------------------------- 1 | bootEnv(__DIR__ . '/../.env'); 10 | 11 | $kernel = new Kernel($_SERVER['APP_ENV'], (bool) $_SERVER['APP_DEBUG']); 12 | return new Application($kernel); 13 | -------------------------------------------------------------------------------- /src/SiteHandler/AxiosSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /config/packages/routing.yaml: -------------------------------------------------------------------------------- 1 | framework: 2 | router: 3 | utf8: true 4 | 5 | # Configure how to generate URLs in non-HTTP contexts, such as CLI commands. 
6 | # See https://symfony.com/doc/current/routing.html#generating-urls-in-commands 7 | #default_uri: http://localhost 8 | 9 | when@prod: 10 | framework: 11 | router: 12 | strict_requirements: null 13 | -------------------------------------------------------------------------------- /src/SiteHandler/DallasNewsSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /src/SiteHandler/TheGlobeAndMailSiteHandler.php: -------------------------------------------------------------------------------- 1 | symfony/framework-bundle ### 9 | /.env.local 10 | /.env.local.php 11 | /.env.*.local 12 | /config/secrets/prod/prod.decrypt.private.php 13 | /public/bundles/ 14 | /var/ 15 | /vendor/ 16 | ###< symfony/framework-bundle ### 17 | 18 | ###> friendsofphp/php-cs-fixer ### 19 | /.php-cs-fixer.php 20 | /.php-cs-fixer.cache 21 | ###< friendsofphp/php-cs-fixer ### 22 | -------------------------------------------------------------------------------- /src/SiteHandler/SiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 15 | } 16 | 17 | protected function skipIfMatches(): ?string 18 | { 19 | return '@^\s*Read more: @'; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/SiteHandler/VoxSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/SiteHandler/PhoronixSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/SiteHandler/BoingBoingSiteHandler.php: -------------------------------------------------------------------------------- 
1 | p'; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/SiteHandler/GizmodoSiteHandler.php: -------------------------------------------------------------------------------- 1 | div > p'; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/SiteHandler/JPostSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/SiteHandler/NewsweekSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/SiteHandler/TheRegisterSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/SiteHandler/ApNewsSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/SiteHandler/ArsTechnicaSiteHandler.php: -------------------------------------------------------------------------------- 1 | p:not(.footnote)'; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/SiteHandler/KsatSiteHandler.php: -------------------------------------------------------------------------------- 1 | p, .articleBody li'; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/SiteHandler/MichaelWestSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- 
/src/SiteHandler/NbcNewsSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/SiteHandler/NewslaundrySiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/SiteHandler/TechCrunchSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /config/packages/rikudou_memoize.yaml: -------------------------------------------------------------------------------- 1 | # Default configuration for extension with alias: "rikudou_memoize" 2 | rikudou_memoize: 3 | 4 | # Whether memoization is enabled or not. 5 | enabled: true 6 | 7 | # The default memoization period if none is specified in attribute. -1 means until end of request. 8 | default_memoize_seconds: 86400 9 | 10 | # The default cache service to use. If default_memoize_seconds is set to -1 this setting is ignored and internal service is used. 
11 | cache_service: cache.app 12 | 13 | -------------------------------------------------------------------------------- /src/SiteHandler/BlockClubChicagoSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/SiteHandler/CtvNewsSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/SiteHandler/IndependentCoUkSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/SiteHandler/TexasMonthlySiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/SiteHandler/DailymailSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/SiteHandler/TvLineSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/SiteHandler/WindowsCentralSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/SummaryTextWrapper/SummaryTextWrapperProvider.php: -------------------------------------------------------------------------------- 1 | $summary 15 | */ 16 | public function getSummaryText(array $summary, string $originalText): 
string; 17 | } 18 | -------------------------------------------------------------------------------- /src/SiteHandler/BusinessInsiderSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/SiteHandler/TheGuardianSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/SiteHandler/TheVergeSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /bin/console: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env php 2 | p'; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/SiteHandler/StarTelegramSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 15 | } 16 | 17 | protected function getUserAgent(): string 18 | { 19 | return 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/115.0'; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/SiteHandler/EuronewsSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/SiteHandler/TomsHardwareSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/SiteHandler/Abc7SiteHandler.php: 
-------------------------------------------------------------------------------- 1 | p'; 20 | } 21 | 22 | protected function skipIfMatches(): ?string 23 | { 24 | return '@^MORE: @'; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/SiteHandler/AbcAustraliaSiteHandler.php: -------------------------------------------------------------------------------- 1 | div > p, div[class*="Article_body"] > div > ul:not([class*="ShareUtility"]) > li'; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/SiteHandler/BellingcatSiteHandler.php: -------------------------------------------------------------------------------- 1 | p, p.strapline'; 20 | } 21 | 22 | protected function skipIfMatches(): ?string 23 | { 24 | return '@^Related: @'; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/SiteHandler/ElPasoTimesSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 20 | } 21 | 22 | protected function skipIfMatches(): ?string 23 | { 24 | return '@^More: @'; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/SiteHandler/CbcSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 20 | } 21 | 22 | protected function getUserAgent(): string 23 | { 24 | return 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/115.0'; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /config/packages/nyholm_psr7.yaml: -------------------------------------------------------------------------------- 1 | services: 2 | # Register nyholm/psr7 services for autowiring with PSR-17 (HTTP factories) 3 | Psr\Http\Message\RequestFactoryInterface: '@nyholm.psr7.psr17_factory' 4 | Psr\Http\Message\ResponseFactoryInterface: 
'@nyholm.psr7.psr17_factory' 5 | Psr\Http\Message\ServerRequestFactoryInterface: '@nyholm.psr7.psr17_factory' 6 | Psr\Http\Message\StreamFactoryInterface: '@nyholm.psr7.psr17_factory' 7 | Psr\Http\Message\UploadedFileFactoryInterface: '@nyholm.psr7.psr17_factory' 8 | Psr\Http\Message\UriFactoryInterface: '@nyholm.psr7.psr17_factory' 9 | 10 | nyholm.psr7.psr17_factory: 11 | class: Nyholm\Psr7\Factory\Psr17Factory 12 | -------------------------------------------------------------------------------- /src/SiteHandler/NprSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 20 | } 21 | 22 | protected function getUserAgent(): string 23 | { 24 | return 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/115.0'; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/SiteHandler/TheBigNewsletterSiteHandler.php: -------------------------------------------------------------------------------- 1 | p, .body.markup ul li > p'; 20 | } 21 | 22 | protected function skipIfMatches(): ?string 23 | { 24 | return '@^Welcome to BIG@'; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/SiteHandler/AlSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 21 | } 22 | 23 | protected function breakIf(): ?callable 24 | { 25 | return static fn (DOMNode $node) => str_starts_with($node->nodeValue ?? 
'', 'Related:'); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/SiteHandler/EnglishNvUa.php: -------------------------------------------------------------------------------- 1 | p, .subtitle > p'; 21 | } 22 | 23 | protected function breakIf(): ?callable 24 | { 25 | return static fn (DOMNode $node) => isset($node->attributes['style']); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/SiteHandler/BbcUkBlogSiteHandler.php: -------------------------------------------------------------------------------- 1 | p, .content-block__text > ul > li'; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/SiteHandler/FuturismSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 22 | } 23 | 24 | protected function skipIf(): ?callable 25 | { 26 | return static fn (DOMNode $node): bool => $node instanceof DOMElement 27 | && $node->tagName === 'p' 28 | && isset($node->attributes['class']); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/SiteHandler/AustralianGeographicSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 21 | } 22 | 23 | protected function skipIf(): ?callable 24 | { 25 | return static function (DOMNode $node) { 26 | return isset($node->attributes['class']) && $node->attributes['class']->value === 'breadcrumbs'; 27 | }; 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /config/packages/framework.yaml: -------------------------------------------------------------------------------- 1 | # see https://symfony.com/doc/current/reference/configuration/framework.html 2 | framework: 3 | secret: '%env(APP_SECRET)%' 4 | #csrf_protection: true 5 | http_method_override: false 6 | handle_all_throwables: true 7 
| 8 | # Enables session support. Note that the session will ONLY be started if you read or write from it. 9 | # Remove or comment this section to explicitly disable session support. 10 | session: 11 | handler_id: null 12 | cookie_secure: auto 13 | cookie_samesite: lax 14 | storage_factory_id: session.storage.factory.native 15 | 16 | #esi: true 17 | #fragments: true 18 | php_errors: 19 | log: true 20 | 21 | when@test: 22 | framework: 23 | test: true 24 | session: 25 | storage_factory_id: session.storage.factory.mock_file 26 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ghcr.io/rikudousage/lemmy-auto-tldr-bot:base 2 | 3 | COPY . /var/task 4 | WORKDIR /var/task 5 | 6 | # Handle dependencies, build stuff (the venv is activated with POSIX '.' rather than bash-only 'source', because RUN executes under /bin/sh) 7 | RUN COMPOSER_ALLOW_SUPERUSER=1 composer install --no-dev --no-scripts && \ 8 | APP_ENV=prod php bin/console cache:warmup && \ 9 | cd python/source && \ 10 | python3.9 -m venv venv && \ 11 | . venv/bin/activate && \ 12 | pip install -r requirements.txt && \ 13 | pyinstaller -F summarizer.py && \ 14 | mv dist/summarizer ../ && \ 15 | rm -rf dist build && \ 16 | cd .. && \ 17 | python -m nltk.downloader punkt -d .
&& \ 18 | COMPOSER_ALLOW_SUPERUSER=1 composer global clear-cache 19 | 20 | # Lambda 21 | RUN cp lambda/bootstrap.php /var/runtime/bootstrap && \ 22 | cd /opt && \ 23 | composer require guzzlehttp/guzzle && \ 24 | chmod -R +rx /var/task 25 | 26 | CMD [ "bin/console" ] 27 | -------------------------------------------------------------------------------- /src/SiteHandler/AxiomSpaceSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/SiteHandler/BalkanInsightSiteHandler.php: -------------------------------------------------------------------------------- 1 | p, .btArticleBody .btText > p'; 21 | } 22 | 23 | protected function skipIf(): ?callable 24 | { 25 | return static function (DOMNode $node): bool { 26 | $children = array_map(static fn (DOMNode $childNode) => $childNode->nodeName, [...$node->childNodes]); 27 | 28 | return in_array('img', $children, true); 29 | }; 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/Listener/ExceptionLogger.php: -------------------------------------------------------------------------------- 1 | 'onException']; 15 | } 16 | 17 | public function onException(ExceptionEvent $event): void 18 | { 19 | $exception = $event->getThrowable(); 20 | if (!$exception instanceof ContentFetchingFailedException) { 21 | return; 22 | } 23 | 24 | error_log(json_encode([ 25 | 'error' => $exception->getMessage(), 26 | ], flags: JSON_THROW_ON_ERROR)); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/SiteHandler/TechnologyReviewSiteHandler.php: -------------------------------------------------------------------------------- 1 | attributes['class'])) { 27 | return false; 28 | } 29 | 30 | return $node->attributes['class']->value === 'imageSet__caption'; 31 | }; 32 | } 33 | } 34 | 
-------------------------------------------------------------------------------- /src/SiteHandler/FortuneSiteHandler.php: -------------------------------------------------------------------------------- 1 | nodeValue) { 27 | return false; 28 | } 29 | 30 | return str_starts_with($node->nodeValue, '(The writer is a') || str_starts_with($node->nodeValue, '(At The Quint'); 31 | }; 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/SiteHandler/DwSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/SiteHandler/VancouverSunSiteHandler.php: -------------------------------------------------------------------------------- 1 | p'; 22 | } 23 | 24 | protected function breakIf(): ?callable 25 | { 26 | return static fn (DOMNode $node): bool => $node instanceof DOMElement 27 | && $node->tagName === 'p' 28 | && isset($node->attributes['data-async']) 29 | && filter_var(trim($node->nodeValue ?: ''), FILTER_VALIDATE_EMAIL); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /python/source/summarizer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import division, print_function, unicode_literals 4 | 5 | import sys 6 | 7 | from sumy.parsers.plaintext import PlaintextParser 8 | from sumy.nlp.tokenizers import Tokenizer 9 | from sumy.summarizers.lsa import LsaSummarizer as Summarizer 10 | from sumy.nlp.stemmers import Stemmer 11 | from sumy.utils import get_stop_words, parse_stop_words 12 | 13 | from data.english import englishStopWords 14 | 15 | LANGUAGE = "english" 16 | 17 | if __name__ == '__main__': 18 | text = sys.argv[1] 19 | maxSentences = int(sys.argv[2]) 20 | 21 | parser = 
PlaintextParser.from_string(text, Tokenizer(LANGUAGE)) 22 | stemmer = Stemmer(LANGUAGE) 23 | summarizer = Summarizer(stemmer) 24 | summarizer.stop_words = parse_stop_words(englishStopWords) 25 | 26 | for sentence in summarizer(parser.document, maxSentences): 27 | print(sentence, "\n") 28 | -------------------------------------------------------------------------------- /src/Service/SiteHandlerCollection.php: -------------------------------------------------------------------------------- 1 | $handlers 13 | */ 14 | public function __construct( 15 | #[TaggedIterator('app.site_handler')] 16 | private iterable $handlers, 17 | ) { 18 | } 19 | 20 | /** 21 | * @throws ContentFetchingFailedException 22 | */ 23 | public function getContent(string $url): string 24 | { 25 | foreach ($this->handlers as $handler) { 26 | if ($handler->supports($url)) { 27 | return $handler->getContent($url); 28 | } 29 | } 30 | 31 | throw new ContentFetchingFailedException("No handler found for site: {$url}"); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/SiteHandler/BbcUkNewsSiteHandler.php: -------------------------------------------------------------------------------- 1 | isLambda()) { 15 | return '/tmp/cache/' . $this->environment; 16 | } 17 | 18 | return parent::getCacheDir(); 19 | } 20 | 21 | public function getLogDir(): string 22 | { 23 | if ($this->isLambda()) { 24 | return '/tmp/logs/' . $this->environment; 25 | } 26 | 27 | return parent::getLogDir(); 28 | } 29 | 30 | public function getBuildDir(): string 31 | { 32 | return $this->getProjectDir() . '/var/cache/' . 
$this->environment; 33 | } 34 | 35 | private function isLambda(): bool 36 | { 37 | return getenv('LAMBDA_TASK_ROOT') !== false; 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/Service/SummaryTextWrapper.php: -------------------------------------------------------------------------------- 1 | $providers 13 | */ 14 | public function __construct( 15 | #[TaggedIterator('app.summary_wrapper')] 16 | private iterable $providers, 17 | ) { 18 | } 19 | 20 | /** 21 | * @param array $summary 22 | */ 23 | public function getResponseText(Community $community, array $summary, string $originalText): ?string 24 | { 25 | foreach ($this->providers as $provider) { 26 | if ($provider->supports($community)) { 27 | return $provider->getSummaryText($summary, $originalText); 28 | } 29 | } 30 | 31 | return null; 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/SiteHandler/PvMagazineSiteHandler.php: -------------------------------------------------------------------------------- 1 | parentNode) { 27 | return false; 28 | } 29 | if (!isset($node->parentNode->attributes['class'])) { 30 | return false; 31 | } 32 | 33 | return $node->parentNode->attributes['class']->value === 'disclaimer'; 34 | }; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /yarn.lock: -------------------------------------------------------------------------------- 1 | # THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY. 
2 | # yarn lockfile v1 3 | 4 | 5 | nco@1.0.1: 6 | version "1.0.1" 7 | resolved "https://registry.yarnpkg.com/nco/-/nco-1.0.1.tgz#868de423b4f45f8c05555f1ccb28aa788be06c52" 8 | integrity sha512-tjlHSkSvVzRGeBXIOimMTA2J3ZUP9LLUvqv+JeBLGAseZP1syF/LgetKNJ8zMiXosuKJHx7/KSIv8UDyKXoeKQ== 9 | 10 | semver@5.4.1: 11 | version "5.4.1" 12 | resolved "https://registry.yarnpkg.com/semver/-/semver-5.4.1.tgz#e059c09d8571f0540823733433505d3a2f00b18e" 13 | integrity sha512-WfG/X9+oATh81XtllIo/I8gOiY9EXRdv1cQdyykeXK17YcUW3EXUAi2To4pcH6nZtJPr7ZOpM5OMyWJZm+8Rsg== 14 | 15 | serverless-plugin-log-retention@^2.0.0: 16 | version "2.0.0" 17 | resolved "https://registry.yarnpkg.com/serverless-plugin-log-retention/-/serverless-plugin-log-retention-2.0.0.tgz#da0b5b6d01a5b19c6fd6d3e0e36642f1fc5739ad" 18 | integrity sha512-TXKMfLdVxhamyaDbqr+gwjIeiqSnDw3z+bZQy0W2ctRpcedUY8hg58wwMZqAFyMS3QekHT9srCe4Qv7lfPwkGw== 19 | dependencies: 20 | nco "1.0.1" 21 | semver "5.4.1" 22 | -------------------------------------------------------------------------------- /.github/workflows/docker.yaml: -------------------------------------------------------------------------------- 1 | name: Publish docker 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - master 8 | paths: 9 | - base.Dockerfile 10 | 11 | jobs: 12 | publish: 13 | name: Publish docker 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Checkout code 17 | uses: actions/checkout@v3 18 | - name: Build docker 19 | run: docker build -f base.Dockerfile -t ghcr.io/rikudousage/lemmy-auto-tldr-bot:base --label "org.opencontainers.image.source=https://github.com/RikudouSage/LemmyAutoTldrBot" --label "org.opencontainers.image.licenses=MIT" . 
20 | - name: Login to GHCR 21 | uses: docker/login-action@v2 22 | with: 23 | registry: ghcr.io 24 | username: ${{ github.actor }} 25 | password: ${{ secrets.GITHUB_TOKEN }} 26 | - name: Push to registry 27 | run: docker push ghcr.io/rikudousage/lemmy-auto-tldr-bot:base 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2023 Dominik Chrástecký 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /src/SummaryProvider/SumySummaryProvider.php: -------------------------------------------------------------------------------- 1 | '\"', 17 | '$' => '\$', 18 | ]); 19 | $script = __DIR__ . 
'/../../python/summarizer'; 20 | $currentDir = getcwd() ?: throw new RuntimeException('Getting current directory failed'); 21 | chdir(dirname($script)); 22 | exec("{$script} \"{$text}\" {$sentences} 2>&1", $output, $exitCode); 23 | chdir($currentDir); 24 | if ($exitCode !== 0) { 25 | throw new RuntimeException('Could not get summary using the python script.'); 26 | } 27 | 28 | return array_filter($output); 29 | } 30 | 31 | public function getId(): string 32 | { 33 | return 'sumy'; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/SiteHandler/WashingtonPostSiteHandler.php: -------------------------------------------------------------------------------- 1 | getArticleCrawler($url); 25 | $scriptContent = $crawler->filter($this->getSelector())->text(); 26 | $json = json_decode($scriptContent, true); 27 | assert(is_array($json)); 28 | $parts = array_filter($json['props']['pageProps']['globalContent']['content_elements'], static fn (array $element) => $element['type'] === 'text'); 29 | $parts = array_map(static fn (array $element) => html_entity_decode(strip_tags($element['content'])), $parts); 30 | 31 | return implode("\n\n", $parts); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Lemmy AutoTL;DR bot 2 | 3 | This bot reads content from a supported site and reports back the summary. 
4 | 5 | ## Interesting parts 6 | 7 | - [summarizer.py](python/source/summarizer.py) - the script that does the summarization itself 8 | - [site handlers](src/SiteHandler) - directory with classes that extract the text from the site 9 | - [handler command](src/Command/ReplyToPostsCommand.php) - the class that handles the bot loop itself, meaning it ties 10 | all the parts together and is the entry point 11 | - all the stuff for building and deploying the project: 12 | - [Dockerfile](Dockerfile) - creates the docker image that runs the bot 13 | - You can also use it as an always-up-to-date reference on how to make the bot work on your local computer 14 | - [serverless.yaml](serverless.yaml) - contains configuration for deploying the docker image to AWS Lambda 15 | - [publish.yaml](.github/workflows/publish.yaml) - publishes the source code to AWS Lambda on every push 16 | 17 | ## Libraries used 18 | 19 | *Only the important ones are listed* 20 | 21 | - [sumy](https://pypi.org/project/sumy/) 22 | - [rikudou/lemmy-api](https://packagist.org/packages/rikudou/lemmy-api) 23 | - [Symfony](https://symfony.com/) 24 | -------------------------------------------------------------------------------- /lambda/bootstrap.php: -------------------------------------------------------------------------------- 1 | #!/usr/bin/php 2 | get('http://' . $_ENV['AWS_LAMBDA_RUNTIME_API'] . '/2018-06-01/runtime/invocation/next'); 26 | 27 | return [ 28 | 'invocationId' => $response->getHeader('Lambda-Runtime-Aws-Request-Id')[0], 29 | 'payload' => json_decode((string) $response->getBody(), true) 30 | ]; 31 | } 32 | 33 | function sendResponse($invocationId, $response): void 34 | { 35 | $client = new Client(); 36 | $client->post( 37 | 'http://' . $_ENV['AWS_LAMBDA_RUNTIME_API'] . '/2018-06-01/runtime/invocation/' . $invocationId . 
'/response', 38 | ['body' => $response] 39 | ); 40 | } 41 | -------------------------------------------------------------------------------- /.github/workflows/tests.yaml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | on: 3 | push: 4 | branches: 5 | - master 6 | pull_request: 7 | workflow_dispatch: 8 | 9 | jobs: 10 | code_style: 11 | name: Test code style 12 | runs-on: ubuntu-latest 13 | env: 14 | PHP_CS_FIXER_IGNORE_ENV: 1 15 | strategy: 16 | matrix: 17 | version: ['8.2'] 18 | steps: 19 | - name: Setup PHP 20 | uses: shivammathur/setup-php@v2 21 | with: 22 | php-version: ${{ matrix.version }} 23 | - name: Checkout Code 24 | uses: actions/checkout@v2 25 | - name: Install Dependencies 26 | run: composer install 27 | - name: Test code style 28 | run: composer fixer -- --dry-run 29 | # static_analysis: 30 | # name: Static analysis 31 | # runs-on: ubuntu-latest 32 | # strategy: 33 | # matrix: 34 | # version: ['8.2'] 35 | # steps: 36 | # - name: Setup PHP 37 | # uses: shivammathur/setup-php@v2 38 | # with: 39 | # php-version: ${{ matrix.version }} 40 | # - name: Checkout Code 41 | # uses: actions/checkout@v2 42 | # - name: Install Dependencies 43 | # run: composer install 44 | # - name: Run static analysis 45 | # run: composer phpstan 46 | 47 | -------------------------------------------------------------------------------- /src/SiteHandler/DallasObserverSiteHandler.php: -------------------------------------------------------------------------------- 1 | browser->request(Request::METHOD_GET, $url, [ 23 | 'HTTP_USER_AGENT' => $this->getUserAgent(), 24 | ]); 25 | $content = $crawler->filter($this->getSelector())->getNode(0); 26 | $result = ''; 27 | assert($content instanceof DOMElement); 28 | foreach ($content->childNodes as $childNode) { 29 | if ($childNode instanceof DOMElement && $childNode->tagName === 'div') { 30 | continue; 31 | } 32 | if ($childNode instanceof DOMElement && $childNode->tagName === 'br') { 33 | 
$result .= "\n"; 34 | } 35 | $result .= $childNode->textContent; 36 | } 37 | 38 | return trim($result); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/Service/RatioFormatter.php: -------------------------------------------------------------------------------- 1 | format($originalTextLength) ?: throw new RuntimeException('Failed formatting'), 26 | formattedSummaryLength: $numberFormatter->format($summaryLength) ?: throw new RuntimeException('Failed formatting'), 27 | formattedRatioSaved: $percentFormatter->format($ratioSaved) ?: throw new RuntimeException('Failed formatting'), 28 | ); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/SiteHandler/TexasTribuneSiteHandler.php: -------------------------------------------------------------------------------- 1 | p, .c-story-body > hr'; 22 | } 23 | 24 | protected function skipIf(): ?callable 25 | { 26 | return static function (DOMNode $node, array &$context): bool { 27 | if ($node instanceof DOMElement && $node->tagName === 'hr' && !isset($context['afterFirstHr'])) { 28 | $context['afterFirstHr'] = true; 29 | 30 | return true; 31 | } 32 | if (!isset($context['afterFirstHr']) || isset($context['afterSecondHr'])) { 33 | return true; 34 | } 35 | if ($node instanceof DOMElement && $node->tagName === 'hr') { 36 | $context['afterSecondHr'] = true; 37 | 38 | return true; 39 | } 40 | 41 | return false; 42 | }; 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/SummaryTextWrapper/DefaultSummaryTextWrapperProvider.php: -------------------------------------------------------------------------------- 1 | ratioFormatter->getRatio($originalText, implode("\n", $summary)); 26 | 27 | return 'This is the best summary I could come up with:' 28 | . "\n\n---\n\n" 29 | . implode("\n\n", $summary) 30 | . "\n\n---\n\n" 31 | . 
"The original article contains {$stats->formattedOriginalLength} words, the summary contains {$stats->formattedSummaryLength} words. " 32 | . "Saved {$stats->formattedRatioSaved}. " 33 | . "I'm a bot and I'm [open source]({$this->sourceCodeLink})!"; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | # In all environments, the following files are loaded if they exist, 2 | # the latter taking precedence over the former: 3 | # 4 | # * .env contains default values for the environment variables needed by the app 5 | # * .env.local uncommitted file with local overrides 6 | # * .env.$APP_ENV committed environment-specific defaults 7 | # * .env.$APP_ENV.local uncommitted environment-specific overrides 8 | # 9 | # Real environment variables win over .env files. 10 | # 11 | # DO NOT DEFINE PRODUCTION SECRETS IN THIS FILE NOR IN ANY OTHER COMMITTED FILES. 12 | # https://symfony.com/doc/current/configuration/secrets.html 13 | # 14 | # Run "composer dump-env prod" to compile .env files for production use (requires symfony/flex >=1.2). 
15 | # https://symfony.com/doc/current/best_practices.html#use-environment-variables-for-infrastructure-configuration 16 | 17 | ###> symfony/framework-bundle ### 18 | APP_ENV=dev 19 | APP_SECRET=92efd2f2f2d7e63841ccabf3d15bcf7c 20 | ###< symfony/framework-bundle ### 21 | 22 | LEMMY_USER= 23 | LEMMY_PASSWORD= 24 | LEMMY_MAINTAINER=rikudou@lemmings.world 25 | SOURCE_CODE_LINK=https://github.com/RikudouSage/LemmyAutoTldrBot 26 | DYNAMODB_WHITELIST_BLACKLIST_TABLE= 27 | DYNAMODB_WHITELIST_BLACKLIST_TABLE_TYPE_INDEX= 28 | BOT_MODE=all 29 | SUPPORT_COMMUNITY= 30 | 31 | ###> rikudou/psr6-dynamo-db-bundle ### 32 | DYNAMODB_CACHE_TABLE=cache 33 | #AWS_REGION=us-east-1 34 | ###< rikudou/psr6-dynamo-db-bundle ### 35 | -------------------------------------------------------------------------------- /src/SiteHandler/TimeSiteHandler.php: -------------------------------------------------------------------------------- 1 | browser->request(Request::METHOD_GET, $url); 32 | $parts = $crawler->filter('#article-body p'); 33 | $content = ''; 34 | foreach ($parts as $part) { 35 | $partContent = $part->nodeValue; 36 | if ($partContent === null) { 37 | continue; 38 | } 39 | if (str_starts_with($partContent, 'Read More:')) { 40 | continue; 41 | } 42 | if (count($part->attributes ?? [])) { 43 | break; 44 | } 45 | $content .= $partContent . 
"\n\n"; 46 | } 47 | 48 | return trim($content); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /base.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/lambda/provided:al2 2 | 3 | ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib 4 | 5 | ARG PHP_VERSION=8.2.16 6 | ARG PYTHON_VERSION=3.9.18 7 | SHELL ["/bin/bash", "-c"] 8 | 9 | RUN yum clean all && \ 10 | yum install -y autoconf bison bzip2-devel gcc gcc-c++ git gzip libcurl-devel libffi-devel libicu-devel libxml2-devel \ 11 | make oniguruma-devel openssl-devel re2c sqlite-devel tar unzip zip && \ 12 | yum clean all && \ 13 | rm -rf /var/cache/yum 14 | 15 | # PHP & Composer 16 | RUN curl -sL https://github.com/php/php-src/archive/php-${PHP_VERSION}.tar.gz | tar -xz && \ 17 | cd php-src-php-${PHP_VERSION} && \ 18 | ./buildconf --force && \ 19 | ./configure --prefix=/usr --with-openssl --with-curl --with-zlib --without-pear --enable-bcmath --with-bz2 --enable-mbstring --enable-intl && \ 20 | make -j $(nproc) && \ 21 | make install && \ 22 | curl -sS https://getcomposer.org/installer | php -- --install-dir=/usr/bin --filename=composer && \ 23 | cd .. && rm -rf php-src-php-${PHP_VERSION} 24 | 25 | # Python 26 | RUN curl -sL https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz | tar -xz && \ 27 | cd Python-${PYTHON_VERSION} && \ 28 | ./configure --prefix=/usr --enable-optimizations --with-system-ffi --with-computed-gotos --enable-loadable-sqlite-extensions --enable-shared --disable-test-modules && \ 29 | make -j $(nproc) && \ 30 | make altinstall && \ 31 | cd .. 
&& rm -rf Python-${PYTHON_VERSION} 32 | -------------------------------------------------------------------------------- /src/SiteHandler/TheBulwarkSiteHandler.php: -------------------------------------------------------------------------------- 1 | getHostnames() as $hostname) { 24 | if (str_ends_with($host, $hostname)) { 25 | return true; 26 | } 27 | } 28 | 29 | return false; 30 | } 31 | 32 | protected function getSelector(): string 33 | { 34 | return '.body p'; 35 | } 36 | 37 | protected function breakIf(): ?callable 38 | { 39 | return static function (DOMNode $node): bool { 40 | if (!$node instanceof DOMElement) { 41 | return false; 42 | } 43 | 44 | if ($node->tagName !== 'p') { 45 | return false; 46 | } 47 | 48 | if (!isset($node->attributes['class'])) { 49 | return false; 50 | } 51 | 52 | $class = $node->attributes['class']; 53 | assert($class instanceof DOMAttr); 54 | 55 | return str_contains($class->value, 'button-wrapper'); 56 | }; 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/SiteHandler/NikkeiAsiaSiteHandler.php: -------------------------------------------------------------------------------- 1 | httpClient->request(Request::METHOD_GET, $newUrl, [ 28 | 'headers' => [ 29 | 'User-Agent' => $this->getUserAgent(), 30 | ], 31 | ]); 32 | if ($response->getStatusCode() !== Response::HTTP_OK) { 33 | throw new RuntimeException('Failed getting article content'); 34 | } 35 | $json = json_decode($response->getContent(), true); 36 | assert(is_array($json)); 37 | 38 | return new Crawler($json['body']); 39 | } 40 | 41 | protected function getSelector(): string 42 | { 43 | return 'p'; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/Service/PostService.php: -------------------------------------------------------------------------------- 1 | 20 | */ 21 | public function getPosts(int $untilId, int $limit = 300): iterable 22 | { 23 | $community = null; 24 | // 
$community = $this->api->community()->get('bot_playground'); 25 | 26 | $i = 0; 27 | $page = 1; 28 | while (true) { 29 | ++$i; 30 | $posts = $this->api->post()->getPosts( 31 | community: $community, 32 | page: $page, 33 | sort: SortType::New, 34 | listingType: ListingType::All, 35 | ); 36 | foreach ($posts as $post) { 37 | if ($post->post->published > new DateTimeImmutable()) { 38 | continue; 39 | } 40 | if ($post->post->featuredLocal) { 41 | continue; 42 | } 43 | if ($post->post->id > $untilId && $i < $limit) { 44 | yield $post; 45 | } else { 46 | break 2; 47 | } 48 | } 49 | ++$page; 50 | } 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/SummaryTextWrapper/CondensedSummaryTextWrapperProvider.php: -------------------------------------------------------------------------------- 1 | $condensedInstances 12 | * @param array $condensedCommunities 13 | */ 14 | public function __construct( 15 | private array $condensedInstances, 16 | private array $condensedCommunities, 17 | private RatioFormatter $ratioFormatter, 18 | ) { 19 | } 20 | 21 | public function supports(Community $community): bool 22 | { 23 | $instance = parse_url($community->actorId, PHP_URL_HOST); 24 | $fullCommunityName = "{$community->name}@{$instance}"; 25 | 26 | return in_array($fullCommunityName, $this->condensedCommunities, true) 27 | || in_array($instance, $this->condensedInstances, true); 28 | } 29 | 30 | public function getSummaryText(array $summary, string $originalText): string 31 | { 32 | $stringSummary = implode("\n\n", $summary); 33 | 34 | $stats = $this->ratioFormatter->getRatio($originalText, $stringSummary); 35 | 36 | return <<formattedRatioSaved} of original text. 
44 | ::: 45 | CANT_THINK_OF_UNIQUE_DELIMITER; 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/Service/HardcodedPermissionChecker.php: -------------------------------------------------------------------------------- 1 | $instanceBlacklist 13 | * @param array $instanceWhitelist 14 | * @param array $communityBlacklist 15 | * @param array $communityWhitelist 16 | */ 17 | public function __construct( 18 | private array $instanceBlacklist, 19 | private array $instanceWhitelist, 20 | private array $communityBlacklist, 21 | private array $communityWhitelist, 22 | private BotMode $botMode, 23 | ) { 24 | } 25 | 26 | public function canPostToCommunity(Community $community): bool 27 | { 28 | $communityName = $this->getCommunityName($community); 29 | $instance = $this->getInstance($community); 30 | 31 | if (in_array($communityName, $this->communityWhitelist, true)) { 32 | return true; 33 | } 34 | if (in_array($communityName, $this->communityBlacklist, true)) { 35 | return false; 36 | } 37 | if (in_array($instance, $this->instanceWhitelist, true)) { 38 | return true; 39 | } 40 | if (in_array($instance, $this->instanceBlacklist, true)) { 41 | return false; 42 | } 43 | 44 | return $this->botMode === BotMode::All; 45 | } 46 | 47 | private function getInstance(Community $community): string 48 | { 49 | return parse_url($community->actorId, PHP_URL_HOST) 50 | ?: throw new RuntimeException('Failed to extract host from community.'); 51 | } 52 | 53 | private function getCommunityName(Community $community): string 54 | { 55 | return "{$community->name}@{$this->getInstance($community)}"; 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/Command/ManualRunCommand.php: -------------------------------------------------------------------------------- 1 | addArgument( 29 | name: 'url', 30 | mode: InputArgument::REQUIRED, 31 | ) 32 | ->addOption( 33 | name: 'show-text', 34 | shortcut: 
't', 35 | mode: InputOption::VALUE_NONE, 36 | description: 'Show the whole text instead of summary', 37 | ) 38 | ; 39 | } 40 | 41 | protected function execute(InputInterface $input, OutputInterface $output): int 42 | { 43 | $io = new SymfonyStyle($input, $output); 44 | 45 | $url = $input->getArgument('url'); 46 | $content = $this->siteHandler->getContent($url); 47 | if (!$content) { 48 | $io->error('Failed getting content'); 49 | 50 | return self::FAILURE; 51 | } 52 | 53 | if ($input->getOption('show-text')) { 54 | $io->success($content); 55 | } else { 56 | $summary = $this->summaryProvider->getSummary($content, 5); 57 | $io->success($summary); 58 | } 59 | 60 | return self::SUCCESS; 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/Service/LinkResolver.php: -------------------------------------------------------------------------------- 1 | apId, PHP_URL_HOST); 24 | $commentInstance = parse_url($comment->apId, PHP_URL_HOST); 25 | 26 | if ($instance === $commentInstance) { 27 | $error = false; 28 | 29 | return $comment->apId; 30 | } 31 | 32 | $targetInstanceApi = new DefaultLemmyApi( 33 | instanceUrl: "https://{$instance}", 34 | version: LemmyApiVersion::Version3, 35 | httpClient: $this->httpClient, 36 | requestFactory: $this->requestFactory, 37 | ); 38 | 39 | try { 40 | $resolved = $targetInstanceApi->miscellaneous()->resolveObject($comment->apId); 41 | assert($resolved->comment !== null); 42 | $error = false; 43 | 44 | return "https://{$instance}/comment/{$resolved->comment->comment->id}"; 45 | } catch (LemmyApiException) { 46 | $error = true; 47 | 48 | return $comment->apId; 49 | } 50 | } 51 | 52 | public function getPostLink(Post $post, ?string $instance = null): string 53 | { 54 | $instance ??= parse_url($post->apId, PHP_URL_HOST); 55 | $postInstance = parse_url($post->apId, PHP_URL_HOST); 56 | if ($instance === $postInstance) { 57 | return $post->apId; 58 | } 59 | 60 | $targetInstanceApi = new 
DefaultLemmyApi( 61 | instanceUrl: "https://{$instance}", 62 | version: LemmyApiVersion::Version3, 63 | httpClient: $this->httpClient, 64 | requestFactory: $this->requestFactory, 65 | ); 66 | 67 | try { 68 | $resolved = $targetInstanceApi->miscellaneous()->resolveObject($post->apId); 69 | assert($resolved->post !== null); 70 | 71 | return "https://{$instance}/post/{$resolved->post->post->id}"; 72 | } catch (LemmyApiException) { 73 | return $post->apId; 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "project", 3 | "license": "proprietary", 4 | "minimum-stability": "stable", 5 | "prefer-stable": true, 6 | "require": { 7 | "php": "^8.2", 8 | "ext-ctype": "*", 9 | "ext-dom": "*", 10 | "ext-iconv": "*", 11 | "ext-intl": "*", 12 | "nyholm/psr7": "^1.8", 13 | "rikudou/lemmy-api": "^0.5.8", 14 | "rikudou/memoize-bundle": "^1.0", 15 | "rikudou/psr6-dynamo-db-bundle": "^4.0", 16 | "symfony/browser-kit": "6.3.*", 17 | "symfony/console": "6.3.*", 18 | "symfony/css-selector": "6.3.*", 19 | "symfony/dotenv": "6.3.*", 20 | "symfony/flex": "^2", 21 | "symfony/framework-bundle": "6.3.*", 22 | "symfony/http-client": "6.3.*", 23 | "symfony/runtime": "6.3.*", 24 | "symfony/yaml": "6.3.*" 25 | }, 26 | "require-dev": { 27 | "friendsofphp/php-cs-fixer": "^3.22", 28 | "phpstan/extension-installer": "^1.3", 29 | "phpstan/phpstan": "^1.10", 30 | "phpstan/phpstan-symfony": "^1.3" 31 | }, 32 | "config": { 33 | "allow-plugins": { 34 | "php-http/discovery": true, 35 | "symfony/flex": true, 36 | "symfony/runtime": true, 37 | "phpstan/extension-installer": true 38 | }, 39 | "sort-packages": true 40 | }, 41 | "autoload": { 42 | "psr-4": { 43 | "App\\": "src/", 44 | "App\\Memoized\\": "memoized/" 45 | } 46 | }, 47 | "autoload-dev": { 48 | "psr-4": { 49 | "App\\Tests\\": "tests/" 50 | } 51 | }, 52 | "replace": { 53 | 
"symfony/polyfill-ctype": "*", 54 | "symfony/polyfill-iconv": "*", 55 | "symfony/polyfill-php72": "*", 56 | "symfony/polyfill-php73": "*", 57 | "symfony/polyfill-php74": "*", 58 | "symfony/polyfill-php80": "*", 59 | "symfony/polyfill-php81": "*" 60 | }, 61 | "scripts": { 62 | "auto-scripts": { 63 | "cache:clear": "symfony-cmd", 64 | "assets:install %PUBLIC_DIR%": "symfony-cmd" 65 | }, 66 | "post-install-cmd": [ 67 | "@auto-scripts" 68 | ], 69 | "post-update-cmd": [ 70 | "@auto-scripts" 71 | ], 72 | "fixer": "php-cs-fixer fix --verbose --allow-risky=yes", 73 | "phpstan": "phpstan analyse --level=max src", 74 | "test": [ 75 | "@fixer --dry-run", 76 | "@phpstan" 77 | ] 78 | }, 79 | "conflict": { 80 | "symfony/symfony": "*" 81 | }, 82 | "extra": { 83 | "symfony": { 84 | "allow-contrib": true, 85 | "require": "6.3.*" 86 | } 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/Command/ReplyToDirectMessagesCommand.php: -------------------------------------------------------------------------------- 1 | api->user()->get($this->maintainer); 28 | $maintainerName = $maintainer->name; 29 | $maintainerInstance = parse_url($maintainer->actorId, PHP_URL_HOST); 30 | 31 | $i = 1; 32 | foreach ($this->getUnreadMessages() as $privateMessage) { 33 | $content = $privateMessage->privateMessage->content; 34 | $author = $privateMessage->creator->name; 35 | $authorInstance = parse_url($privateMessage->creator->actorId, PHP_URL_HOST); 36 | 37 | $textForMaintainer = "Message to bot {$this->currentUsername} from [@{$author}@{$authorInstance}](/u/{$author}@{$authorInstance}):\n\n---\n\n{$content}"; 38 | $textForSender = "Hi there! I'm a bot and this inbox is not regularly checked. 
I have forwarded your message to my author, [@{$maintainerName}@{$maintainerInstance}](/u/{$maintainerName}@{$maintainerInstance})."; 39 | 40 | error_log("Replying to private message #{$i}"); 41 | 42 | $this->api->currentUser()->sendPrivateMessage(recipient: $maintainer, content: $textForMaintainer); 43 | $this->api->currentUser()->sendPrivateMessage(recipient: $privateMessage->creator, content: $textForSender); 44 | 45 | $this->api->currentUser()->markPrivateMessageAsRead($privateMessage->privateMessage); 46 | error_log("Replied to message #{$i}"); 47 | 48 | ++$i; 49 | } 50 | 51 | error_log('Handling direct messages done.'); 52 | 53 | return self::SUCCESS; 54 | } 55 | 56 | /** 57 | * @return iterable 58 | */ 59 | private function getUnreadMessages(): iterable 60 | { 61 | $page = 1; 62 | do { 63 | $messages = $this->api->currentUser()->getPrivateMessages(page: $page, unreadOnly: true); 64 | 65 | yield from $messages; 66 | ++$page; 67 | } while (count($messages)); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/SiteHandler/AbstractSiteHandler.php: -------------------------------------------------------------------------------- 1 | 24 | */ 25 | abstract protected function getHostnames(): array; 26 | 27 | abstract protected function getSelector(): string; 28 | 29 | public function supports(string $url): bool 30 | { 31 | $host = parse_url($url, PHP_URL_HOST); 32 | if (!$host) { 33 | return false; 34 | } 35 | 36 | return in_array($host, $this->getHostnames(), true); 37 | } 38 | 39 | #[Memoize] 40 | public function getContent(string $url): string 41 | { 42 | $crawler = $this->getArticleCrawler($url); 43 | $parts = $crawler->filter($this->getSelector()); 44 | $content = ''; 45 | 46 | $count = count($parts); 47 | $i = 0; 48 | $ignoreLast = $this->ignoreLast(); 49 | 50 | $regex = $this->skipIfMatches(); 51 | $breakCallable = $this->breakIf() ?? static fn () => false; 52 | $skipCallable = $this->skipIf() ?? 
static fn () => false; 53 | $context = []; 54 | 55 | foreach ($parts as $part) { 56 | if ($i === $count - $ignoreLast) { 57 | break; 58 | } 59 | if ($breakCallable($part, $context)) { 60 | break; 61 | } 62 | ++$i; 63 | if ($regex && $part->nodeValue && preg_match($regex, $part->nodeValue)) { 64 | continue; 65 | } 66 | if ($skipCallable($part, $context)) { 67 | continue; 68 | } 69 | $content .= $part->nodeValue . "\n\n"; 70 | } 71 | 72 | return trim($content); 73 | } 74 | 75 | protected function getArticleCrawler(string $url): Crawler 76 | { 77 | return $this->browser->request(Request::METHOD_GET, $url, server: [ 78 | 'HTTP_USER_AGENT' => $this->getUserAgent(), 79 | ]); 80 | } 81 | 82 | protected function ignoreLast(): int 83 | { 84 | return 0; 85 | } 86 | 87 | protected function getUserAgent(): string 88 | { 89 | return 'LemmyAutoTldrBot'; 90 | } 91 | 92 | protected function skipIfMatches(): ?string 93 | { 94 | return null; 95 | } 96 | 97 | /** 98 | * @return (callable(DOMNode $node, array $context): bool)|null 99 | */ 100 | protected function skipIf(): ?callable 101 | { 102 | return null; 103 | } 104 | 105 | /** 106 | * @return (callable(DOMNode $node, array $context): bool)|null 107 | */ 108 | protected function breakIf(): ?callable 109 | { 110 | return null; 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /symfony.lock: -------------------------------------------------------------------------------- 1 | { 2 | "friendsofphp/php-cs-fixer": { 3 | "version": "3.22", 4 | "recipe": { 5 | "repo": "github.com/symfony/recipes", 6 | "branch": "main", 7 | "version": "3.0", 8 | "ref": "be2103eb4a20942e28a6dd87736669b757132435" 9 | }, 10 | "files": [ 11 | ".php-cs-fixer.dist.php" 12 | ] 13 | }, 14 | "nyholm/psr7": { 15 | "version": "1.8", 16 | "recipe": { 17 | "repo": "github.com/symfony/recipes", 18 | "branch": "main", 19 | "version": "1.0", 20 | "ref": "4a8c0345442dcca1d8a2c65633dcf0285dd5a5a2" 21 | }, 22 | "files": [ 
23 | "config/packages/nyholm_psr7.yaml" 24 | ] 25 | }, 26 | "phpstan/phpstan": { 27 | "version": "1.10", 28 | "recipe": { 29 | "repo": "github.com/symfony/recipes-contrib", 30 | "branch": "main", 31 | "version": "1.0", 32 | "ref": "d74d4d719d5f53856c9c13544aa22d44144b1819" 33 | }, 34 | "files": [ 35 | "phpstan.neon" 36 | ] 37 | }, 38 | "rikudou/memoize-bundle": { 39 | "version": "v1.0.8" 40 | }, 41 | "rikudou/psr6-dynamo-db-bundle": { 42 | "version": "4.0", 43 | "recipe": { 44 | "repo": "github.com/symfony/recipes-contrib", 45 | "branch": "main", 46 | "version": "2.0", 47 | "ref": "b8c90b14aff68e58377f293274db6e5a9855445a" 48 | }, 49 | "files": [ 50 | "config/packages/prod/rikudou_dynamo_db_cache.yaml" 51 | ] 52 | }, 53 | "symfony/console": { 54 | "version": "6.3", 55 | "recipe": { 56 | "repo": "github.com/symfony/recipes", 57 | "branch": "main", 58 | "version": "5.3", 59 | "ref": "da0c8be8157600ad34f10ff0c9cc91232522e047" 60 | }, 61 | "files": [ 62 | "bin/console" 63 | ] 64 | }, 65 | "symfony/flex": { 66 | "version": "2.3", 67 | "recipe": { 68 | "repo": "github.com/symfony/recipes", 69 | "branch": "main", 70 | "version": "1.0", 71 | "ref": "146251ae39e06a95be0fe3d13c807bcf3938b172" 72 | }, 73 | "files": [ 74 | ".env" 75 | ] 76 | }, 77 | "symfony/framework-bundle": { 78 | "version": "6.3", 79 | "recipe": { 80 | "repo": "github.com/symfony/recipes", 81 | "branch": "main", 82 | "version": "6.2", 83 | "ref": "af47254c5e4cd543e6af3e4508298ffebbdaddd3" 84 | }, 85 | "files": [ 86 | "config/packages/cache.yaml", 87 | "config/packages/framework.yaml", 88 | "config/preload.php", 89 | "config/routes/framework.yaml", 90 | "config/services.yaml", 91 | "public/index.php", 92 | "src/Controller/.gitignore", 93 | "src/Kernel.php" 94 | ] 95 | }, 96 | "symfony/routing": { 97 | "version": "6.3", 98 | "recipe": { 99 | "repo": "github.com/symfony/recipes", 100 | "branch": "main", 101 | "version": "6.2", 102 | "ref": "e0a11b4ccb8c9e70b574ff5ad3dfdcd41dec5aa6" 103 | }, 104 | "files": [ 
105 | "config/packages/routing.yaml", 106 | "config/routes.yaml" 107 | ] 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /serverless.yaml: -------------------------------------------------------------------------------- 1 | service: LemmyAutoTldrBot 2 | 3 | provider: 4 | name: aws 5 | region: eu-central-1 6 | stage: ${opt:stage, 'prod'} 7 | iam: 8 | role: 9 | statements: 10 | - Effect: Allow 11 | Resource: 12 | - !GetAtt CacheTable.Arn 13 | - !GetAtt BlacklistWhitelistTable.Arn 14 | - !Join ['/', [!GetAtt BlacklistWhitelistTable.Arn, index, '*']] 15 | Action: 16 | - dynamodb:DescribeTable 17 | - dynamodb:Query 18 | - dynamodb:Scan 19 | - dynamodb:GetItem 20 | - dynamodb:PutItem 21 | - dynamodb:UpdateItem 22 | - dynamodb:DeleteItem 23 | stackTags: 24 | BillingProject: LemmyAutoTldr 25 | ecr: 26 | images: 27 | auto_tldr: 28 | path: ./ 29 | environment: 30 | APP_ENV: ${self:provider.stage} 31 | APP_SECRET: !Join [ '', [ '{{resolve:secretsmanager:', !Ref AppSecret, ':SecretString:secret}}' ] ] 32 | DYNAMODB_CACHE_TABLE: !Ref CacheTable 33 | DYNAMODB_WHITELIST_BLACKLIST_TABLE: !Ref BlacklistWhitelistTable 34 | DYNAMODB_WHITELIST_BLACKLIST_TABLE_TYPE_INDEX: ${self:custom.TypeIndexName} 35 | LEMMY_PASSWORD: ${env:LEMMY_PASSWORD} 36 | LEMMY_USER: ${env:LEMMY_USER} 37 | SUPPORT_COMMUNITY: autotldr@lemmings.world 38 | 39 | plugins: 40 | - serverless-plugin-log-retention 41 | 42 | custom: 43 | TypeIndexName: type-index 44 | logRetentionInDays: 90 45 | 46 | functions: 47 | cronPosts: 48 | image: 49 | name: auto_tldr 50 | memorySize: 512 51 | timeout: 300 52 | events: 53 | - schedule: 54 | rate: rate(5 minutes) 55 | input: '"app:reply-to-posts"' 56 | cronMentions: 57 | image: 58 | name: auto_tldr 59 | memorySize: 512 60 | timeout: 300 61 | events: 62 | - schedule: 63 | rate: rate(5 minutes) 64 | input: '"app:reply-to-mentions"' 65 | cronPrivateMessages: 66 | image: 67 | name: auto_tldr 68 | memorySize: 512 69 | timeout: 300 
70 | events: 71 | - schedule: 72 | rate: rate(15 minutes) 73 | input: '"app:reply-to-pms"' 74 | 75 | resources: 76 | Resources: 77 | CacheTable: 78 | Type: AWS::DynamoDB::Table 79 | Properties: 80 | AttributeDefinitions: 81 | - AttributeName: id 82 | AttributeType: S 83 | BillingMode: PAY_PER_REQUEST 84 | TimeToLiveSpecification: 85 | AttributeName: ttl 86 | Enabled: true 87 | KeySchema: 88 | - AttributeName: id 89 | KeyType: HASH 90 | BlacklistWhitelistTable: 91 | Type: AWS::DynamoDB::Table 92 | Properties: 93 | AttributeDefinitions: 94 | - AttributeName: id 95 | AttributeType: S 96 | - AttributeName: type 97 | AttributeType: S 98 | BillingMode: PAY_PER_REQUEST 99 | GlobalSecondaryIndexes: 100 | - IndexName: ${self:custom.TypeIndexName} 101 | KeySchema: 102 | - AttributeName: type 103 | KeyType: HASH 104 | Projection: 105 | ProjectionType: ALL 106 | KeySchema: 107 | - AttributeName: id 108 | KeyType: HASH 109 | AppSecret: 110 | Type: AWS::SecretsManager::Secret 111 | Properties: 112 | Description: Lemmy LinkFixer bot app secret 113 | GenerateSecretString: 114 | SecretStringTemplate: '{}' 115 | GenerateStringKey: "secret" 116 | PasswordLength: 32 117 | RequireEachIncludedType: true 118 | ExcludeUppercase: true 119 | ExcludePunctuation: true 120 | ExcludeCharacters: ghijklmnopqrstuvwxyz 121 | -------------------------------------------------------------------------------- /config/services.yaml: -------------------------------------------------------------------------------- 1 | # This file is the entry point to configure your own services. 2 | # Files in the packages/ subdirectory configure your dependencies. 
3 | 4 | # Put parameters here that don't need to change on each machine where the app is deployed 5 | # https://symfony.com/doc/current/best_practices.html#use-parameters-for-application-configuration 6 | parameters: 7 | app.summary_paragraphs: 6 8 | lemmy.instance: lemmings.world 9 | lemmy.support_community: '%env(SUPPORT_COMMUNITY)%' 10 | instance.blacklist: 11 | - lemmy.world 12 | community.blacklist: 13 | - forteana@feddit.uk 14 | - britishfolktraditions@feddit.uk 15 | instance.whitelist: [] 16 | community.whitelist: 17 | - technology@lemmy.world 18 | - world@lemmy.world 19 | - globalpolitics@lemmy.world 20 | - fediverse@lemmy.world 21 | - dfw@lemmy.world 22 | - politics@lemmy.world 23 | - coloradopolitics@lemmy.world 24 | - archaeology@lemmy.world 25 | - texas@lemmy.world 26 | - gaybros@lemmy.world 27 | instance.condensed: 28 | - beehaw.org 29 | - lemmy.cafe 30 | community.condensed: [] 31 | 32 | app.not_working: 33 | - axios.com # doesn't work without JS 34 | - mining.com # doesn't work without JS 35 | 36 | services: 37 | # default configuration for services in *this* file 38 | _defaults: 39 | autowire: true # Automatically injects dependencies in your services. 40 | autoconfigure: true # Automatically registers your services as commands, event subscribers, etc. 
41 | 42 | # makes classes in src/ available to be used as services 43 | # this creates a service per class whose id is the fully-qualified class name 44 | App\: 45 | resource: '../src/' 46 | exclude: 47 | - '../src/DependencyInjection/' 48 | - '../src/Entity/' 49 | - '../src/Kernel.php' 50 | 51 | # add more service definitions when explicit configuration is needed 52 | # please note that last definitions always *replace* previous ones 53 | 54 | api_version: 55 | class: Rikudou\LemmyApi\Enum\LemmyApiVersion 56 | factory: Rikudou\LemmyApi\Enum\LemmyApiVersion::from 57 | arguments: 58 | - 'v3' 59 | 60 | Symfony\Component\BrowserKit\CookieJar: 61 | 62 | Symfony\Component\BrowserKit\HttpBrowser: 63 | arguments: 64 | $client: '@http_client' 65 | $cookieJar: '@Symfony\Component\BrowserKit\CookieJar' 66 | 67 | App\SummaryProvider\SummaryProvider: '@App\SummaryProvider\SumySummaryProvider' 68 | 69 | Rikudou\LemmyApi\LemmyApi: 70 | class: Rikudou\LemmyApi\DefaultLemmyApi 71 | arguments: 72 | - 'https://%lemmy.instance%' 73 | - '@api_version' 74 | - '@psr18.http_client' 75 | - '@psr18.http_client' 76 | calls: 77 | - login: [ '%env(LEMMY_USER)%', '%env(LEMMY_PASSWORD)%' ] 78 | 79 | App\Command\ReplyToPostsCommand: 80 | arguments: 81 | $instance: '%lemmy.instance%' 82 | $summaryParagraphs: '%app.summary_paragraphs%' 83 | 84 | App\Service\HardcodedPermissionChecker: 85 | arguments: 86 | $instanceBlacklist: '%instance.blacklist%' 87 | $instanceWhitelist: '%instance.whitelist%' 88 | $communityBlacklist: '%community.blacklist%' 89 | $communityWhitelist: '%community.whitelist%' 90 | 91 | App\Command\ReplyToDirectMessagesCommand: 92 | arguments: 93 | $currentUsername: '%env(LEMMY_USER)%' 94 | $maintainer: '%env(LEMMY_MAINTAINER)%' 95 | 96 | App\Command\ReplyToMentionsCommand: 97 | arguments: 98 | $maintainer: '%env(LEMMY_MAINTAINER)%' 99 | $summaryParagraphs: '%app.summary_paragraphs%' 100 | $supportCommunity: '%lemmy.support_community%' 101 | 102 | 
App\SummaryTextWrapper\DefaultSummaryTextWrapperProvider: 103 | arguments: 104 | $sourceCodeLink: '%env(SOURCE_CODE_LINK)%' 105 | 106 | App\SummaryTextWrapper\CondensedSummaryTextWrapperProvider: 107 | arguments: 108 | $condensedCommunities: '%community.condensed%' 109 | $condensedInstances: '%instance.condensed%' 110 | 111 | App\Enum\BotMode: 112 | factory: [App\Enum\BotMode, from] 113 | arguments: 114 | - '%env(BOT_MODE)%' 115 | -------------------------------------------------------------------------------- /.php-cs-fixer.dist.php: -------------------------------------------------------------------------------- 1 | in(__DIR__ . '/src') 5 | ; 6 | 7 | return (new PhpCsFixer\Config()) 8 | ->setRules([ 9 | '@PSR2' => true, 10 | '@PSR12' => true, 11 | 'array_syntax' => ['syntax' => 'short'], 12 | 'assign_null_coalescing_to_coalesce_equal' => true, 13 | 'backtick_to_shell_exec' => true, 14 | 'binary_operator_spaces' => true, 15 | 'blank_line_before_statement' => [ 16 | 'statements' => ['declare', 'return', 'try', 'yield', 'yield_from'], 17 | ], 18 | 'cast_spaces' => ['space' => 'single'], 19 | 'class_attributes_separation' => true, 20 | 'class_reference_name_casing' => true, 21 | 'clean_namespace' => true, 22 | 'concat_space' => [ 23 | 'spacing' => 'one', 24 | ], 25 | 'control_structure_continuation_position' => true, 26 | 'date_time_immutable' => true, 27 | 'dir_constant' => true, 28 | 'explicit_indirect_variable' => true, 29 | 'explicit_string_variable' => true, 30 | 'final_internal_class' => true, 31 | 'fully_qualified_strict_types' => true, 32 | 'function_to_constant' => true, 33 | 'function_typehint_space' => true, 34 | 'get_class_to_class_keyword' => true, 35 | 'global_namespace_import' => true, 36 | 'include' => true, 37 | 'increment_style' => true, 38 | 'integer_literal_case' => true, 39 | 'is_null' => true, 40 | 'lambda_not_used_import' => true, 41 | 'linebreak_after_opening_tag' => true, 42 | 'list_syntax' => true, 43 | 'magic_constant_casing' => true, 44 | 
'magic_method_casing' => true, 45 | 'modernize_strpos' => true, 46 | 'modernize_types_casting' => true, 47 | 'multiline_comment_opening_closing' => true, 48 | 'native_function_casing' => true, 49 | 'native_function_type_declaration_casing' => true, 50 | 'new_with_braces' => true, 51 | 'no_alternative_syntax' => true, 52 | 'no_blank_lines_after_phpdoc' => true, 53 | 'no_break_comment' => false, 54 | 'no_empty_comment' => true, 55 | 'no_empty_phpdoc' => true, 56 | 'no_empty_statement' => true, 57 | 'no_extra_blank_lines' => [ 58 | 'tokens' => [ 59 | 'extra', 60 | 'break', 61 | 'continue', 62 | 'curly_brace_block', 63 | 'parenthesis_brace_block', 64 | 'return', 65 | 'square_brace_block', 66 | 'throw', 67 | 'use', 68 | 'switch', 69 | 'case', 70 | 'default', 71 | ], 72 | ], 73 | 'no_homoglyph_names' => true, 74 | 'no_leading_namespace_whitespace' => true, 75 | 'no_mixed_echo_print' => true, 76 | 'no_null_property_initialization' => true, 77 | 'no_spaces_around_offset' => true, 78 | 'no_superfluous_elseif' => true, 79 | 'no_superfluous_phpdoc_tags' => true, 80 | 'no_trailing_comma_in_singleline_array' => true, 81 | 'no_unneeded_control_parentheses' => true, 82 | 'no_unneeded_curly_braces' => true, 83 | 'no_unneeded_final_method' => true, 84 | 'no_unset_cast' => true, 85 | 'no_unused_imports' => true, 86 | 'no_useless_return' => true, 87 | 'no_useless_sprintf' => true, 88 | 'no_whitespace_before_comma_in_array' => true, 89 | 'normalize_index_brace' => true, 90 | 'nullable_type_declaration_for_default_null_value' => true, 91 | 'object_operator_without_whitespace' => true, 92 | 'octal_notation' => true, 93 | 'operator_linebreak' => true, 94 | 'php_unit_construct' => true, 95 | 'php_unit_dedicate_assert' => true, 96 | 'php_unit_dedicate_assert_internal_type' => true, 97 | 'php_unit_method_casing' => true, 98 | 'php_unit_namespaced' => true, 99 | 'php_unit_set_up_tear_down_visibility' => true, 100 | 'php_unit_test_case_static_method_calls' => ['call_type' => 'this'], 101 | 
'phpdoc_align' => true, 102 | 'phpdoc_indent' => true, 103 | 'phpdoc_no_package' => true, 104 | 'phpdoc_no_useless_inheritdoc' => true, 105 | 'phpdoc_order' => true, 106 | 'phpdoc_return_self_reference' => true, 107 | 'phpdoc_scalar' => true, 108 | 'phpdoc_separation' => true, 109 | 'phpdoc_single_line_var_spacing' => true, 110 | 'phpdoc_trim' => true, 111 | 'pow_to_exponentiation' => true, 112 | 'protected_to_private' => true, 113 | 'regular_callable_call' => true, 114 | 'self_accessor' => true, 115 | 'self_static_accessor' => true, 116 | 'set_type_to_cast' => true, 117 | 'simple_to_complex_string_variable' => true, 118 | 'simplified_null_return' => true, 119 | 'single_line_comment_style' => true, 120 | 'single_quote' => true, 121 | 'standardize_not_equals' => true, 122 | 'static_lambda' => true, 123 | 'switch_case_semicolon_to_colon' => true, 124 | 'ternary_to_null_coalescing' => true, 125 | 'trailing_comma_in_multiline' => true, 126 | 'unary_operator_spaces' => true, 127 | 'void_return' => true, 128 | 'whitespace_after_comma_in_array' => true, 129 | ]) 130 | ->setFinder($finder); 131 | -------------------------------------------------------------------------------- /src/Command/ReplyToPostsCommand.php: -------------------------------------------------------------------------------- 1 | cache->getItem('lastHandled'); 42 | if ($lastHandledIdCache->isHit()) { 43 | $lastHandledId = $lastHandledIdCache->get(); 44 | } else { 45 | $lastHandledId = 0; 46 | } 47 | assert(is_int($lastHandledId)); 48 | $storedLastHandledId = $lastHandledId; 49 | 50 | error_log("Last previous handled post ID: {$storedLastHandledId}"); 51 | 52 | $posts = $this->postService->getPosts($lastHandledId); 53 | 54 | $handledThisRun = []; 55 | 56 | $i = 1; 57 | foreach ($posts as $post) { 58 | try { 59 | if (isset($handledThisRun[$post->post->id])) { 60 | continue; 61 | } 62 | error_log("Handling post #{$i} (id: {$post->post->id})"); 63 | if ($post->post->id <= $storedLastHandledId) { 64 | 
error_log('The post has lower ID than the from previous runs, not handling it'); 65 | break; 66 | } 67 | 68 | if ($post->post->id > $lastHandledId) { 69 | $lastHandledId = $post->post->id; 70 | } 71 | 72 | if (!$post->post->url) { 73 | error_log("Post doesn't contain a link, skipping"); 74 | continue; 75 | } 76 | 77 | if (!$this->permissionChecker->canPostToCommunity($post->community)) { 78 | error_log('Cannot post to the community, skipping'); 79 | continue; 80 | } 81 | 82 | $text = $this->siteHandler->getContent($post->post->url); 83 | if (!$text) { 84 | error_log("Failed reading text for {$post->post->url}, skipping"); 85 | continue; 86 | } 87 | $summary = $this->summaryProvider->getSummary($text, $this->summaryParagraphs); 88 | if (!$summary) { 89 | error_log("Failed generating summary for {$post->post->url}, skipping"); 90 | continue; 91 | } 92 | 93 | $response = $this->summaryTextWrapper->getResponseText($post->community, $summary, $text); 94 | if ($response === null) { 95 | error_log('Failed generating wrapped summary, skipping'); 96 | continue; 97 | } 98 | 99 | try { 100 | $comment = $this->api->comment()->create( 101 | post: $post->post, 102 | content: $response, 103 | language: Language::English 104 | ); 105 | $this->api->currentUser()->resetCommentUpvoteDownvote($comment->comment); 106 | error_log("Replying to '{$this->instance}/post/{$post->post->id}' using model '{$this->summaryProvider->getId()}'"); 107 | $handledThisRun[$post->post->id] = true; 108 | } catch (LanguageNotAllowedException) { 109 | $comment = $this->api->comment()->create( 110 | post: $post->post, 111 | content: $response, 112 | language: Language::Undetermined 113 | ); 114 | $this->api->currentUser()->resetCommentUpvoteDownvote($comment->comment); 115 | error_log("Replying to '{$this->instance}/post/{$post->post->id}' using model '{$this->summaryProvider->getId()}'"); 116 | $handledThisRun[$post->post->id] = true; 117 | } 118 | } catch 
(ContentFetchingFailedException|LanguageNotAllowedException) {
                // Content could not be fetched, or the comment was rejected even
                // with the fallback language: log it and move on to the next post.
                error_log('Got an exception, skipping');
                continue;
            } finally {
                error_log("Done handling post #{$i}");
                // Advance the per-run counter used in the log messages. It lives in
                // `finally` so it also advances on `continue`, `break` and exceptions;
                // without it every log line reported "post #1".
                ++$i;
                // Persist the highest post ID seen so far after every post, so an
                // aborted run does not start again from the old position.
                $lastHandledIdCache->set($lastHandledId);
                $this->cache->save($lastHandledIdCache);
            }
        }

        return self::SUCCESS;
    }
}
--------------------------------------------------------------------------------
/python/source/data/english.py:
--------------------------------------------------------------------------------
englishStopWords = """
a
a's
able
about
above
according
accordingly
across
actually
after
afterwards
again
against
ain't
all
allow
allows
almost
alone
along
already
also
although
always
am
among
amongst
an
and
another
any
anybody
anyhow
anyone
anything
anyway
anyways
anywhere
apart
appear
appreciate
appropriate
are
aren't
around
as
aside
ask
asking
associated
at
available
away
awfully
b
be
became
because
become
becomes
becoming
been
before
beforehand
behind
being
believe
below
beside
besides
best
better
between
beyond
both
brief
but
by
c
c'mon
c's
came
can
can't
cannot
cant
cause
causes
certain
certainly
changes
clearly
co
com
come
comes
concerning
consequently
consider
considering
contain
containing
contains
corresponding
could
couldn't
course
currently
d
definitely
described
despite
did
didn't
different
do
does
doesn't
doing 121 | don't 122 | done 123 | down 124 | downwards 125 | during 126 | e 127 | each 128 | edu 129 | eg 130 | eight 131 | either 132 | else 133 | elsewhere 134 | enough 135 | entirely 136 | especially 137 | et 138 | etc 139 | even 140 | ever 141 | every 142 | everybody 143 | everyone 144 | everything 145 | everywhere 146 | ex 147 | exactly 148 | example 149 | except 150 | f 151 | far 152 | few 153 | fifth 154 | first 155 | five 156 | followed 157 | following 158 | follows 159 | for 160 | former 161 | formerly 162 | forth 163 | four 164 | from 165 | further 166 | furthermore 167 | g 168 | get 169 | gets 170 | getting 171 | given 172 | gives 173 | go 174 | goes 175 | going 176 | gone 177 | got 178 | gotten 179 | greetings 180 | h 181 | had 182 | hadn't 183 | happens 184 | hardly 185 | has 186 | hasn't 187 | have 188 | haven't 189 | having 190 | he 191 | he'd 192 | he'll 193 | he's 194 | hello 195 | help 196 | hence 197 | her 198 | here 199 | here's 200 | hereafter 201 | hereby 202 | herein 203 | hereupon 204 | hers 205 | herself 206 | hi 207 | him 208 | himself 209 | his 210 | hither 211 | hopefully 212 | how 213 | how's 214 | howbeit 215 | however 216 | i 217 | i'd 218 | i'll 219 | i'm 220 | i've 221 | ie 222 | if 223 | ignored 224 | immediate 225 | in 226 | inasmuch 227 | inc 228 | indeed 229 | indicate 230 | indicated 231 | indicates 232 | inner 233 | insofar 234 | instead 235 | into 236 | inward 237 | is 238 | isn't 239 | it 240 | it'd 241 | it'll 242 | it's 243 | its 244 | itself 245 | j 246 | just 247 | k 248 | keep 249 | keeps 250 | kept 251 | know 252 | known 253 | knows 254 | l 255 | last 256 | lately 257 | later 258 | latter 259 | latterly 260 | least 261 | less 262 | lest 263 | let 264 | let's 265 | like 266 | liked 267 | likely 268 | little 269 | look 270 | looking 271 | looks 272 | ltd 273 | m 274 | mainly 275 | many 276 | may 277 | maybe 278 | me 279 | mean 280 | meanwhile 281 | merely 282 | might 283 | more 284 | moreover 285 | most 286 | mostly 287 
| much 288 | must 289 | mustn't 290 | my 291 | myself 292 | n 293 | name 294 | namely 295 | nd 296 | near 297 | nearly 298 | necessary 299 | need 300 | needs 301 | neither 302 | never 303 | nevertheless 304 | new 305 | next 306 | nine 307 | no 308 | nobody 309 | non 310 | none 311 | noone 312 | nor 313 | normally 314 | not 315 | nothing 316 | novel 317 | now 318 | nowhere 319 | o 320 | obviously 321 | of 322 | off 323 | often 324 | oh 325 | ok 326 | okay 327 | old 328 | on 329 | once 330 | one 331 | ones 332 | only 333 | onto 334 | or 335 | other 336 | others 337 | otherwise 338 | ought 339 | our 340 | ours 341 | ourselves 342 | out 343 | outside 344 | over 345 | overall 346 | own 347 | p 348 | particular 349 | particularly 350 | per 351 | perhaps 352 | placed 353 | please 354 | plus 355 | possible 356 | presumably 357 | probably 358 | provides 359 | q 360 | que 361 | quite 362 | qv 363 | r 364 | rather 365 | rd 366 | re 367 | really 368 | reasonably 369 | regarding 370 | regardless 371 | regards 372 | relatively 373 | respectively 374 | right 375 | s 376 | said 377 | same 378 | saw 379 | say 380 | saying 381 | says 382 | second 383 | secondly 384 | see 385 | seeing 386 | seem 387 | seemed 388 | seeming 389 | seems 390 | seen 391 | self 392 | selves 393 | sensible 394 | sent 395 | serious 396 | seriously 397 | seven 398 | several 399 | shall 400 | shan't 401 | she 402 | she'd 403 | she'll 404 | she's 405 | should 406 | shouldn't 407 | since 408 | six 409 | so 410 | some 411 | somebody 412 | somehow 413 | someone 414 | something 415 | sometime 416 | sometimes 417 | somewhat 418 | somewhere 419 | soon 420 | sorry 421 | specified 422 | specify 423 | specifying 424 | still 425 | sub 426 | such 427 | sup 428 | sure 429 | t 430 | t's 431 | take 432 | taken 433 | tell 434 | tends 435 | th 436 | than 437 | thank 438 | thanks 439 | thanx 440 | that 441 | that's 442 | thats 443 | the 444 | their 445 | theirs 446 | them 447 | themselves 448 | then 449 | thence 450 | there 451 
| there's 452 | thereafter 453 | thereby 454 | therefore 455 | therein 456 | theres 457 | thereupon 458 | these 459 | they 460 | they'd 461 | they'll 462 | they're 463 | they've 464 | think 465 | third 466 | this 467 | thorough 468 | thoroughly 469 | those 470 | though 471 | three 472 | through 473 | throughout 474 | thru 475 | thus 476 | to 477 | together 478 | too 479 | took 480 | toward 481 | towards 482 | tried 483 | tries 484 | truly 485 | try 486 | trying 487 | twice 488 | two 489 | u 490 | un 491 | under 492 | unfortunately 493 | unless 494 | unlikely 495 | until 496 | unto 497 | up 498 | upon 499 | us 500 | use 501 | used 502 | useful 503 | uses 504 | using 505 | usually 506 | uucp 507 | v 508 | value 509 | various 510 | very 511 | via 512 | viz 513 | vs 514 | w 515 | want 516 | wants 517 | was 518 | wasn't 519 | way 520 | we 521 | we'd 522 | we'll 523 | we're 524 | we've 525 | welcome 526 | well 527 | went 528 | were 529 | weren't 530 | what 531 | what's 532 | whatever 533 | when 534 | when's 535 | whence 536 | whenever 537 | where 538 | where's 539 | whereafter 540 | whereas 541 | whereby 542 | wherein 543 | whereupon 544 | wherever 545 | whether 546 | which 547 | while 548 | whither 549 | who 550 | who's 551 | whoever 552 | whole 553 | whom 554 | whose 555 | why 556 | why's 557 | will 558 | willing 559 | wish 560 | with 561 | within 562 | without 563 | won't 564 | wonder 565 | would 566 | wouldn't 567 | x 568 | y 569 | yes 570 | yet 571 | you 572 | you'd 573 | you'll 574 | you're 575 | you've 576 | your 577 | yours 578 | yourself 579 | yourselves 580 | z 581 | zero 582 | """ -------------------------------------------------------------------------------- /src/Command/ReplyToMentionsCommand.php: -------------------------------------------------------------------------------- 1 | api->user()->get($this->maintainer); 47 | $maintainerName = $maintainer->name; 48 | $maintainerInstance = parse_url($maintainer->actorId, PHP_URL_HOST); 49 | 50 | $i = 1; 51 | 
foreach ($this->getUnreadMentions() as $unreadMention) { 52 | error_log("Handling mention #{$i}"); 53 | 54 | try { 55 | $hasPermissionToPost = $this->permissionChecker->canPostToCommunity($unreadMention->community); 56 | $mentionerInstance = parse_url($unreadMention->creator->actorId, PHP_URL_HOST); 57 | assert(is_string($mentionerInstance)); 58 | 59 | error_log('Has permission to post: ' . ($hasPermissionToPost ? 'true' : 'false')); 60 | 61 | $me = $unreadMention->recipient; 62 | $post = $unreadMention->post; 63 | if (!$post->url) { 64 | error_log('No URL'); 65 | $this->sendReply( 66 | "I'm sorry, I don't see any link in the post, I'm not sure what I should summarize.", 67 | $unreadMention, 68 | $unreadMention->comment, 69 | ); 70 | continue; 71 | } 72 | 73 | $url = $post->url; 74 | 75 | try { 76 | $topComments = [...$this->getAllTopComments($post)]; 77 | $topCommentsByMe = array_filter($topComments, static fn (CommentView $comment) => $comment->creator->id === $me->id); 78 | 79 | if (!count($topCommentsByMe)) { 80 | error_log('Creating summary'); 81 | $articleContent = $this->siteHandler->getContent($url); 82 | if (!$articleContent) { 83 | error_log('No content from article'); 84 | $this->sendReply( 85 | "I'm sorry, I couldn't create a summary for the article.", 86 | $unreadMention, 87 | $unreadMention->comment, 88 | ); 89 | continue; 90 | } 91 | $summary = $this->summaryProvider->getSummary($articleContent, $this->summaryParagraphs); 92 | 93 | if (!$hasPermissionToPost) { 94 | $this->sendReply(implode("\n\n", $summary), $unreadMention); 95 | continue; 96 | } 97 | 98 | $response = $this->summaryTextWrapper->getResponseText($unreadMention->community, $summary, $articleContent); 99 | if ($response === null) { 100 | continue; 101 | } 102 | 103 | try { 104 | $summaryComment = $this->api->comment()->create( 105 | post: $unreadMention->post, 106 | content: $response, 107 | language: Language::English, 108 | ); 109 | } catch (LanguageNotAllowedException) { 110 | 
$summaryComment = $this->api->comment()->create( 111 | post: $unreadMention->post, 112 | content: $response, 113 | language: Language::Undetermined, 114 | ); 115 | } 116 | $response = 'I just created the summary! '; 117 | } else { 118 | error_log('Summary already posted'); 119 | $summaryComment = $topCommentsByMe[array_key_first($topCommentsByMe)]; 120 | $response = 'I already created the summary. '; 121 | } 122 | $summaryCommentLink = $this->linkResolver->getCommentLink($summaryComment->comment, $mentionerInstance, $error); 123 | $response .= "You can find it at {$summaryCommentLink}."; 124 | if ($error) { 125 | $response .= " (I tried to create the link for your instance but I failed miserably, for which I'm very sorry)."; 126 | } 127 | 128 | $this->sendReply($response, $unreadMention, $unreadMention->comment); 129 | } catch (ContentFetchingFailedException) { 130 | error_log('Unsupported site'); 131 | 132 | $officialSupportCommunityText = ''; 133 | if ($this->supportCommunity) { 134 | $officialSupportCommunityText = " or visit the official community at [!{$this->supportCommunity}](/c/{$this->supportCommunity})"; 135 | } 136 | $response = "I'm sorry, I don't know how to handle links for that site. 
You may contact my maintainer, [@{$maintainerName}@{$maintainerInstance}](/u/{$maintainerName}@{$maintainerInstance}){$officialSupportCommunityText}, if you wish to add it to supported sites!";
                    $this->sendReply($response, $unreadMention, $unreadMention->comment);
                    if (!$hasPermissionToPost) {
                        $this->api->currentUser()->sendPrivateMessage(
                            recipient: $maintainer,
                            content: "@{$me->name} bot got called for a site it can't handle: {$this->linkResolver->getPostLink($unreadMention->post)}",
                        );
                    }
                    continue;
                }
            } finally {
                error_log("Handling mention #{$i} done.");
                ++$i;
                $this->api->currentUser()->markMentionAsRead($unreadMention->personMention);
            }
        }

        return self::SUCCESS;
    }

    /**
     * Lazily walks the unread mentions of the current user, page by page,
     * stopping after the first page that comes back empty.
     *
     * NOTE(review): the caller marks each mention as read while this generator
     * is still paging through an unread-only listing. If the API computes pages
     * against the current unread set, later pages may skip entries until the
     * next run - confirm against the API behaviour.
     *
     * @return iterable Yields the mention views returned by the API.
     */
    private function getUnreadMentions(): iterable
    {
        for ($page = 1; ; ++$page) {
            $batch = $this->api->currentUser()->getMentions(page: $page, unreadOnly: true);

            yield from $batch;

            if (!count($batch)) {
                return;
            }
        }
    }

    /**
     * Lazily yields the comments of a post fetched with a max depth of 1,
     * newest first, never yielding a comment ID that an earlier page already
     * produced. Stops at the first page that contributes nothing new.
     *
     * @return iterable Yields CommentView objects, keyed as returned by the API page.
     */
    private function getAllTopComments(Post $post): iterable
    {
        $seenIds = [];
        $page = 1;

        while (true) {
            $batch = $this->api->comment()->getComments(
                maxDepth: 1,
                page: $page,
                post: $post,
                sortType: CommentSortType::New,
                listingType: ListingType::All,
            );

            // Keep only comments not produced by a previous page; the original
            // array keys are preserved, as array_filter() would preserve them.
            $fresh = [];
            foreach ($batch as $key => $candidate) {
                if (!in_array($candidate->comment->id, $seenIds, true)) {
                    $fresh[$key] = $candidate;
                }
            }

            if (!count($fresh)) {
                return;
            }

            yield from $fresh;

            foreach ($fresh as $yielded) {
                $seenIds[] = $yielded->comment->id;
            }
            ++$page;
        }
    }

    /**
     * Delivers a reply to whoever mentioned the bot.
     *
     * When the bot may post to the mention's community the reply is created as
     * a comment (optionally under $parent), first tagged as English and, if
     * that language is rejected, tagged as undetermined. Otherwise the reply is
     * wrapped in an explanatory header and footer and sent to the mention's
     * author as a private message.
     */
    private function sendReply(string $reply, PersonMentionView $mention, ?Comment $parent = null): void
    {
        $canComment = $this->permissionChecker->canPostToCommunity($mention->community);

        if (!$canComment) {
            // Build the post link for the mentioner's own instance when its host can be parsed.
            $mentionerHost = parse_url($mention->creator->actorId, PHP_URL_HOST) ?: null;
            $postLink = $this->linkResolver->getPostLink($mention->post, $mentionerHost);

            $privateText = "I'm replying to the mention at {$postLink} in private, because I've been forbidden from replying in comments:\n\n---\n\n"
                . $reply
                . "\n\n---\n\nIf you believe this bot is useful, please contact the mods of your favorite community and let them know!";

            $this->api->currentUser()->sendPrivateMessage(
                recipient: $mention->creator,
                content: $privateText,
            );

            return;
        }

        try {
            $this->api->comment()->create(
                post: $mention->post,
                content: $reply,
                language: Language::English,
                parent: $parent,
            );
        } catch (LanguageNotAllowedException) {
            // The community does not accept English: retry without a specific language.
            $this->api->comment()->create(
                post: $mention->post,
                content: $reply,
                language: Language::Undetermined,
                parent: $parent,
            );
        }
    }
}
--------------------------------------------------------------------------------
/src/Command/GetStatsCommand.php:
--------------------------------------------------------------------------------
addArgument(
            name: 'date',
            mode: InputArgument::OPTIONAL,
            description: 'The date to print stats for. 
Use all for all-time stats',
            default: (new DateTimeImmutable(timezone: new DateTimeZone('UTC')))->format('Y-m-d'),
        );
    }

    /**
     * Prints statistics about the bot account for one day or for all time.
     *
     * Output consists of several tables: comment/vote totals, private message
     * counts, mention counts, a per-instance breakdown and a per-community
     * comment count.
     *
     * @throws RuntimeException When the date argument cannot be parsed or the current user cannot be resolved.
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $io = new SymfonyStyle($input, $output);

        $dateArgument = $input->getArgument('date');
        if ($dateArgument === 'all') {
            // Wide fixed window used to mean "everything".
            $startDate = new DateTimeImmutable('2020-01-01');
            $endDate = new DateTimeImmutable('2100-01-01');
        } else {
            $dateArgument = date('Y-m-d', strtotime($dateArgument) ?: throw new RuntimeException('Invalid date'));
            $startDate = new DateTimeImmutable("{$dateArgument}T00:00:00Z");
            $endDate = new DateTimeImmutable("{$dateArgument}T23:59:59Z");
        }

        $commentCount = 0;
        $upvotes = 0;
        $downvotes = 0;
        $neutralCount = 0;
        $positiveCount = 0;
        $negativeCount = 0;
        $goodBots = 0;
        $badBots = 0;

        $communities = [];
        $perInstanceUpvotes = [];
        $perInstanceDownvotes = [];
        $perInstanceComments = [];

        $localUser = $this->api->site()->getSite()->myUser?->localUserView ?? throw new RuntimeException('Failed to get current user');
        $me = $localUser->person;

        $progressBar = $io->createProgressBar();
        $progressBar->setMessage('Initializing...');
        if ($dateArgument === 'all') {
            // The total is only known up front for all-time stats.
            $progressBar->setMaxSteps($localUser->counts->commentCount);
            $progressBar->setFormat('[%bar%] [%current%/%max%] - %message% (running %elapsed% of ~%estimated%)');
        } else {
            $progressBar->setFormat('[%bar%] [%current%] - %message% (running %elapsed%)');
        }

        $progressBar->start();
        foreach ($this->getComments($startDate, $endDate) as $comment) {
            $progressBar->setMessage("Processing comment from {$comment->comment->published->format('c')}");
            $progressBar->advance();

            // One upvote is subtracted from every comment.
            // NOTE(review): presumably this discounts the author's automatic
            // self-upvote - confirm, since some code paths reset that vote.
            $commentUpvotes = $comment->counts->upvotes - 1;
            $commentDownvotes = $comment->counts->downvotes;

            ++$commentCount;
            $upvotes += $commentUpvotes;
            $downvotes += $commentDownvotes;

            // Classify THIS comment by its own votes. Comparing the running
            // totals here would make the result depend on earlier comments.
            if ($commentUpvotes > $commentDownvotes) {
                ++$positiveCount;
            } elseif ($commentDownvotes > $commentUpvotes) {
                ++$negativeCount;
            } else {
                ++$neutralCount;
            }
            $goodBots += $this->getGoodBots($comment);
            $badBots += $this->getBadBots($comment);

            $instance = parse_url($comment->community->actorId, PHP_URL_HOST);

            $perInstanceUpvotes[$instance] ??= 0;
            $perInstanceDownvotes[$instance] ??= 0;
            $perInstanceComments[$instance] ??= 0;
            $perInstanceUpvotes[$instance] += $commentUpvotes;
            $perInstanceDownvotes[$instance] += $commentDownvotes;
            $perInstanceComments[$instance] += 1;

            $communities[$comment->community->actorId] ??= 0;
            $communities[$comment->community->actorId] += 1;
        }
        // Close the progress bar so the tables below start on their own lines.
        $progressBar->finish();
        $io->newLine(2);

        // Most active communities first.
        natsort($communities);
        $communities = array_reverse($communities);

        if ($dateArgument !== 'all') {
            $io->comment("Stats for {$dateArgument} (UTC)");
        } else {
            $io->comment('All time stats');
        }
        $io->table([
            'Comments',
            'Upvotes',
            'Downvotes',
            'Negative comments count',
            'Positive comments count',
            'Neutral comments count',
            'Good bots',
            'Bad bots',
        ], [
            [$commentCount, $upvotes, $downvotes, $negativeCount, $positiveCount, $neutralCount, $goodBots, $badBots],
        ]);

        $sentMessageCount = 0;
        $receivedMessageCount = 0;

        foreach ($this->getPrivateMessages($startDate, $endDate) as $privateMessage) {
            if ($privateMessage->creator->id === $me->id) {
                ++$sentMessageCount;
            } else {
                ++$receivedMessageCount;
            }
        }
        $io->table(['Sent messages', 'Received messages'], [[$sentMessageCount, $receivedMessageCount]]);

        $mentionsResponded = 0;
        $mentionsUnresponded = 0;

        // A mention counts as responded when one of the comments fetched for it was written by the bot.
        foreach ($this->getMentions($startDate, $endDate) as $mention) {
            $comments = $this->api->comment()->getComments(parent: $mention->comment);
            if (!count($comments)) {
                ++$mentionsUnresponded;
                continue;
            }

            $commentsByMe = array_filter($comments, static fn (CommentView $comment) => $comment->creator->id === $me->id);
            if (count($commentsByMe)) {
                ++$mentionsResponded;
            } else {
                ++$mentionsUnresponded;
            }
        }
        $io->table(
            ['Mentions', 'Responded', "Didn't respond"],
            [[$mentionsResponded + $mentionsUnresponded, $mentionsResponded, $mentionsUnresponded]],
        );

        $io->table(
            ['Instance', 'Comments', 'Upvotes', 'Downvotes', 'Like ratio', 'Upvotes per comment'],
            array_map(
                static fn (string $instance, int $upvotes, int $downvotes, int $comments) => [ // @phpstan-ignore-line
                    $instance,
                    $comments,
                    $upvotes,
                    $downvotes,
                    // Guard on the sum: the adjusted upvotes can be negative, so
                    // "either value non-zero" does not rule out a zero divisor.
                    ($upvotes + $downvotes) !== 0 ? number_format($upvotes / ($downvotes + $upvotes) * 100, 2) . '%' : 'N/A',
                    number_format($upvotes / $comments, 2),
                ],
                array_keys($perInstanceUpvotes),
                $perInstanceUpvotes,
                $perInstanceDownvotes,
                $perInstanceComments,
            )
        );

        $io->table(
            ['Community', 'Comment count'],
            array_map(static fn (int $count, string $community) => [$community, $count], $communities, array_keys($communities)),
        );

        return self::SUCCESS;
    }

    /**
     * Lazily yields the current user's comments published within the given
     * window. Comments are requested newest first, so iteration stops at the
     * first comment older than $startDate.
     *
     * @return iterable Yields the comment views returned by the API.
     *
     * @throws RuntimeException When the current user cannot be resolved.
     */
    private function getComments(DateTimeImmutable $startDate, DateTimeImmutable $endDate): iterable
    {
        $me = $this->api->site()->getSite()->myUser?->localUserView->person ?? throw new RuntimeException('Failed to get current user');

        $page = 1;
        do {
            $comments = $this->api->user()->getComments(
                user: $me,
                limit: 50,
                page: $page,
                sort: SortType::New,
            );

            foreach ($comments as $comment) {
                if ($comment->comment->published > $endDate) {
                    // Newer than the window: keep scanning.
                    continue;
                }
                if ($comment->comment->published < $startDate) {
                    // Older than the window: stop paging altogether.
                    break 2;
                }

                yield $comment;
            }

            ++$page;
        } while (count($comments));
    }

    /**
     * Lazily yields the current user's private messages published within the
     * given window, stopping at the first message older than $startDate.
     *
     * NOTE(review): the early stop assumes the API lists messages newest
     * first - no sort order is requested here, confirm.
     *
     * @return iterable Yields the private message views returned by the API.
     */
    private function getPrivateMessages(DateTimeImmutable $startDate, DateTimeImmutable $endDate): iterable
    {
        $page = 1;

        do {
            $messages = $this->api->currentUser()->getPrivateMessages(page: $page);
            foreach ($messages as $message) {
                if ($message->privateMessage->published > $endDate) {
                    continue;
                }
                if ($message->privateMessage->published < $startDate) {
                    break 2;
                }

                yield $message;
            }
            ++$page;
        } while (count($messages));
    }

    /**
     * Lazily yields the mentions of the current user published within the
     * given window, stopping at the first mention older than $startDate.
     *
     * @return iterable Yields the mention views returned by the API.
     */
    private function getMentions(DateTimeImmutable $startDate, DateTimeImmutable $endDate): iterable
    {
        $page = 1;

        do {
$mentions = $this->api->currentUser()->getMentions(page: $page); 255 | foreach ($mentions as $mention) { 256 | if ($mention->personMention->published > $endDate) { 257 | continue; 258 | } 259 | if ($mention->personMention->published < $startDate) { 260 | break 2; 261 | } 262 | 263 | yield $mention; 264 | } 265 | ++$page; 266 | } while (count($mentions)); 267 | } 268 | 269 | private function getGoodBots(CommentView $parent): int 270 | { 271 | $page = 1; 272 | 273 | $handled = []; 274 | 275 | $result = 0; 276 | do { 277 | $comments = $this->api->comment()->getComments(page: 1, parent: $parent->comment, sortType: CommentSortType::New); 278 | $comments = array_filter($comments, static fn (CommentView $comment) => $comment->comment->id !== $parent->comment->id); 279 | $comments = array_filter($comments, static fn (CommentView $comment) => !in_array($comment->comment->id, $handled, true)); 280 | foreach ($comments as $comment) { 281 | assert($comment instanceof CommentView); 282 | $handled[] = $comment->comment->id; 283 | if (str_contains(mb_strtolower($comment->comment->content), 'good bot')) { 284 | ++$result; 285 | } 286 | } 287 | ++$page; 288 | } while (count($comments)); 289 | 290 | return $result; 291 | } 292 | 293 | private function getBadBots(CommentView $parent): int 294 | { 295 | $page = 1; 296 | 297 | $handled = []; 298 | 299 | $result = 0; 300 | do { 301 | $comments = $this->api->comment()->getComments(page: 1, parent: $parent->comment); 302 | $comments = array_filter($comments, static fn (CommentView $comment) => $comment->comment->id !== $parent->comment->id); 303 | $comments = array_filter($comments, static fn (CommentView $comment) => !in_array($comment->comment->id, $handled, true)); 304 | foreach ($comments as $comment) { 305 | assert($comment instanceof CommentView); 306 | $handled[] = $comment->comment->id; 307 | if (str_contains(mb_strtolower($comment->comment->content), 'bad bot')) { 308 | ++$result; 309 | } 310 | } 311 | ++$page; 312 | } while 
(count($comments)); 313 | 314 | return $result; 315 | } 316 | } 317 | --------------------------------------------------------------------------------