├── .gitignore
├── LICENSE
├── README.md
├── feediverse.py
└── pyproject.toml

/.gitignore:
--------------------------------------------------------------------------------
dist
feediverse.egg-info
Pipfile*
__pycache__
.config.yml.swp
poetry.lock

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) Ed Summers

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
*feediverse* reads RSS/Atom feeds and posts new entries to Mastodon.
It's meant to add a little bit of spice to your timeline from other places.
Please use it responsibly.

## Install

    pip install feediverse

## Run

The first time you run *feediverse* you'll need to tell it your Mastodon
instance and get an access token, which it will save in a configuration file.
If you don't specify a config file it will use `~/.feediverse`:

    feediverse

Once *feediverse* is configured you can add it to your crontab:

    */15 * * * * /usr/local/bin/feediverse

Run `feediverse --help` to show the command line options.

## Post Format

You can customize the post format by opening the configuration file (default is
`~/.feediverse`) and updating the *template* property of your feed. The default
format is:

    {title} {url}

If you want you can use `{summary}` in your template, and add boilerplate text
like so:

    Bookmark: {title} {url} {summary}

`{hashtags}` will look for tags in the feed entry and turn them into a space
separated list of hashtags. For some feeds (e.g. youtube-rss) you should use
`{link}` instead of `{url}`.

`{content}` is the whole content of the feed entry (with HTML tags
stripped). Please be aware that this might easily exceed Mastodon's
limit of 500 characters.
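
Putting that together, a single feed in the config file with a custom template
and image attachments enabled might look something like this (the URL and
template below are just placeholders; `include_images` is the per-feed option
written by the setup step):

    feeds:
      - url: https://example.com/feed/
        template: "{title} {hashtags} {url}"
        include_images: true
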
## De-duping

If you are attempting to use the RSS feed of a major news site, you may find
that they change, update, or simply re-post the same items multiple times,
which will lead to duplicate toots. To enable de-duplication, use the
`--dedupe` option to check for duplicates based on the given entry field
before tooting, e.g.

    feediverse --dedupe url

## Multiple Feeds

Since the *feeds* property in the configuration file is a list, you can add
additional feeds to watch if you want:

    ...
    feeds:
      - url: https://example.com/feed/
        template: "dot com: {title} {url}"
      - url: https://example.org/feed/
        template: "dot org: {title} {url}"

## Develop

    poetry install
    poetry run feediverse
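
While hacking on feediverse it can help to combine this with the `--dry-run`
and `--verbose` flags, so entries are only printed and neither toots nor
config changes are made:

    poetry run feediverse --dry-run --verbose
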
--------------------------------------------------------------------------------
/feediverse.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3

import os
import re
import yaml
import argparse
import dateutil.parser
import feedparser
import random
import time
import requests

from bs4 import BeautifulSoup
from mastodon import Mastodon
from datetime import datetime, timezone, MINYEAR

DEFAULT_CONFIG_FILE = os.path.join("~", ".feediverse")

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-n", "--dry-run", action="store_true",
                        help=("perform a trial run with no changes made: "
                              "don't toot, don't save config"))
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="be verbose")
    parser.add_argument("-c", "--config",
                        help="config file to use",
                        default=os.path.expanduser(DEFAULT_CONFIG_FILE))
    parser.add_argument("-d", "--delay", action="store_true",
                        help="delay randomly from 10 to 30 seconds between each post")
    parser.add_argument("-p", "--dedupe",
                        help="dedupe against the given tag",
                        default="", metavar="TAG")

    args = parser.parse_args()
    config_file = args.config
    dedupe_field = args.dedupe

    if args.verbose:
        print("using config file", config_file)

    if not os.path.isfile(config_file):
        setup(config_file)

    config = read_config(config_file)

    masto = Mastodon(
        api_base_url=config['url'],
        client_id=config['client_id'],
        client_secret=config['client_secret'],
        access_token=config['access_token']
    )

    newest_post = config['updated']
    dupes = config['dupecheck']
    for feed in config['feeds']:
        if args.verbose:
            print(f"fetching {feed['url']} entries since {config['updated']}")
        for entry in get_feed(feed['url'], config['updated']):
            newest_post = max(newest_post, entry['updated'])
            entry_text = feed['template'].format(**entry)[:499]

            if args.dry_run:
                print(entry_text)
                continue

            if args.verbose:
                print(entry_text)

            if dedupe_field:
                if entry[dedupe_field] in dupes:
                    if args.verbose:
                        print(f"Skipping dupe post: {entry_text} "
                              f"based on dedupe field {dedupe_field}")
                    continue
                update_dupes(dupes, entry[dedupe_field])

            image_medias = []
            if feed.get('include_images', False) and entry['images']:
                for image in entry['images'][:4]:
                    # TODO: handle image fetch and upload exceptions
                    image_response = requests.get(image)
                    image_medias.append(masto.media_post(
                        image_response.content,
                        mime_type=image_response.headers['Content-Type']))

            if not args.dry_run:
                masto.status_post(
                    entry_text,
                    media_ids=image_medias
                )

            if args.delay:
                delay = random.randrange(10, 30)
                print(f"Delaying... {delay} seconds...")
                time.sleep(delay)

    if not args.dry_run:
        config['updated'] = newest_post.isoformat()
        config['dupecheck'] = dupes
        save_config(config, config_file)

def get_feed(feed_url, last_update):
    feed = feedparser.parse(feed_url)
    # RSS feeds can contain future dates that we don't want to post yet,
    # so we filter them out
    now = datetime.now(timezone.utc)
    entries = [e for e in feed.entries
               if dateutil.parser.parse(e['updated']) <= now]
    # Now we can filter for date normally
    if last_update:
        entries = [e for e in entries
                   if dateutil.parser.parse(e['updated']) > last_update]

    entries.sort(key=lambda e: e.updated_parsed)
    for entry in entries:
        yield get_entry(entry)

def update_dupes(dupes, new):
    # keep only a short rolling window of recently seen values
    if len(dupes) > 10:
        del dupes[0]
    dupes.append(new)

def get_entry(entry):
    hashtags = []
    for tag in entry.get('tags', []):
        t = tag['term'].replace(' ', '_').replace('.', '').replace('-', '')
        hashtags.append('#{}'.format(t))
    summary = entry.get('summary', '')
    content = entry.get('content', '')
    comments = entry.get('comments', '')
    if content:
        content = cleanup(content[0].get('value', ''))
    url = entry.id
    return {
        'url': url,
        'link': entry.link,
        'links': entry.links,
        'comments': comments,
        'title': cleanup(entry.title),
        'summary': cleanup(summary),
        'content': content,
        'hashtags': ' '.join(hashtags),
        'images': find_images(summary),
        'updated': dateutil.parser.parse(entry['updated'])
    }

def cleanup(text):
    html = BeautifulSoup(text, 'html.parser')
    text = html.get_text()
    text = re.sub('\xa0+', ' ', text)
    text = re.sub(' +', ' ', text)
    text = re.sub(' +\n', '\n', text)
    text = re.sub('\n\n\n+', '\n\n', text, flags=re.M)
    return text.strip()

def find_urls(html):
    if not html:
        return
    urls = []
    soup = BeautifulSoup(html, 'html.parser')
    for tag in soup.find_all(["a", "img"]):
        if tag.name == "a":
            url = tag.get("href")
        elif tag.name == "img":
            url = tag.get("src")
        if url and url not in urls:
            urls.append(url)
    return urls

def find_images(html):
    if not html:
        return
    urls = []
    soup = BeautifulSoup(html, 'html.parser')
    for tag in soup.find_all(["img"]):
        url = tag.get("src")
        if url and url not in urls:
            urls.append(url)
    return urls

def yes_no(question):
    res = input(question + ' [y/n] ')
    return res.strip().lower() in ("y", "yes", "1")

def save_config(config, config_file):
    copy = dict(config)
    with open(config_file, 'w') as fh:
        fh.write(yaml.dump(copy, default_flow_style=False))

def read_config(config_file):
    config = {
        'updated': datetime(MINYEAR, 1, 1, 0, 0, 0, 0, timezone.utc),
        'dupecheck': [],
    }
    with open(config_file) as fh:
        cfg = yaml.load(fh, yaml.SafeLoader)
    if 'updated' in cfg:
        cfg['updated'] = dateutil.parser.parse(cfg['updated'])
    config.update(cfg)
    return config

def setup(config_file):
    url = input('What is your Mastodon Instance URL? ')
    have_app = yes_no('Do you have your app credentials already?')
    if have_app:
        name = 'feediverse'
        client_id = input('What is your app\'s client id: ')
        client_secret = input('What is your client secret: ')
        access_token = input('access_token: ')
    else:
        print("Ok, I'll need a few things in order to get your access token")
        name = input('app name (e.g. feediverse): ')
        client_id, client_secret = Mastodon.create_app(
            api_base_url=url,
            client_name=name,
            #scopes=['read', 'write'],
            website='https://github.com/edsu/feediverse'
        )
        username = input('mastodon username (email): ')
        password = input('mastodon password (not stored): ')
        m = Mastodon(client_id=client_id, client_secret=client_secret, api_base_url=url)
        access_token = m.log_in(username, password)

    feed_url = input('RSS/Atom feed URL to watch: ')
    old_posts = yes_no('Shall already existing entries be tooted, too?')
    include_images = yes_no('Do you want to attach images (the first 4) found in entries to your toot?')
    config = {
        'name': name,
        'url': url,
        'client_id': client_id,
        'client_secret': client_secret,
        'access_token': access_token,
        'feeds': [
            {'url': feed_url, 'template': '{title} {url}', 'include_images': include_images}
        ]
    }
    if not old_posts:
        config['updated'] = datetime.now(tz=timezone.utc).isoformat()
    save_config(config, config_file)
    print("")
    print("Your feediverse configuration has been saved to {}".format(config_file))
    print("Add a line like this to your crontab to check every 15 minutes:")
    print("*/15 * * * * /usr/local/bin/feediverse")
    print("")

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
[tool.poetry]
name = "feediverse"
version = "0.4.1"
description = "Connect an RSS Feed to Mastodon"
authors = ["Ed Summers"]
license = "MIT"
readme = "README.md"

[tool.poetry.dependencies]
python = "^3.9"
beautifulsoup4 = "^4.12.3"
feedparser = "^6.0.11"
mastodon-py = "^1.8.1"
python-dateutil = "^2.9.0.post0"
pyyaml = "^6.0.1"
requests = "^2.32.2"

[tool.poetry.scripts]
feediverse = "feediverse:main"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
--------------------------------------------------------------------------------