├── .gitignore
├── LICENSE
├── README.md
├── feediverse.py
└── pyproject.toml

/.gitignore:
--------------------------------------------------------------------------------
dist
feediverse.egg-info
Pipfile*
__pycache__
.config.yml.swp
poetry.lock

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) Ed Summers

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
*feediverse* reads RSS/Atom feeds and posts new entries to Mastodon.
It's meant to add a little bit of spice to your timeline from other places.
Please use it responsibly.

## Install

    pip install feediverse

## Run

The first time you run *feediverse* you'll need to tell it your Mastodon
instance and get an access token, which it will save in a configuration file.
If you don't specify a config file it will use `~/.feediverse`:

    feediverse

Once *feediverse* is configured you can add it to your crontab:

    */15 * * * * /usr/local/bin/feediverse

Run `feediverse --help` to show the command line options.

## Post Format

You can customize the post format by opening the configuration file (default is
`~/.feediverse`) and updating the *template* property of your feed. The default
format is:

    {title} {url}

If you want you can use `{summary}` in your template, and add boilerplate text
like so:

    Bookmark: {title} {url} {summary}

`{hashtags}` will look for tags in the feed entry and turn them into a space
separated list of hashtags. For some feeds (e.g. youtube-rss) you should use
`{link}` instead of `{url}`.

`{content}` is the whole content of the feed entry (with HTML tags
stripped). Please be aware that this might easily exceed Mastodon's
limit of 500 characters.
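
Putting that together, a single feed in the config file with a custom template
and image attachments enabled might look something like this (the URL and
template below are just placeholders; `include_images` is the per-feed option
written by the setup step):

    feeds:
      - url: https://example.com/feed/
        template: "{title} {hashtags} {url}"
        include_images: true
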
## De-duping

If you are attempting to use the RSS feed of a major news site, you may find
that they change, update, or simply re-post the same items multiple times,
which will lead to duplicate toots. To enable de-duplication, use the
`--dedupe` option to check for duplicates based on the given entry field
before tooting, e.g.

    feediverse --dedupe url

## Multiple Feeds

Since the *feeds* property in the configuration file is a list, you can add
additional feeds to watch if you want:

    ...
    feeds:
      - url: https://example.com/feed/
        template: "dot com: {title} {url}"
      - url: https://example.org/feed/
        template: "dot org: {title} {url}"

## Develop

    poetry install
    poetry run feediverse
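
While hacking on feediverse it can help to combine this with the `--dry-run`
and `--verbose` flags, so entries are only printed and neither toots nor
config changes are made:

    poetry run feediverse --dry-run --verbose
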
--------------------------------------------------------------------------------
/feediverse.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3

import os
import re
import yaml
import argparse
import dateutil.parser
import feedparser
import random
import time
import requests

from bs4 import BeautifulSoup
from mastodon import Mastodon
from datetime import datetime, timezone, MINYEAR

DEFAULT_CONFIG_FILE = os.path.join("~", ".feediverse")

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-n", "--dry-run", action="store_true",
                        help=("perform a trial run with no changes made: "
                              "don't toot, don't save config"))
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="be verbose")
    parser.add_argument("-c", "--config",
                        help="config file to use",
                        default=os.path.expanduser(DEFAULT_CONFIG_FILE))
    parser.add_argument("-d", "--delay", action="store_true",
                        help="delay randomly from 10 to 30 seconds between each post")
    parser.add_argument("-p", "--dedupe",
                        help="dedupe against the given tag",
                        default="", metavar="TAG")

    args = parser.parse_args()
    config_file = args.config
    dedupe_field = args.dedupe

    if args.verbose:
        print("using config file", config_file)

    if not os.path.isfile(config_file):
        setup(config_file)

    config = read_config(config_file)

    masto = Mastodon(
        api_base_url=config['url'],
        client_id=config['client_id'],
        client_secret=config['client_secret'],
        access_token=config['access_token']
    )

    newest_post = config['updated']
    dupes = config['dupecheck']
    for feed in config['feeds']:
        if args.verbose:
            print(f"fetching {feed['url']} entries since {config['updated']}")
        for entry in get_feed(feed['url'], config['updated']):
            newest_post = max(newest_post, entry['updated'])
            entry_text = feed['template'].format(**entry)[:499]

            if args.dry_run:
                print(entry_text)
                continue

            if args.verbose:
                print(entry_text)

            if dedupe_field:
                if entry[dedupe_field] in dupes:
                    if args.verbose:
                        print(f"Skipping dupe post: {entry_text} "
                              f"based on dedupe field {dedupe_field}")
                    continue
                update_dupes(dupes, entry[dedupe_field])

            image_medias = []
            if feed.get('include_images', False) and entry['images']:
                for image in entry['images'][:4]:
                    # TODO: handle image fetch and upload exceptions
                    image_response = requests.get(image)
                    image_medias.append(masto.media_post(
                        image_response.content,
                        mime_type=image_response.headers['Content-Type']))

            if not args.dry_run:
                masto.status_post(
                    entry_text,
                    media_ids=image_medias
                )

            if args.delay:
                delay = random.randrange(10, 30)
                print(f"Delaying... {delay} seconds...")
                time.sleep(delay)

    if not args.dry_run:
        config['updated'] = newest_post.isoformat()
        config['dupecheck'] = dupes
        save_config(config, config_file)

def get_feed(feed_url, last_update):
    feed = feedparser.parse(feed_url)
    # RSS feeds can contain future dates that we don't want to post yet,
    # so we filter them out
    now = datetime.now(timezone.utc)
    entries = [e for e in feed.entries
               if dateutil.parser.parse(e['updated']) <= now]
    # Now we can filter for date normally
    if last_update:
        entries = [e for e in entries
                   if dateutil.parser.parse(e['updated']) > last_update]

    entries.sort(key=lambda e: e.updated_parsed)
    for entry in entries:
        yield get_entry(entry)

def update_dupes(dupes, new):
    # keep only a short rolling window of recently seen values
    if len(dupes) > 10:
        del dupes[0]
    dupes.append(new)

def get_entry(entry):
    hashtags = []
    for tag in entry.get('tags', []):
        t = tag['term'].replace(' ', '_').replace('.', '').replace('-', '')
        hashtags.append('#{}'.format(t))
    summary = entry.get('summary', '')
    content = entry.get('content', '')
    comments = entry.get('comments', '')
    if content:
        content = cleanup(content[0].get('value', ''))
    url = entry.id
    return {
        'url': url,
        'link': entry.link,
        'links': entry.links,
        'comments': comments,
        'title': cleanup(entry.title),
        'summary': cleanup(summary),
        'content': content,
        'hashtags': ' '.join(hashtags),
        'images': find_images(summary),
        'updated': dateutil.parser.parse(entry['updated'])
    }

def cleanup(text):
    html = BeautifulSoup(text, 'html.parser')
    text = html.get_text()
    text = re.sub('\xa0+', ' ', text)
    text = re.sub(' +', ' ', text)
    text = re.sub(' +\n', '\n', text)
    text = re.sub('\n\n\n+', '\n\n', text, flags=re.M)
    return text.strip()

def find_urls(html):
    if not html:
        return
    urls = []
    soup = BeautifulSoup(html, 'html.parser')
    for tag in soup.find_all(["a", "img"]):
        if tag.name == "a":
            url = tag.get("href")
        elif tag.name == "img":
            url = tag.get("src")
        if url and url not in urls:
            urls.append(url)
    return urls

def find_images(html):
    if not html:
        return
    urls = []
    soup = BeautifulSoup(html, 'html.parser')
    for tag in soup.find_all(["img"]):
        url = tag.get("src")
        if url and url not in urls:
            urls.append(url)
    return urls

def yes_no(question):
    res = input(question + ' [y/n] ')
    return res.strip().lower() in ("y", "yes", "1")

def save_config(config, config_file):
    copy = dict(config)
    with open(config_file, 'w') as fh:
        fh.write(yaml.dump(copy, default_flow_style=False))

def read_config(config_file):
    config = {
        'updated': datetime(MINYEAR, 1, 1, 0, 0, 0, 0, timezone.utc),
        'dupecheck': [],
    }
    with open(config_file) as fh:
        cfg = yaml.load(fh, yaml.SafeLoader)
    if 'updated' in cfg:
        cfg['updated'] = dateutil.parser.parse(cfg['updated'])
    config.update(cfg)
    return config

def setup(config_file):
    url = input('What is your Mastodon Instance URL? ')
    have_app = yes_no('Do you have your app credentials already?')
    if have_app:
        name = 'feediverse'
        client_id = input('What is your app\'s client id: ')
        client_secret = input('What is your client secret: ')
        access_token = input('access_token: ')
    else:
        print("Ok, I'll need a few things in order to get your access token")
        name = input('app name (e.g. feediverse): ')
        client_id, client_secret = Mastodon.create_app(
            api_base_url=url,
            client_name=name,
            #scopes=['read', 'write'],
            website='https://github.com/edsu/feediverse'
        )
        username = input('mastodon username (email): ')
        password = input('mastodon password (not stored): ')
        m = Mastodon(client_id=client_id, client_secret=client_secret, api_base_url=url)
        access_token = m.log_in(username, password)

    feed_url = input('RSS/Atom feed URL to watch: ')
    old_posts = yes_no('Shall already existing entries be tooted, too?')
    include_images = yes_no('Do you want to attach images (the first 4) found in entries to your toot?')
    config = {
        'name': name,
        'url': url,
        'client_id': client_id,
        'client_secret': client_secret,
        'access_token': access_token,
        'feeds': [
            {'url': feed_url, 'template': '{title} {url}', 'include_images': include_images}
        ]
    }
    if not old_posts:
        config['updated'] = datetime.now(tz=timezone.utc).isoformat()
    save_config(config, config_file)
    print("")
    print("Your feediverse configuration has been saved to {}".format(config_file))
    print("Add a line like this to your crontab to check every 15 minutes:")
    print("*/15 * * * * /usr/local/bin/feediverse")
    print("")

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
[tool.poetry]
name = "feediverse"
version = "0.4.1"
description = "Connect an RSS Feed to Mastodon"
authors = ["Ed Summers"]
license = "MIT"
readme = "README.md"

[tool.poetry.dependencies]
python = "^3.9"
beautifulsoup4 = "^4.12.3"
feedparser = "^6.0.11"
mastodon-py = "^1.8.1"
python-dateutil = "^2.9.0.post0"
pyyaml = "^6.0.1"
requests = "^2.32.2"

[tool.poetry.scripts]
feediverse = "feediverse:main"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
--------------------------------------------------------------------------------