├── .gitignore ├── README.md ├── config.ini └── rssify.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.xml 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | I looked at a few online services that provide custom-created RSS feeds 2 | for websites that don't have one. None of them were free of charge 3 | without being too limited in functionality for me. 4 | 5 | So I hacked together this very simple rssify.py script. 6 | It reads the websites you want to rssify from a config file. 7 | It could easily be extended with more features if needed. 8 | For now it only parses the title, link and date via CSS selectors and generates 9 | one XML feed file per configured site, which can be imported into newsboat/newsbeuter or I guess any 10 | other RSS reader. 11 | 12 | ```config.ini 13 | [Jodel Engineering Blog] 14 | url = https://jodel.com/engineering/ 15 | item_title = .post-title > a 16 | item_date = .post-date 17 | item_date_format = %%b %%d, %%Y 18 | item_timezone = Europe/Berlin 19 | ``` 20 | 21 | The script runs once daily in a cronjob on my local machine. 
"""Generate one RSS file per section of ``config.ini``.

Each config section describes a website: the page ``url``, CSS selectors
for the item titles (``item_title``) and item links (``item_url``), and
optionally a selector for item dates (``item_date``) together with the
``strptime`` format (``item_date_format``) and an ``item_timezone`` name.
The resulting feed is written to ``<section name>.xml`` (spaces replaced
by underscores) in the current directory.
"""
import configparser
import sys
from datetime import datetime
from urllib.parse import urljoin

import requests
from feedgen.feed import FeedGenerator
from bs4 import BeautifulSoup
from pytz import timezone

CONFIG_FILE = 'config.ini'

# Publication date used when a site exposes no usable date for an item.
# feedgen accepts a date string as long as it carries a UTC offset.
FALLBACK_DATE = '1970-01-01 00:00:00+02:00'

# Seconds to wait for a site before giving up, so one hung server cannot
# block the whole (cron-driven) run.
REQUEST_TIMEOUT = 30


def _warn(message):
    """Print *message* to stderr so cron can report it."""
    print(message, file=sys.stderr)


def _parse_date(raw, settings):
    """Return a timezone-aware datetime parsed from the text *raw*.

    The text is parsed with ``settings['item_date_format']`` and localized
    to ``settings['item_timezone']``.  UTC is assumed when no timezone is
    configured, because feedgen rejects naive datetimes.

    Raises:
        ValueError: if *raw* does not match the configured format.
    """
    naive = datetime.strptime(raw.strip(), settings['item_date_format'])
    local_tz = timezone(settings.get('item_timezone', 'UTC'))
    return local_tz.localize(naive)


def _entry_date(dates, index, settings):
    """Return the publication date for the item at *index*.

    Falls back to ``FALLBACK_DATE`` when there is no date element for this
    item (no ``item_date`` selector, or fewer date matches than titles) or
    when the element's text cannot be parsed.
    """
    if index >= len(dates):
        return FALLBACK_DATE
    raw = dates[index].text
    try:
        return _parse_date(raw, settings)
    except ValueError as err:
        _warn('%s: unparseable date %r (%s)' % (settings['url'], raw, err))
        return FALLBACK_DATE


def _build_feed(section, settings, html):
    """Build a FeedGenerator for *section* from the page source *html*."""
    soup = BeautifulSoup(html, 'lxml')
    titles = soup.select(settings['item_title'])
    links = soup.select(settings['item_url'])
    dates = soup.select(settings['item_date']) if 'item_date' in settings else []

    fg = FeedGenerator()
    fg.title(section)
    fg.description(section)
    fg.link(href=settings['url'], rel='alternate')

    # zip() stops at the shorter list, so an item is only emitted when it
    # has both a title and a link element.
    for index, (title, link) in enumerate(zip(titles, links)):
        fe = fg.add_entry()
        fe.title(title.text)
        # Resolve relative hrefs against the page URL; an element without
        # an href falls back to the page URL itself.
        href = urljoin(settings['url'], link.get('href') or '')
        fe.link(href=href, rel='alternate')
        fe.published(_entry_date(dates, index, settings))

    return fg


def main():
    """Fetch every configured site and write its RSS file."""
    config = configparser.ConfigParser()
    config.read(CONFIG_FILE)

    for section in config.sections():
        settings = dict(config.items(section))
        try:
            response = requests.get(settings['url'], timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException as err:
            # A failing site must not prevent the remaining feeds from
            # being generated, nor overwrite its last good feed file.
            _warn('%s: skipping, could not fetch %s (%s)'
                  % (section, settings['url'], err))
            continue

        feed = _build_feed(section, settings, response.text)
        feed.rss_file(section.replace(' ', '_') + '.xml')


if __name__ == '__main__':
    main()