├── .gitignore ├── LICENSE ├── README.md ├── add.py ├── api ├── __init__.py └── api.py ├── config.example.yaml ├── main.py ├── poetry.lock ├── pyproject.toml ├── requirements.txt └── sites.example.yaml /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | config.yaml 3 | sites.yaml 4 | db.yaml 5 | __pycache__/ 6 | debug.log -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Lukas Winkler 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rss2wallabag 2 | 3 | **This project is archived, I am no longer maintaining it. I personally recommend using https://miniflux.app/ as an RSS reader instead (which also supports sending entries to Wallabag)** 4 | 5 | A small python script for importing RSS feeds to Wallabag 6 | -------------------------------------------------------------------------------- /add.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import yaml 3 | from urllib.parse import urlparse 4 | 5 | with open("sites.yaml", 'r') as stream: 6 | sites = yaml.safe_load(stream) 7 | 8 | try: 9 | name = sys.argv[1] 10 | feedurl = input("URL: ") 11 | parsed = urlparse(feedurl) 12 | if not (parsed.scheme and parsed.netloc and parsed.path): 13 | print("invalid URL") 14 | exit() 15 | tags = [] 16 | while True: 17 | tag = input("Tag: ") 18 | if tag == "": 19 | break 20 | tags.append(tag) 21 | 22 | sites[name] = {"url": feedurl, "tags": tags} 23 | 24 | except Exception as e: 25 | print("invalid input") 26 | print(e) 27 | 28 | with open('sites.yaml', 'w') as outfile: 29 | yaml.dump(sites, outfile, default_flow_style=False) 30 | -------------------------------------------------------------------------------- /api/__init__.py: -------------------------------------------------------------------------------- 1 | from .api import WallabagAPI 2 | -------------------------------------------------------------------------------- /api/api.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | from datetime import datetime 3 | from typing import Dict, List 4 | 5 | from requests import Session 6 | 7 | 8 | class WallabagAPI: 9 | def __init__(self, host: str, 10 | user_agent="RSS2Wallabag +https://github.com/Findus23/rss2wallabag", 11 | requests_session: Session = None 12 | ) -> None: 13 | 14 | self.host = host 15 | self.token = None 16 | self.user_agent = user_agent 17 | if requests_session: 18 | self.s = requests_session 19 | else: 20 | self.s = Session() 21 | self.s.headers.update({"User-Agent": user_agent}) 22 | 23 | def auth(self, client_id: str, client_secret: str, username: str, password: str) -> None: 24 | r = self.s.post(self.host + "/oauth/v2/token", data={ 25 | "grant_type": "password", 26 | "client_id": client_id, 27 | "client_secret": client_secret, 28 | "username": username, 29 | "password": password 30 | }) 31 | r.raise_for_status() 32 | self.token = r.json()["access_token"] 33 | 34 | def check_auth(self): 35 | if not self.token: 36 | raise RuntimeError("call auth() first to log in") 37 | 38 | @property 39 | def auth_headers(self) -> Dict[str, str]: 40 | self.check_auth() 41 | return {"Authorization": "Bearer " + self.token} 42 | 43 | def add_entry(self, url: str, title: str = None, 44 | tags: List[str] = None, published: datetime = None) -> None: 45 | if tags is None: 46 | tags = [] 47 | 48 | data = { 49 | "url": url, 50 | } 51 | if title: 52 | data["title"] = title 53 | if tags: 54 | data["tags"] = ",".join(tags) 55 | if published: 56 | data["published_at"] = published.timestamp() 57 | # TODO: doesn't seem to be working 58 | r = self.s.post(self.host + "/api/entries.json", data=data, headers=self.auth_headers) 59 | r.raise_for_status() 60 | 61 | def check_exist(self, url: str) -> bool: 62 | sha1 = hashlib.sha1(url.encode()).hexdigest() 63 | r = self.s.get(self.host + "/api/entries/exists.json", params={ 64 | "hashed_url": sha1, 65 | }, headers=self.auth_headers) 66 | r.raise_for_status() 67 | return r.json()["exists"] 68 | -------------------------------------------------------------------------------- /config.example.yaml: -------------------------------------------------------------------------------- 1 | wallabag: 2 | host: https://wallabag.example 3 | client_id: client_id 4 | client_secret: client_secret 5 | username: username 6 | password: password 7 | github_username: Findus23 8 | debug: false 9 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | from dataclasses import dataclass 4 | from datetime import datetime 5 | from time import mktime 6 | from typing import List, Optional, Dict 7 | from urllib.parse import urljoin 8 | 9 | import feedparser 10 | import requests 11 | import yaml 12 | from feedparser import FeedParserDict 13 | 14 | from api import WallabagAPI 15 | 16 | 17 | @dataclass 18 | class WallabagConfig: 19 | host: str 20 | client_id: str 21 | client_secret: str 22 | username: str 23 | password: str 24 | 25 | 26 | class Config: 27 | def __init__(self): 28 | with open("config.yaml", 'r') as stream: 29 | data = yaml.safe_load(stream) 30 | self.wallabag = WallabagConfig(**data["wallabag"]) 31 | if data["github_username"]: 32 | self.github_username = data["github_username"] 33 | else: 34 | self.github_username = None 35 | self.debug = data["debug"] 36 | 37 | @property 38 | def production(self): 39 | return not self.debug 40 | 41 | 42 | @dataclass 43 | class Site: 44 | title: str 45 | url: str 46 | github: bool 47 | tags: List[str] 48 | latest_article: Optional[str] 49 | filter: Optional[str] 50 | 51 | 52 | def load_sites() -> Dict[str, Site]: 53 | with open("sites.yaml", 'r') as stream: 54 | data = yaml.safe_load(stream) 55 | sites: Dict[str, Site] = {} 56 | for title, entry in data.items(): 57 | if "latest_article" not in entry: 58 | entry["latest_article"] = None 59 | if "github" not in entry: 60 | entry["github"] = None 61 | if "filter" not in entry: 62 | entry["filter"] = None 63 | sites[title] = Site(title, **entry) 64 | return sites 65 | 66 | 67 | def get_starred_repos(username, sites: Dict[str, Site]): 68 | r = requests.get("https://api.github.com/users/{user}/starred".format(user=username)) 69 | stars = r.json() 70 | for repo in stars: 71 | if repo["full_name"] not in sites: 72 | sites[repo["full_name"]] = Site( 73 | url=repo["html_url"] + "/releases.atom", 74 | tags=["github", repo["name"]], 75 | github=True, 76 | title=repo["full_name"], 77 | latest_article=None 78 | ) 79 | return sites 80 | 81 | 82 | def main(): 83 | sites = load_sites() 84 | config = Config() 85 | 86 | logger = logging.getLogger() 87 | logger.handlers = [] 88 | formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') 89 | logger.setLevel(logging.DEBUG) 90 | 91 | ch = logging.StreamHandler(stream=sys.stdout) 92 | ch.setLevel(logging.WARNING if config.production else logging.DEBUG) 93 | ch.setFormatter(formatter) 94 | logger.addHandler(ch) 95 | 96 | fh = logging.FileHandler('debug.log') 97 | fh.setFormatter(formatter) 98 | fh.setLevel(logging.WARNING if config.production else logging.DEBUG) 99 | logger.addHandler(fh) 100 | 101 | wallabag_config = config.wallabag 102 | api = WallabagAPI(host=wallabag_config.host) 103 | api.auth(client_secret=wallabag_config.client_secret, client_id=wallabag_config.client_id, 104 | username=wallabag_config.username, password=wallabag_config.password) 105 | 106 | if config.github_username: 107 | sites = get_starred_repos(config.github_username, sites) 108 | 109 | new_sites: Dict[str, Dict] = {} 110 | for title, site in sites.items(): 111 | new_site = handle_feed(api, site, logger, config) 112 | new_sites[title] = new_site.__dict__ 113 | del new_sites[title]["title"] 114 | if config.production: 115 | with open("sites.yaml", 'w') as stream: 116 | yaml.dump(new_sites, stream, default_flow_style=False) 117 | 118 | 119 | def handle_feed(api: WallabagAPI, site: Site, logger: logging.Logger, config: Config) -> Site: 120 | logger.info("Downloading feed: " + site.title) 121 | r = api.s.get(site.url) 122 | if r.status_code != 404: 123 | r.raise_for_status() 124 | rss = r.text 125 | logger.info("Parsing feed: " + site.title) 126 | f = feedparser.parse(rss) 127 | logger.debug("finished parsing: " + site.title) 128 | 129 | articles: List[FeedParserDict] = f.entries 130 | for article in articles: 131 | if article.title == site.latest_article: 132 | logger.debug("already added: " + article.title) 133 | break 134 | if site.filter and not site.filter in article.title: 135 | logger.debug("article filtered: " + article.title) 136 | continue 137 | logger.info("article found: " + article.title) 138 | taglist = [site.title] 139 | if site.tags: 140 | taglist.extend(site.tags) 141 | if "published_parsed" in article: 142 | published = datetime.fromtimestamp(mktime(article.published_parsed)) 143 | elif "updated_parsed" in article: 144 | published = datetime.fromtimestamp(mktime(article.updated_parsed)) 145 | else: 146 | published = None 147 | logger.info("add to wallabag: " + article.title) 148 | if site.github: 149 | title = site.title + ": " + article.title 150 | else: 151 | title = article.title 152 | if not hasattr(article, 'link'): 153 | logger.info("no link, skipping!") 154 | continue 155 | url = urljoin(site.url, article.link) 156 | if api.check_exist(url): 157 | logger.info("already found in wallabag: " + article.title) 158 | continue 159 | if config.production: 160 | api.add_entry(url=url, title=title, tags=taglist, published=published) 161 | else: 162 | logger.info("warning: running in debug mode - not adding links to wallabag") 163 | if articles: 164 | site.latest_article = articles[0].title 165 | 166 | return site 167 | 168 | 169 | if __name__ == '__main__': 170 | main() 171 | -------------------------------------------------------------------------------- /poetry.lock: -------------------------------------------------------------------------------- 1 | [[package]] 2 | name = "certifi" 3 | version = "2020.6.20" 4 | description = "Python package for providing Mozilla's CA Bundle." 5 | category = "main" 6 | optional = false 7 | python-versions = "*" 8 | 9 | [[package]] 10 | name = "chardet" 11 | version = "3.0.4" 12 | description = "Universal encoding detector for Python 2 and 3" 13 | category = "main" 14 | optional = false 15 | python-versions = "*" 16 | 17 | [[package]] 18 | name = "feedparser" 19 | version = "6.0.2" 20 | description = "Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds" 21 | category = "main" 22 | optional = false 23 | python-versions = ">=3.6" 24 | 25 | [package.dependencies] 26 | sgmllib3k = "*" 27 | 28 | [[package]] 29 | name = "idna" 30 | version = "2.10" 31 | description = "Internationalized Domain Names in Applications (IDNA)" 32 | category = "main" 33 | optional = false 34 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 35 | 36 | [[package]] 37 | name = "pyyaml" 38 | version = "5.3.1" 39 | description = "YAML parser and emitter for Python" 40 | category = "main" 41 | optional = false 42 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 43 | 44 | [[package]] 45 | name = "requests" 46 | version = "2.24.0" 47 | description = "Python HTTP for Humans." 48 | category = "main" 49 | optional = false 50 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 51 | 52 | [package.dependencies] 53 | certifi = ">=2017.4.17" 54 | chardet = ">=3.0.2,<4" 55 | idna = ">=2.5,<3" 56 | urllib3 = ">=1.21.1,<1.25.0 || >1.25.0,<1.25.1 || >1.25.1,<1.26" 57 | 58 | [package.extras] 59 | security = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)"] 60 | socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"] 61 | 62 | [[package]] 63 | name = "sgmllib3k" 64 | version = "1.0.0" 65 | description = "Py3k port of sgmllib." 66 | category = "main" 67 | optional = false 68 | python-versions = "*" 69 | 70 | [[package]] 71 | name = "urllib3" 72 | version = "1.25.11" 73 | description = "HTTP library with thread-safe connection pooling, file post, and more." 74 | category = "main" 75 | optional = false 76 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4" 77 | 78 | [package.extras] 79 | brotli = ["brotlipy (>=0.6.0)"] 80 | secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"] 81 | socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] 82 | 83 | [metadata] 84 | lock-version = "1.1" 85 | python-versions = "^3.7" 86 | content-hash = "1804291feb860b5ec22fdd23269eb00a5e8d0dd0cb8537506e4f7df32c441de4" 87 | 88 | [metadata.files] 89 | certifi = [ 90 | {file = "certifi-2020.6.20-py2.py3-none-any.whl", hash = "sha256:8fc0819f1f30ba15bdb34cceffb9ef04d99f420f68eb75d901e9560b8749fc41"}, 91 | {file = "certifi-2020.6.20.tar.gz", hash = "sha256:5930595817496dd21bb8dc35dad090f1c2cd0adfaf21204bf6732ca5d8ee34d3"}, 92 | ] 93 | chardet = [ 94 | {file = "chardet-3.0.4-py2.py3-none-any.whl", hash = "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"}, 95 | {file = "chardet-3.0.4.tar.gz", hash = "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae"}, 96 | ] 97 | feedparser = [ 98 | {file = "feedparser-6.0.2-py3-none-any.whl", hash = "sha256:f596c4b34fb3e2dc7e6ac3a8191603841e8d5d267210064e94d4238737452ddd"}, 99 | {file = "feedparser-6.0.2.tar.gz", hash = "sha256:1b00a105425f492f3954fd346e5b524ca9cef3a4bbf95b8809470e9857aa1074"}, 100 | ] 101 | idna = [ 102 | {file = "idna-2.10-py2.py3-none-any.whl", hash = "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"}, 103 | {file = "idna-2.10.tar.gz", hash = "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6"}, 104 | ] 105 | pyyaml = [ 106 | {file = "PyYAML-5.3.1-cp27-cp27m-win32.whl", hash = "sha256:74809a57b329d6cc0fdccee6318f44b9b8649961fa73144a98735b0aaf029f1f"}, 107 | {file = "PyYAML-5.3.1-cp27-cp27m-win_amd64.whl", hash = "sha256:240097ff019d7c70a4922b6869d8a86407758333f02203e0fc6ff79c5dcede76"}, 108 | {file = "PyYAML-5.3.1-cp35-cp35m-win32.whl", hash = "sha256:4f4b913ca1a7319b33cfb1369e91e50354d6f07a135f3b901aca02aa95940bd2"}, 109 | {file = "PyYAML-5.3.1-cp35-cp35m-win_amd64.whl", hash = "sha256:cc8955cfbfc7a115fa81d85284ee61147059a753344bc51098f3ccd69b0d7e0c"}, 110 | {file = "PyYAML-5.3.1-cp36-cp36m-win32.whl", hash = "sha256:7739fc0fa8205b3ee8808aea45e968bc90082c10aef6ea95e855e10abf4a37b2"}, 111 | {file = "PyYAML-5.3.1-cp36-cp36m-win_amd64.whl", hash = "sha256:69f00dca373f240f842b2931fb2c7e14ddbacd1397d57157a9b005a6a9942648"}, 112 | {file = "PyYAML-5.3.1-cp37-cp37m-win32.whl", hash = "sha256:d13155f591e6fcc1ec3b30685d50bf0711574e2c0dfffd7644babf8b5102ca1a"}, 113 | {file = "PyYAML-5.3.1-cp37-cp37m-win_amd64.whl", hash = "sha256:73f099454b799e05e5ab51423c7bcf361c58d3206fa7b0d555426b1f4d9a3eaf"}, 114 | {file = "PyYAML-5.3.1-cp38-cp38-win32.whl", hash = "sha256:06a0d7ba600ce0b2d2fe2e78453a470b5a6e000a985dd4a4e54e436cc36b0e97"}, 115 | {file = "PyYAML-5.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:95f71d2af0ff4227885f7a6605c37fd53d3a106fcab511b8860ecca9fcf400ee"}, 116 | {file = "PyYAML-5.3.1.tar.gz", hash = "sha256:b8eac752c5e14d3eca0e6dd9199cd627518cb5ec06add0de9d32baeee6fe645d"}, 117 | ] 118 | requests = [ 119 | {file = "requests-2.24.0-py2.py3-none-any.whl", hash = "sha256:fe75cc94a9443b9246fc7049224f75604b113c36acb93f87b80ed42c44cbb898"}, 120 | {file = "requests-2.24.0.tar.gz", hash = "sha256:b3559a131db72c33ee969480840fff4bb6dd111de7dd27c8ee1f820f4f00231b"}, 121 | ] 122 | sgmllib3k = [ 123 | {file = "sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9"}, 124 | ] 125 | urllib3 = [ 126 | {file = "urllib3-1.25.11-py2.py3-none-any.whl", hash = "sha256:f5321fbe4bf3fefa0efd0bfe7fb14e90909eb62a48ccda331726b4319897dd5e"}, 127 | {file = "urllib3-1.25.11.tar.gz", hash = "sha256:8d7eaa5a82a1cac232164990f04874c594c9453ec55eef02eab885aa02fc17a2"}, 128 | ] 129 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "rss2wallabag" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["Lukas Winkler "] 6 | 7 | [tool.poetry.dependencies] 8 | python = "^3.7" 9 | feedparser = "^6.0.2" 10 | PyYAML = "^5.3" 11 | requests = "^2.22.0" 12 | 13 | [tool.poetry.dev-dependencies] 14 | 15 | [build-system] 16 | requires = ["poetry>=0.12"] 17 | build-backend = "poetry.masonry.api" 18 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | certifi==2020.6.20; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" 2 | chardet==3.0.4; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" 3 | feedparser==6.0.2; python_version >= "3.6" 4 | idna==2.10; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" 5 | pyyaml==5.3.1; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0") 6 | requests==2.24.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0") 7 | sgmllib3k==1.0.0; python_version >= "3.6" 8 | urllib3==1.25.11; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version < "4" 9 | -------------------------------------------------------------------------------- /sites.example.yaml: -------------------------------------------------------------------------------- 1 | whatif: 2 | latest_article: Earth-Moon Fire Pole 3 | tags: 4 | - science 5 | url: https://what-if.xkcd.com/feed.atom 6 | --------------------------------------------------------------------------------