├── .github └── workflows │ ├── renew.yml.bak │ └── run.yml.bak ├── LICENSE.txt ├── README.md ├── img └── GitHub_Action_Step3.png ├── last_renew_time.txt ├── main.py ├── requirements.txt └── rss.yaml /.github/workflows/renew.yml.bak: -------------------------------------------------------------------------------- 1 | name: Renew CI 2 | 3 | on: 4 | schedule: 5 | - cron: '0 0 1 * *' 6 | 7 | jobs: 8 | renew: 9 | runs-on: ubuntu-latest 10 | 11 | steps: 12 | - name: Checkout codes 13 | uses: actions/checkout@v3 14 | - name: Renew 15 | run: | 16 | echo $(date '+%Y-%m-%d %H:%M:%S') > last_renew_time.txt 17 | - name: push changes to remote 18 | run: | 19 | git config --global user.name "renew" 20 | git config --global user.email "renew@example.com" 21 | 22 | git add last_renew_time.txt 23 | git commit -m "Renew" 24 | git push 25 | 26 | -------------------------------------------------------------------------------- /.github/workflows/run.yml.bak: -------------------------------------------------------------------------------- 1 | name: Run CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | schedule: 8 | - cron: '0 * * * *' 9 | 10 | jobs: 11 | update: 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - name: Checkout codes 16 | uses: actions/checkout@v4 17 | - name: Set up Python 3.8 18 | uses: actions/setup-python@v5 19 | with: 20 | python-version: '3.8' 21 | - name: Install system dependencies 22 | run: sudo apt install jq curl 23 | - name: Cache python dependencies 24 | uses: actions/cache@v4 25 | with: 26 | path: ~/.cache/pip 27 | key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} 28 | restore-keys: | 29 | ${{ runner.os }}-pip- 30 | - name: Install python dependencies 31 | run: | 32 | python -m pip install --upgrade pip 33 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 34 | - name: Get previous rss_database.csv 35 | uses: dawidd6/action-download-artifact@v8 36 | with: 37 | skip_unpack: true 38 | if_no_artifact_found: ignore 39 
| - name: Run code 40 | env: 41 | CONSUMER_KEY: ${{ secrets.CONSUMER_KEY }} 42 | ACCESS_TOKEN: ${{ secrets.ACCESS_TOKEN }} 43 | run: | 44 | python main.py 45 | - name: Upload rss_database.csv 46 | uses: actions/upload-artifact@v4 47 | with: 48 | name: rss_database 49 | path: rss_database.csv 50 | 51 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Hank Liao 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 |

RSS2Pocket

3 | 4 |

5 | An awesome tool to save articles from RSS feed to Pocket automatically. 6 |

7 | 8 |

9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |

22 | 23 |

24 | 25 | **Note: Pocket will no longer be available after July 8, 2025.** 26 | https://support.mozilla.org/en-US/kb/future-of-pocket 27 | 28 | ## About the Project 29 | I used to use IFTTT to [save articles from RSS feeds to Pocket](https://ifttt.com/applets/gnf8UbSV). 30 | But in Sept. 2020, [IFTTT introduced the Pro plan, which limits the Standard plan to only 3 applets](https://ifttt.com/plans). 31 | Therefore, I use GitHub Actions to do the job instead. 32 | 33 | This project executes the Python script every hour 34 | and saves articles from the RSS feeds to Pocket. 35 | 36 | ## Getting Started 37 | 38 | ### Install Dependencies (CLI only) 39 | 1. `$ python3 -m pip install --upgrade pip` 40 | 2. `$ pip3 install -r requirements.txt` 41 | 42 | ### Get Pocket Token 43 | Please follow 44 | [GETTING STARTED WITH THE POCKET DEVELOPER API](https://www.jamesfmackenzie.com/getting-started-with-the-pocket-developer-api/) 45 | to get `consumer_key` and `access_token`. 46 | 47 | ## Usage 48 | 49 | ### GitHub Action 50 | 1. Fork this project 51 | 2. Edit rss.yaml 52 | 3. Add your `consumer_key` and `access_token` as the secrets `CONSUMER_KEY` and `ACCESS_TOKEN` in the Secrets tab of the repository's Settings. 53 | 54 |
55 | 56 | [![GitHub Action Step 3](img/GitHub_Action_Step3.png)](img/GitHub_Action_Step3.png) 57 | 58 |
59 | 60 | ### CLI 61 | 1. Edit rss.yaml 62 | 2. `$ CONSUMER_KEY='consumer_key' ACCESS_TOKEN='access_token' python3 main.py` 63 | 64 | 65 | ## Known Issues 66 | 67 | ## Contributing 68 | 69 | Contributions are what make the open source community such an amazing place to learn, inspire, and create. Any contributions you make are **greatly appreciated**. 70 | 71 | 1. Fork the Project 72 | 2. Create your Feature Branch (`git checkout -b feature/AmazingFeature`) 73 | 3. Commit your Changes (`git commit -m 'Add some AmazingFeature'`) 74 | 4. Push to the Branch (`git push origin feature/AmazingFeature`) 75 | 5. Open a Pull Request 76 | 77 | ## License 78 | 79 | Distributed under the MIT License. See `LICENSE.txt` for more information. 80 | 81 | ## Contact 82 | 83 | Project Link: [https://github.com/hankhjliao/RSS2Pocket](https://github.com/hankhjliao/RSS2Pocket) 84 | 85 | ## Acknowledgements 86 | - [GETTING STARTED WITH THE POCKET DEVELOPER API](https://www.jamesfmackenzie.com/getting-started-with-the-pocket-developer-api/) 87 | - [Pocket API: Documentation Overview](https://getpocket.com/developer/docs/overview) 88 | - [Best README Template](https://github.com/othneildrew/Best-README-Template) 89 | 90 | -------------------------------------------------------------------------------- /img/GitHub_Action_Step3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hankhjliao/RSS2Pocket/41a486b7859d437200a12ac23fbcfc994d0569b7/img/GitHub_Action_Step3.png -------------------------------------------------------------------------------- /last_renew_time.txt: -------------------------------------------------------------------------------- 1 | 2025-05-01 02:03:56 2 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import logging 4 | import os 5 | import
zipfile

import feedparser
import pandas as pd
import requests
import yaml

from datetime import datetime
from io import BytesIO
from pathlib import Path

logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)


class RSS:
    """Save new articles from the RSS feeds listed in a YAML file to Pocket.

    The feed list is read from ``rss_config_path``. For every feed the two most
    recently saved article links are remembered in a small table that is
    persisted as a CSV file wrapped in a zip archive (``rss_database_path``),
    so that later runs stop at the first article that was already saved.

    Credentials are taken from the ``CONSUMER_KEY`` and ``ACCESS_TOKEN``
    environment variables.
    """

    # Column layout of the per-feed bookkeeping table.
    DATABASE_COLUMNS = (
        "feed_url",
        "saved_item_link_latest",
        "saved_item_link_second_latest",
        "updated_time",
    )

    def __init__(self, rss_config_path="rss.yaml", rss_database_path="rss_database.zip", request_timeout=10.0):
        """Store the settings and start with an empty database.

        Args:
            rss_config_path: Path of the YAML file that lists the feeds.
            rss_database_path: Path of the zip archive holding the CSV database.
            request_timeout: Timeout in seconds applied to HTTP requests.
        """
        self.CONSUMER_KEY = os.environ.get("CONSUMER_KEY", None)
        self.ACCESS_TOKEN = os.environ.get("ACCESS_TOKEN", None)
        self.URL = "https://getpocket.com/v3/add"

        # A single timestamp per run; run() uses it to tell whether a feed's
        # row was already updated during this run.
        self.NOW = datetime.now()
        self.REQUEST_TIMEOUT = request_timeout
        self.rss_config_path = rss_config_path
        self.rss_database_path = rss_database_path
        self.rss_configs = None
        self.rss_database = self._emptyDatabase()

    @classmethod
    def _emptyDatabase(cls):
        """Return an empty bookkeeping table with the expected columns."""
        return pd.DataFrame(columns=list(cls.DATABASE_COLUMNS))

    def addArticle(self, url, tags=None):
        """Send one article URL to Pocket.

        Args:
            url: Link of the article to save.
            tags: Optional list of tags. The list is copied, so the caller's
                list is never modified; the tag ``"feed"`` is added when it is
                not already present.

        Returns:
            The ``status`` value of Pocket's JSON response on success, or
            ``False`` when the request fails, the response is empty or not
            JSON, or the response carries no ``status``.
        """
        # Work on a copy: the original appended to the argument itself, which
        # made "feed" accumulate on the per-feed tag list with every article.
        tags = [str(tag) for tag in tags] if tags else []
        if "feed" not in tags:
            tags.append("feed")
        data = {
            "consumer_key": self.CONSUMER_KEY,
            "access_token": self.ACCESS_TOKEN,
            "url": url,
            "tags": ",".join(tags),
        }
        try:
            ret = requests.post(self.URL, data=data, timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException as e:
            # One unreachable request must not abort the whole run.
            logging.error(f"Cannot send article to Pocket: {str(e)}")
            return False
        if ret.text == "":
            logging.error("Pocket return empty string")
            return False
        try:
            ret = json.loads(ret.text)
        except ValueError:
            logging.error("Pocket return non-JSON response")
            return False
        if not isinstance(ret, dict):
            logging.error("Pocket return unexpected response")
            return False
        if ret.get("status", None) is None:
            logging.error("%s: %s", ret.get("error", ""), ret.get("message", ""))
            return False
        return ret["status"]

    def openRSSConfig(self):
        """Load the feed list from ``rss_config_path`` into ``rss_configs``.

        Raises:
            SystemExit: With exit status 1 when the file does not exist or
                cannot be parsed, so that a failing run is reported as failed.
        """
        if not os.path.exists(self.rss_config_path):
            logging.error(f"{self.rss_config_path} not exists.")
            raise SystemExit(1)
        # Explicit encoding: the feed list may contain non-ASCII feed names and
        # the platform default encoding is not UTF-8 everywhere.
        with open(self.rss_config_path, "r", encoding="utf-8") as stream:
            try:
                self.rss_configs = yaml.safe_load(stream)
            except Exception as e:
                logging.error(f"Unexpected error when parsing yaml: {str(e)}")
                raise SystemExit(1)

    def readRSSDatabase(self):
        """Load the bookkeeping table, or start empty when no file exists."""
        if os.path.exists(self.rss_database_path):
            # dtype=object keeps every column generic; otherwise an all-empty
            # column is read as float64 and later receives strings/datetimes.
            self.rss_database = pd.read_csv(self.rss_database_path, dtype=object)
        else:
            self.rss_database = self._emptyDatabase()

    def getLastTimeRSSData(self, rss_url):
        """Return ``(row index, latest link, second latest link)`` of a feed.

        The feed must already have a row in the database.
        """
        feed_location = self.rss_database["feed_url"] == rss_url
        idx = self.rss_database[feed_location].index.values[0]
        link_latest = self.rss_database.loc[idx, "saved_item_link_latest"]
        link_second_latest = self.rss_database.loc[idx, "saved_item_link_second_latest"]
        return idx, link_latest, link_second_latest

    def saveRSSDatabase(self):
        """Write the table as a CSV in the working directory and zip it."""
        # Save the rss database
        archive_name = Path(self.rss_database_path).with_suffix(".csv").name
        self.rss_database.sort_values("feed_url").to_csv(archive_name, index=False)

        # This is for CLI user
        # As for GitHub Action user, the original note says the upload-artifact
        # step compresses the csv into a zip file by itself.
        with zipfile.ZipFile(self.rss_database_path, "w") as zf:
            # Zip the file that was just written rather than a fixed name, so
            # a non-default rss_database_path works as well.
            zf.write(archive_name)

    def run(self):
        """Check every configured feed and save its unseen articles to Pocket.

        For a feed seen for the first time only one article is processed.
        The ``filter`` option of a feed is not implemented.
        """
        # Iter all the feed configs (an empty config file yields None)
        for rss_config in self.rss_configs or []:
            # Get the feed config
            if not isinstance(rss_config, dict) or not rss_config.get("url"):
                logging.warning(f"Skip feed config without url: {rss_config}")
                continue
            rss_url = rss_config["url"]
            rss_tags = rss_config.get("tags", ["feed"])

            # Get the feed content
            logging.info(f"Checking {rss_url}")
            try:
                resp = requests.get(rss_url, timeout=self.REQUEST_TIMEOUT)
            except requests.ReadTimeout:
                logging.warning(f"Timeout when reading feed: {rss_url}")
                continue
            except requests.ConnectionError:
                logging.warning(f"Cannot access feed: {rss_url}")
                continue
            except Exception as e:
                logging.error(f"Unexpected error: {str(e)}")
                continue
            content = BytesIO(resp.content)
            Feed = feedparser.parse(content)

            # Check the feed is first run or not
            flag_first_run = False
            if rss_url not in self.rss_database["feed_url"].values:
                # Insert at label -1, then shift all labels by one so the new
                # row ends up at label 0 without colliding with existing rows.
                self.rss_database.loc[-1] = {
                    "feed_url": rss_url,
                    "saved_item_link_latest": None,
                    "saved_item_link_second_latest": None,
                    "updated_time": None,
                }
                self.rss_database.index = self.rss_database.index + 1
                flag_first_run = True

            # Get last time rss data
            idx, link_latest, link_second_latest = self.getLastTimeRSSData(rss_url)

            # Sort the article according to the published time
            entries = Feed.get("entries", [])
            try:
                entries = sorted(entries, key=lambda e: e.published_parsed, reverse=True)
            except Exception:
                # Keep the feed's own order when entries cannot be sorted.
                logging.warning(f"Feed doesn't support published_parsed attribute: {rss_url}")

            # Iter the article in the feed
            for entry in entries:
                entry_link = entry.get("link", None)
                if not entry_link:
                    logging.warning(f"Skip article without link in feed: {rss_url}")
                    continue

                # Break if added
                if (entry_link == link_latest) or (entry_link == link_second_latest):
                    break

                # Print article information
                entry_title = entry.get("title", None)
                entry_published_time = entry.get("published", None)
                logging.info(f"Article Info:\n\tTitle: {entry_title}\n\tPublished time: {entry_published_time}\n\tLink: {entry_link}")

                # Add the article
                if self.addArticle(entry_link, rss_tags):
                    logging.info("Article added")

                    # Update the rss database (only once per run and feed, so
                    # the row records the first article saved in this run)
                    if self.rss_database.loc[idx, "updated_time"] != self.NOW:
                        self.rss_database.loc[idx, "saved_item_link_second_latest"] = link_latest
                        self.rss_database.loc[idx, "saved_item_link_latest"] = entry_link
                        self.rss_database.loc[idx, "updated_time"] = self.NOW
                else:
                    logging.warning(f"Article not added: {entry_link}")

                # Add only one article when first run
                if flag_first_run:
                    break


if __name__ == "__main__":
    rss2pocket = RSS()
    rss2pocket.openRSSConfig()
    rss2pocket.readRSSDatabase()
    rss2pocket.run()
rss2pocket.saveRSSDatabase() 184 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | feedparser<=7.0.0,>6.0.0 2 | pandas 3 | pyyaml 4 | requests 5 | -------------------------------------------------------------------------------- /rss.yaml: -------------------------------------------------------------------------------- 1 | # Attributes: 2 | # Required: 3 | # url: feed URL 4 | # Optional: 5 | # name: feed name (not used in the program) 6 | # tags: tags applied in Pocket (need to be a list) 7 | # filter: only add the article title pass the filter (not implement) 8 | # 9 | # Example: 10 | # - url: https://example.com 11 | # name: Example Website 12 | # tags: [tag1, tag2] 13 | # filter: "Test" 14 | 15 | # Test for the wrong URL 16 | # - url: https://example.com 17 | # - url: https://a.c 18 | # - url: apple 19 | 20 | # Blog 21 | # - url: https://feeds.feedburner.com/playpc 22 | # name: 電腦玩物 23 | # tags: [blog] 24 | - url: https://feeds.feedburner.com/ruanyifeng 25 | name: 阮一峰的网络日志 26 | tags: [blog] 27 | # - url: https://feeds.feedburner.com/blogspot/gJZg 28 | # name: Google AI Blog 29 | # tags: [blog] 30 | # - url: https://feeds.feedburner.com/GDBcode 31 | # name: Google Developers Blog 32 | # tags: [blog] 33 | - url: https://feeds.feedburner.com/blogspot/RLXA 34 | name: Google Testing Blog 35 | tags: [blog] 36 | - url: https://distill.pub/rss.xml 37 | name: Distill 38 | tags: [blog] 39 | # - url: https://kexue.fm/feed 40 | # name: 科学空间 41 | # tags: [blog] 42 | - url: https://lilianweng.github.io/index.xml 43 | name: Lil’Log 44 | tags: [blog] 45 | # - url: https://rss.shab.fun/blogs/wordpress/haogroot.com/ 46 | # name: haogroot’s Blog 47 | # tags: [blog] 48 | # - url: https://cjting.me/index.xml 49 | # name: CJ Ting’s Blog 50 | # tags: [blog] 51 | - url: https://sspai.com/feed 52 | name: 少数派 53 | tags: [blog] 54 | - url: 
https://blog.lilydjwg.me/feed 55 | name: 依云’s Blog 56 | tags: [blog] 57 | - url: http://www.matrix67.com/blog/feed 58 | name: Matrix67 59 | tags: [blog] 60 | - url: https://research.ibm.com/haifa/ponderthis/rss/index.xml 61 | name: IBM Ponder This 62 | tags: [blog] 63 | - url: https://catcoding.me/atom.xml 64 | name: 程序员的喵 65 | tags: [blog] 66 | # - url: https://medium.com/feed/@jiayaushiau 67 | # name: Medium - Jia-Yau Shiau 68 | # tags: [blog] 69 | # - url: https://medium.com/feed/@netflixtechblog 70 | # name: Medium - Netflix Tech Blog 71 | # tags: [blog] 72 | # - url: https://www.robalni.org/posts/posts.rss 73 | # name: robalni.org 74 | # tags: [blog] 75 | - url: https://diygod.cc/feed 76 | name: DIYgod 77 | tags: [blog] 78 | - url: https://ivonblog.com/index.xml 79 | name: Ivon 80 | tags: [blog] 81 | - url: https://blog.kyomind.tw/atom.xml 82 | name: Code and Me 83 | tags: [blog] 84 | # - url: https://einverne.github.io/feed.xml 85 | # name: Verne in GitHub 86 | # tags: [blog] 87 | - url: https://jvns.ca/atom.xml 88 | name: Julia Evans 89 | tags: [blog] 90 | 91 | 92 | # Game 93 | # - url: https://gamejilu.com/rss/ 94 | # name: Gamejilu 遊戲筆記 95 | # tags: [game] 96 | - url: https://www.4gamers.com.tw/rss/latest-news 97 | name: 4gamers 98 | tags: [game] 99 | 100 | # News 101 | - url: https://rsshub.rssforever.com/github/issue/headllines/hackernews-daily 102 | name: Hacker News 103 | tags: [news] 104 | # - url: https://www.ithome.com.tw/rss 105 | # name: iThome 新聞 106 | # tags: [news] 107 | # - url: https://global.udn.com/rss/news/1020/8662 108 | # name: udn 轉角國際 109 | # tags: [news] 110 | # - url: https://www.twreporter.org/a/rss2.xml 111 | # name: 報導者 The Reporter 112 | # tags: [news] 113 | # - url: https://www.phoronix.com/rss.php 114 | # name: Phoronix 115 | # tags: [news] 116 | - url: https://archlinux.org/feeds/news/ 117 | name: Arch News 118 | tags: [news] 119 | - url: https://hub.slarker.me/zhihu/daily 120 | name: 知乎日报 121 | tags: [news] 122 | # - url: 
https://thisweek.gnome.org/index.xml 123 | # name: This Week in Gnome 124 | # tags: [news] 125 | 126 | # Software 127 | - url: https://feeds.feedburner.com/freegroup 128 | name: 免費資源網路社群 129 | tags: [software] 130 | - url: https://feeds.appinn.com/appinns/ 131 | name: 小众软件 132 | tags: [software] 133 | 134 | # Story 135 | # - url: https://rsshub.feeded.xyz/wxkol/show/3239197880 136 | # name: 魔宙 137 | # tags: [story] 138 | --------------------------------------------------------------------------------