├── .github
└── workflows
│ ├── renew.yml.bak
│ └── run.yml.bak
├── LICENSE.txt
├── README.md
├── img
└── GitHub_Action_Step3.png
├── last_renew_time.txt
├── main.py
├── requirements.txt
└── rss.yaml
/.github/workflows/renew.yml.bak:
--------------------------------------------------------------------------------
# Monthly job that rewrites last_renew_time.txt with the current time and
# pushes the change as a commit.
# NOTE(review): presumably this exists to keep the repository active so the
# scheduled workflows keep running - confirm.
name: Renew CI

on:
  schedule:
    # 00:00 (UTC) on the first day of every month.
    - cron: '0 0 1 * *'

# The last step pushes a commit, so the workflow token needs write access to
# the repository contents.
permissions:
  contents: write

jobs:
  renew:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout codes
        uses: actions/checkout@v4
      - name: Renew
        run: |
          date '+%Y-%m-%d %H:%M:%S' > last_renew_time.txt
      - name: push changes to remote
        run: |
          git config --global user.name "renew"
          git config --global user.email "renew@example.com"

          git add last_renew_time.txt
          git commit -m "Renew"
          git push
25 |
26 |
--------------------------------------------------------------------------------
/.github/workflows/run.yml.bak:
--------------------------------------------------------------------------------
# Runs main.py on every push to master and once an hour. The feed bookkeeping
# table is carried from one run to the next as a workflow artifact.
name: Run CI

on:
  push:
    branches:
      - master
  schedule:
    # Minute 0 of every hour.
    - cron: '0 * * * *'

jobs:
  update:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout codes
        uses: actions/checkout@v4
      - name: Set up Python 3.8
        uses: actions/setup-python@v5
        with:
          python-version: '3.8'
      - name: Install system dependencies
        # apt-get with -y: the job is non-interactive, so a confirmation
        # prompt would abort the install.
        run: sudo apt-get install -y jq curl
      - name: Cache python dependencies
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-
      - name: Install python dependencies
        run: |
          python -m pip install --upgrade pip
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
      - name: Get previous rss_database.csv
        # skip_unpack keeps the artifact zipped; main.py reads
        # rss_database.zip by default. A missing artifact (first run) is not
        # an error.
        uses: dawidd6/action-download-artifact@v8
        with:
          skip_unpack: true
          if_no_artifact_found: ignore
      - name: Run code
        env:
          CONSUMER_KEY: ${{ secrets.CONSUMER_KEY }}
          ACCESS_TOKEN: ${{ secrets.ACCESS_TOKEN }}
        run: |
          python main.py
      - name: Upload rss_database.csv
        uses: actions/upload-artifact@v4
        with:
          name: rss_database
          path: rss_database.csv
50 |
51 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Hank Liao
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
RSS2Pocket
3 |
4 |
5 | An awesome tool to save articles from RSS feed to Pocket automatically.
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 | **Note: Pocket will no longer be available after July 8, 2025.**
26 | https://support.mozilla.org/en-US/kb/future-of-pocket
27 |
28 | ## About the Project
29 | I used to use IFTTT to [save articles from RSS feed to Pocket](https://ifttt.com/applets/gnf8UbSV).
30 | But in Sept. 2020, [IFTTT introduced the Pro plan, which limits the Standard plan to only 3 applets](https://ifttt.com/plans).
31 | Therefore, I use GitHub Actions to do the job instead.
32 |
33 | This project will execute the python script every hour,
34 | and it will save articles from RSS feed to Pocket.
35 |
36 | ## Getting Started
37 |
38 | ### Install Dependencies (CLI only)
39 | 1. `$ python3 -m pip install --upgrade pip`
40 | 2. `$ pip3 install -r requirements.txt`
41 |
42 | ### Get Pocket Token
43 | Please follow
44 | [GETTING STARTED WITH THE POCKET DEVELOPER API](https://www.jamesfmackenzie.com/getting-started-with-the-pocket-developer-api/)
45 | to get `consumer_key` and `access_token`.
46 |
47 | ## Usage
48 |
49 | ### GitHub Action
50 | 1. Fork this project
51 | 2. Edit rss.yaml
52 | 3. Fill `consumer_key` and `access_token` in the Secrets tab in Settings of the repository.
53 |
54 |
55 |
56 | ![GitHub Action Step 3](img/GitHub_Action_Step3.png)
57 |
58 |
59 |
60 | ### CLI
61 | 1. Edit rss.yaml
62 | 2. `$ CONSUMER_KEY='consumer_key' ACCESS_TOKEN='access_token' python3 main.py`
63 |
64 |
65 | ## Known Issues
66 |
67 | ## Contributing
68 |
69 | Contributions are what make the open source community such an amazing place to learn, inspire, and create. Any contributions you make are **greatly appreciated**.
70 |
71 | 1. Fork the Project
72 | 2. Create your Feature Branch (`git checkout -b feature/AmazingFeature`)
73 | 3. Commit your Changes (`git commit -m 'Add some AmazingFeature'`)
74 | 4. Push to the Branch (`git push origin feature/AmazingFeature`)
75 | 5. Open a Pull Request
76 |
77 | ## License
78 |
79 | Distributed under the MIT License. See `LICENSE.txt` for more information.
80 |
81 | ## Contact
82 |
83 | Project Link: [https://github.com/hankhjliao/RSS2Pocket](https://github.com/hankhjliao/RSS2Pocket)
84 |
85 | ## Acknowledgements
86 | - [GETTING STARTED WITH THE POCKET DEVELOPER API](https://www.jamesfmackenzie.com/getting-started-with-the-pocket-developer-api/)
87 | - [Pocket API: Documentation Overview](https://getpocket.com/developer/docs/overview)
88 | - [Best README Template](https://github.com/othneildrew/Best-README-Template)
89 |
90 |
--------------------------------------------------------------------------------
/img/GitHub_Action_Step3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hankhjliao/RSS2Pocket/41a486b7859d437200a12ac23fbcfc994d0569b7/img/GitHub_Action_Step3.png
--------------------------------------------------------------------------------
/last_renew_time.txt:
--------------------------------------------------------------------------------
1 | 2025-05-01 02:03:56
2 |
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import logging
4 | import os
5 | import zipfile
6 |
7 | import feedparser
8 | import pandas as pd
9 | import requests
10 | import yaml
11 |
12 | from datetime import datetime
13 | from io import BytesIO
14 | from pathlib import Path
15 |
# Root logger setup: INFO and above, each record as "<time> [<LEVEL>] <message>".
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
20 |
21 |
class RSS:
    """Save new articles from the configured RSS feeds to Pocket.

    Typical use, in order: ``openRSSConfig()``, ``readRSSDatabase()``,
    ``run()``, ``saveRSSDatabase()``.

    Per-feed state is kept in a small table (``rss_database``) with one row per
    feed URL, holding the two most recently saved article links and the time
    of the last update.
    """

    # Column layout of the per-feed bookkeeping table.
    DATABASE_COLUMNS = [
        "feed_url",
        "saved_item_link_latest",
        "saved_item_link_second_latest",
        "updated_time",
    ]

    def __init__(self, rss_config_path="rss.yaml", rss_database_path="rss_database.zip", request_timeout=10.0):
        """Set up credentials, paths and an empty bookkeeping table.

        Args:
            rss_config_path: Path of the YAML file listing the feeds.
            rss_database_path: Path of the zip archive holding the table.
            request_timeout: Timeout in seconds for HTTP requests.
        """
        # Pocket credentials come from the environment; None when unset.
        self.CONSUMER_KEY = os.environ.get("CONSUMER_KEY", None)
        self.ACCESS_TOKEN = os.environ.get("ACCESS_TOKEN", None)
        self.URL = "https://getpocket.com/v3/add"

        # Single timestamp for the whole run; run() uses it to tell whether a
        # feed row has already been updated during this run.
        self.NOW = datetime.now()
        self.REQUEST_TIMEOUT = request_timeout
        self.rss_config_path = rss_config_path
        self.rss_database_path = rss_database_path
        self.rss_configs = None
        self.rss_database = self._emptyDatabase()

    def _emptyDatabase(self):
        """Return an empty bookkeeping table with the expected columns."""
        return pd.DataFrame(columns=list(self.DATABASE_COLUMNS))

    def addArticle(self, url, tags=None):
        """Add ``url`` to Pocket, tagged with ``tags`` plus ``"feed"``.

        The caller's ``tags`` object is never modified, and ``"feed"`` is only
        added when it is not already present.

        Args:
            url: Link of the article to save.
            tags: Optional list of tag strings (a single string is accepted).

        Returns:
            The ``status`` value of Pocket's JSON reply on success, ``False``
            when the request fails or the reply is empty, not JSON, or has no
            ``status``.
        """
        if tags is None:
            tag_list = []
        elif isinstance(tags, str):
            tag_list = [tags]
        else:
            # Copy so the "feed" tag does not pile up in the caller's list.
            tag_list = list(tags)
        if "feed" not in tag_list:
            tag_list.append("feed")

        data = {
            "consumer_key": self.CONSUMER_KEY,
            "access_token": self.ACCESS_TOKEN,
            "url": url,
            "tags": ",".join(tag_list),
        }
        try:
            ret = requests.post(self.URL, data=data, timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException as e:
            # A network failure must not abort the run: the table gathered so
            # far would never be saved.
            logging.error(f"Request to Pocket failed: {str(e)}")
            return False
        if ret.text == "":
            logging.error("Pocket return empty string")
            return False
        try:
            payload = json.loads(ret.text)
        except ValueError:
            logging.error("Pocket return non-JSON response: %s", ret.text)
            return False
        if not isinstance(payload, dict) or payload.get("status", None) is None:
            if isinstance(payload, dict):
                logging.error("%s: %s", payload.get("error", ""), payload.get("message", ""))
            else:
                logging.error("Unexpected Pocket response: %s", ret.text)
            return False
        return payload["status"]

    def openRSSConfig(self):
        """Load the feed list from ``rss_config_path`` into ``rss_configs``.

        Raises:
            SystemExit: With status 1 when the file is missing or cannot be
                parsed.
        """
        if not os.path.exists(self.rss_config_path):
            logging.error(f"{self.rss_config_path} not exists.")
            raise SystemExit(1)
        # Explicit UTF-8: feed names may contain non-ASCII characters and the
        # platform default encoding is not always UTF-8.
        with open(self.rss_config_path, "r", encoding="utf-8") as stream:
            try:
                self.rss_configs = yaml.safe_load(stream)
            except Exception as e:
                logging.error(f"Unexpected error when parsing yaml: {str(e)}")
                raise SystemExit(1)

    def readRSSDatabase(self):
        """Load the bookkeeping table, or start empty when no file exists."""
        if os.path.exists(self.rss_database_path):
            # dtype=object keeps links and timestamps as strings even when a
            # column is entirely empty; an inferred float column would clash
            # with the string/datetime values written later by run().
            self.rss_database = pd.read_csv(self.rss_database_path, dtype=object)
        else:
            self.rss_database = self._emptyDatabase()

    def getLastTimeRSSData(self, rss_url):
        """Look up the stored row for ``rss_url``.

        The feed must already have a row in the table.

        Returns:
            Tuple ``(idx, link_latest, link_second_latest)`` where ``idx`` is
            the row's index label.
        """
        feed_location = self.rss_database["feed_url"] == rss_url
        idx = self.rss_database[feed_location].index.values[0]
        link_latest = self.rss_database.loc[idx, "saved_item_link_latest"]
        link_second_latest = self.rss_database.loc[idx, "saved_item_link_second_latest"]
        return idx, link_latest, link_second_latest

    def saveRSSDatabase(self):
        """Write the table as CSV in the working directory and zip it.

        The CSV is named after ``rss_database_path`` with a ``.csv`` suffix;
        the zip archive is written to ``rss_database_path`` itself.
        """
        # Save the rss database
        archive_name = Path(self.rss_database_path).with_suffix(".csv").name
        self.rss_database.sort_values("feed_url").to_csv(archive_name, index=False)

        # The zip is for CLI users; in the GitHub Action the csv is uploaded
        # as an artifact instead. Zip the file that was just written rather
        # than a fixed name, so a custom database path works too.
        with zipfile.ZipFile(self.rss_database_path, "w") as zf:
            zf.write(archive_name)

    def run(self):
        """Check every configured feed and save its new articles to Pocket.

        Entries are processed newest first until one of the two remembered
        links is reached. A feed seen for the first time only gets its first
        entry processed. Feeds that cannot be fetched are skipped.
        """
        # An empty YAML document parses to None; treat it as "no feeds".
        for rss_config in self.rss_configs or []:
            # Get the feed config
            rss_url = rss_config["url"]
            rss_tags = rss_config.get("tags", ["feed"])

            # Get the feed content
            logging.info(f"Checking {rss_url}")
            try:
                resp = requests.get(rss_url, timeout=self.REQUEST_TIMEOUT)
            except requests.ReadTimeout:
                logging.warning(f"Timeout when reading feed: {rss_url}")
                continue
            except requests.ConnectionError:
                logging.warning(f"Cannot access feed: {rss_url}")
                continue
            except Exception as e:
                logging.error(f"Unexpected error: {str(e)}")
                continue
            Feed = feedparser.parse(BytesIO(resp.content))

            # Check the feed is first run or not
            flag_first_run = False
            if rss_url not in self.rss_database["feed_url"].values:
                # Insert under the temporary label -1, then shift all labels
                # by one so the new row ends up with label 0.
                self.rss_database.loc[-1] = {
                    "feed_url": rss_url,
                    "saved_item_link_latest": None,
                    "saved_item_link_second_latest": None,
                    "updated_time": None,
                }
                self.rss_database.index = self.rss_database.index + 1
                flag_first_run = True

            # Get last time rss data
            idx, link_latest, link_second_latest = self.getLastTimeRSSData(rss_url)

            # Sort the article according to the published time
            entries = Feed.get("entries", [])
            try:
                entries = sorted(entries, key=lambda e: e.published_parsed, reverse=True)
            except Exception:
                # Keep the feed's own order when entries cannot be sorted.
                logging.warning(f"Feed doesn't support published_parsed attribute: {rss_url}")

            # Iter the article in the feed
            for entry in entries:
                entry_link = entry.get("link", None)
                if entry_link is None:
                    # Nothing to save for an entry that has no link.
                    logging.warning(f"Skipping entry without link in feed: {rss_url}")
                    continue

                # Break if added
                if (entry_link == link_latest) or (entry_link == link_second_latest):
                    break

                # Print article information
                entry_title = entry.get("title", None)
                entry_published_time = entry.get("published", None)
                logging.info(f"Article Info:\n\tTitle: {entry_title}\n\tPublished time: {entry_published_time}\n\tLink: {entry_link}")

                # Add the article
                if self.addArticle(entry_link, rss_tags):
                    logging.info("Article added")

                    # Update the rss database. Only the first article saved
                    # in this run is recorded as the latest; the row's
                    # timestamp equals NOW once that has happened.
                    if self.rss_database.loc[idx, "updated_time"] != self.NOW:
                        self.rss_database.loc[idx, "saved_item_link_second_latest"] = link_latest
                        self.rss_database.loc[idx, "saved_item_link_latest"] = entry_link
                        self.rss_database.loc[idx, "updated_time"] = self.NOW
                else:
                    logging.warning(f"Article not added: {entry_link}")

                # Add only one article when first run
                if flag_first_run:
                    break
177 |
if __name__ == "__main__":
    app = RSS()
    # Pipeline, in order: load the feed list, load the saved state, process
    # the feeds, then persist the updated state.
    pipeline = (
        app.openRSSConfig,
        app.readRSSDatabase,
        app.run,
        app.saveRSSDatabase,
    )
    for step in pipeline:
        step()
184 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | feedparser<=7.0.0,>6.0.0
2 | pandas
3 | pyyaml
4 | requests
5 |
--------------------------------------------------------------------------------
/rss.yaml:
--------------------------------------------------------------------------------
1 | # Attributes:
2 | # Required:
3 | # url: feed URL
4 | # Optional:
5 | # name: feed name (not used in the program)
6 | # tags: tags applied in Pocket (need to be a list)
7 | # filter: only add articles whose title passes the filter (not implemented)
8 | #
9 | # Example:
10 | # - url: https://example.com
11 | # name: Example Website
12 | # tags: [tag1, tag2]
13 | # filter: "Test"
14 |
15 | # Test for the wrong URL
16 | # - url: https://example.com
17 | # - url: https://a.c
18 | # - url: apple
19 |
20 | # Blog
21 | # - url: https://feeds.feedburner.com/playpc
22 | # name: 電腦玩物
23 | # tags: [blog]
24 | - url: https://feeds.feedburner.com/ruanyifeng
25 | name: 阮一峰的网络日志
26 | tags: [blog]
27 | # - url: https://feeds.feedburner.com/blogspot/gJZg
28 | # name: Google AI Blog
29 | # tags: [blog]
30 | # - url: https://feeds.feedburner.com/GDBcode
31 | # name: Google Developers Blog
32 | # tags: [blog]
33 | - url: https://feeds.feedburner.com/blogspot/RLXA
34 | name: Google Testing Blog
35 | tags: [blog]
36 | - url: https://distill.pub/rss.xml
37 | name: Distill
38 | tags: [blog]
39 | # - url: https://kexue.fm/feed
40 | # name: 科学空间
41 | # tags: [blog]
42 | - url: https://lilianweng.github.io/index.xml
43 | name: Lil’Log
44 | tags: [blog]
45 | # - url: https://rss.shab.fun/blogs/wordpress/haogroot.com/
46 | # name: haogroot’s Blog
47 | # tags: [blog]
48 | # - url: https://cjting.me/index.xml
49 | # name: CJ Ting’s Blog
50 | # tags: [blog]
51 | - url: https://sspai.com/feed
52 | name: 少数派
53 | tags: [blog]
54 | - url: https://blog.lilydjwg.me/feed
55 | name: 依云’s Blog
56 | tags: [blog]
57 | - url: http://www.matrix67.com/blog/feed
58 | name: Matrix67
59 | tags: [blog]
60 | - url: https://research.ibm.com/haifa/ponderthis/rss/index.xml
61 | name: IBM Ponder This
62 | tags: [blog]
63 | - url: https://catcoding.me/atom.xml
64 | name: 程序员的喵
65 | tags: [blog]
66 | # - url: https://medium.com/feed/@jiayaushiau
67 | # name: Medium - Jia-Yau Shiau
68 | # tags: [blog]
69 | # - url: https://medium.com/feed/@netflixtechblog
70 | # name: Medium - Netflix Tech Blog
71 | # tags: [blog]
72 | # - url: https://www.robalni.org/posts/posts.rss
73 | # name: robalni.org
74 | # tags: [blog]
75 | - url: https://diygod.cc/feed
76 | name: DIYgod
77 | tags: [blog]
78 | - url: https://ivonblog.com/index.xml
79 | name: Ivon
80 | tags: [blog]
81 | - url: https://blog.kyomind.tw/atom.xml
82 | name: Code and Me
83 | tags: [blog]
84 | # - url: https://einverne.github.io/feed.xml
85 | # name: Verne in GitHub
86 | # tags: [blog]
87 | - url: https://jvns.ca/atom.xml
88 | name: Julia Evans
89 | tags: [blog]
90 |
91 |
92 | # Game
93 | # - url: https://gamejilu.com/rss/
94 | # name: Gamejilu 遊戲筆記
95 | # tags: [game]
96 | - url: https://www.4gamers.com.tw/rss/latest-news
97 | name: 4gamers
98 | tags: [game]
99 |
100 | # News
101 | - url: https://rsshub.rssforever.com/github/issue/headllines/hackernews-daily
102 | name: Hacker News
103 | tags: [news]
104 | # - url: https://www.ithome.com.tw/rss
105 | # name: iThome 新聞
106 | # tags: [news]
107 | # - url: https://global.udn.com/rss/news/1020/8662
108 | # name: udn 轉角國際
109 | # tags: [news]
110 | # - url: https://www.twreporter.org/a/rss2.xml
111 | # name: 報導者 The Reporter
112 | # tags: [news]
113 | # - url: https://www.phoronix.com/rss.php
114 | # name: Phoronix
115 | # tags: [news]
116 | - url: https://archlinux.org/feeds/news/
117 | name: Arch News
118 | tags: [news]
119 | - url: https://hub.slarker.me/zhihu/daily
120 | name: 知乎日报
121 | tags: [news]
122 | # - url: https://thisweek.gnome.org/index.xml
123 | # name: This Week in Gnome
124 | # tags: [news]
125 |
126 | # Software
127 | - url: https://feeds.feedburner.com/freegroup
128 | name: 免費資源網路社群
129 | tags: [software]
130 | - url: https://feeds.appinn.com/appinns/
131 | name: 小众软件
132 | tags: [software]
133 |
134 | # Story
135 | # - url: https://rsshub.feeded.xyz/wxkol/show/3239197880
136 | # name: 魔宙
137 | # tags: [story]
138 |
--------------------------------------------------------------------------------