├── codespell-ignore-words.txt ├── inoreader ├── __init__.py ├── exception.py ├── consts.py ├── subscription.py ├── filter.py ├── article.py ├── utils.py ├── config.py ├── sim.py ├── client.py └── main.py ├── .travis.yml ├── requirements.txt ├── .github └── workflows │ ├── pre-commit.yaml │ └── publish.yaml ├── examples └── rules.example.yaml ├── Makefile ├── README.md ├── .gitignore ├── .pre-commit-config.yaml ├── pyproject.toml └── CHANGELOG.md /codespell-ignore-words.txt: -------------------------------------------------------------------------------- 1 | ot 2 | -------------------------------------------------------------------------------- /inoreader/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from .client import InoreaderClient 3 | 4 | __all__ = ["InoreaderClient"] 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - "3.6" 5 | 6 | install: "make deps" 7 | 8 | script: 9 | - "make lint" 10 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | lxml 2 | requests 3 | PyYAML 4 | click 5 | flask 6 | requests-oauthlib 7 | tabulate 8 | flake8 9 | pytest 10 | black 11 | isort 12 | -------------------------------------------------------------------------------- /inoreader/exception.py: -------------------------------------------------------------------------------- 1 | class NotLoginError(ValueError): 2 | def __repr__(self): 3 | return "" 4 | 5 | 6 | class APIError(ValueError): 7 | def __repr__(self): 8 | return "" 9 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yaml: -------------------------------------------------------------------------------- 1 | name: Python package 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: [master] 7 | 8 | jobs: 9 | pre-commit: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v3 13 | - uses: actions/setup-python@v3 14 | - uses: pre-commit/action@v3.0.0 15 | -------------------------------------------------------------------------------- /inoreader/consts.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import os 3 | 4 | BASE_URL = "https://www.inoreader.com/reader/api/0/" 5 | LOGIN_URL = "https://www.inoreader.com/accounts/ClientLogin" 6 | 7 | DEFAULT_APPID = "your_app_id" 8 | DEFAULT_APPKEY = "your_app_key" 9 | 10 | CONFIG_FILE = os.path.join(os.environ.get("HOME"), ".inoreader") 11 | -------------------------------------------------------------------------------- /examples/rules.example.yaml: -------------------------------------------------------------------------------- 1 | - name: test 2 | folders: 3 | - inbox 4 | fields: ["title", "content"] 5 | filter: 6 | type: include_any # include_any/include_all/exclude 7 | rules: 8 | - 权威认证 9 | - 值得拥有 10 | - 潮牌 11 | - 新款 12 | - 求助 13 | - 招聘 14 | - 免费版 15 | - 包邮 16 | - 天猫 17 | - 淘宝 18 | actions: 19 | - type: mark_as_read 20 | - type: tag 21 | tags: tag1,tag2,tag3 22 | - type: like 23 | - type: star 24 | - type: broadcast 25 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | lint: clean 2 | - pip install ruff codespell -q 3 | - ruff check inoreader/ 4 | - codespell 5 | 6 | format: 7 | - pip install ruff -q 8 | - ruff format inoreader/ 9 | 10 | clean: 11 | - find . -iname "*__pycache__" | xargs rm -rf 12 | - find . -iname "*.pyc" | xargs rm -rf 13 | - rm cobertura.xml -f 14 | - rm testresult.xml -f 15 | - rm .coverage -f 16 | - rm .pytest_cache -rf 17 | 18 | venv: 19 | - virtualenv --python=$(shell which python3) --prompt '' venv 20 | 21 | lock-requirements: 22 | - pip install pip-tools -q 23 | - pip-compile -o requirements.txt 24 | 25 | deps: lock-requirements 26 | - pip-sync 27 | 28 | build: lint test 29 | - python -m build 30 | -------------------------------------------------------------------------------- /.github/workflows/publish.yaml: -------------------------------------------------------------------------------- 1 | name: Build distribution 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | test: 7 | runs-on: "ubuntu-latest" 8 | 9 | steps: 10 | - uses: actions/checkout@v3 11 | - uses: actions/setup-python@v3 12 | with: 13 | python-version: 3.8 14 | 15 | - name: Install build dependencies 16 | run: python -m pip install build wheel 17 | 18 | - name: Build distributions 19 | shell: bash -l {0} 20 | run: python -m build 21 | 22 | - name: Publish package to PyPI 23 | if: github.repository == 'Linusp/python-inoreader' && github.event_name == 'push' && startsWith(github.ref, 'refs/tags') 24 | uses: pypa/gh-action-pypi-publish@master 25 | with: 26 | user: __token__ 27 | password: ${{ secrets.pypi_password }} 28 | -------------------------------------------------------------------------------- /inoreader/subscription.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | class Subscription(object): 6 | def __init__(self, id, title, categories, sortid, firstitemmsec, url, htmlUrl, iconUrl): 7 | self.id = id 8 | self.title = title 9 | self.categories = categories 10 | self.sortid = sortid 11 | self.firstitemmsec = firstitemmsec 12 | self.url = url 13 | self.htmlUrl = htmlUrl 14 | self.iconUrl = iconUrl 15 | 16 | @classmethod 17 | def from_json(cls, data): 18 | subscription_info = { 19 | "id": data["id"], 20 | "title": data["title"], 21 | "categories": list(data["categories"]), 22 | "sortid": data["sortid"], 23 | "firstitemmsec": data["firstitemmsec"], 24 | "url": data["url"], 25 | "htmlUrl": data["htmlUrl"], 26 | "iconUrl": data["iconUrl"], 27 | } 28 | return cls(**subscription_info) 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Inoreader 2 | ========= 3 | 4 | ![](https://api.travis-ci.org/Linusp/python-inoreader.png?branch=master) 5 | 6 | Python wrapper of Inoreader API. 7 | 8 | ## Installation 9 | 10 | Install with `pip` directly 11 | 12 | ```shell 13 | pip install python-inoreader 14 | ``` 15 | 16 | ## Usage 17 | 18 | 1. [Register your application](https://www.inoreader.com/developers/register-app). Use `http://localhost:8080/oauth/redirect` for the redirect URI and set scope to "Read and Write". Then create the configuration file `$HOME/.inoreader` 19 | 20 | An example of the configuration file: 21 | 22 | ``` 23 | [auth] 24 | appid = your_app_id 25 | appkey = your_app_key 26 | ``` 27 | 28 | 2. Login to your Inoreader account 29 | 30 | ```shell 31 | inoreader login 32 | ``` 33 | 34 | 2. Use the command line tool `inoreader` to do something, run `inoreader --help` for details. Or in code do: 35 | 36 | ```python 37 | from inoreader.main import get_client 38 | client = get_client() 39 | ``` 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Django stuff: 33 | *.log 34 | local_settings.py 35 | 36 | # Flask stuff: 37 | instance/ 38 | .webassets-cache 39 | 40 | # Scrapy stuff: 41 | .scrapy 42 | 43 | # Sphinx documentation 44 | docs/_build/ 45 | 46 | # PyBuilder 47 | target/ 48 | 49 | # IPython Notebook 50 | .ipynb_checkpoints 51 | 52 | # pyenv 53 | .python-version 54 | 55 | # dotenv 56 | .env 57 | 58 | # virtualenv 59 | env/ 60 | venv/ 61 | ENV/ 62 | 63 | # Spyder project settings 64 | .spyderproject 65 | .idea/* 66 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | fail_fast: true 2 | repos: 3 | - repo: meta 4 | hooks: 5 | - id: check-hooks-apply 6 | - id: check-useless-excludes 7 | 8 | - repo: https://github.com/pre-commit/pre-commit-hooks 9 | rev: v4.1.0 10 | hooks: 11 | - id: check-merge-conflict 12 | - id: check-yaml 13 | - id: end-of-file-fixer 14 | exclude: | 15 | (?x)( 16 | .md$| 17 | ^.gitignore| 18 | ^.ipython/| 19 | ^.pytest_cache/ 20 | ) 21 | - id: trailing-whitespace 22 | exclude: | 23 | (?x)( 24 | .md$| 25 | ^.ipython/| 26 | ^.pytest_cache/ 27 | ) 28 | 29 | - repo: https://github.com/astral-sh/ruff-pre-commit 30 | rev: v0.3.3 31 | hooks: 32 | - id: ruff 33 | args: [ --fix ] 34 | - id: ruff-format 35 | 36 | - repo: https://github.com/codespell-project/codespell 37 | rev: v2.1.0 38 | hooks: 39 | - id: codespell 40 | name: codespell 41 | entry: codespell -q 3 -S "*.lock" -I codespell-ignore-words.txt --regex="[a-zA-Z0-9\-'’`]+" 42 | types: [text] 43 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "python-inoreader" 3 | version = "0.7.1" 4 | description = "Python wrapper of Inoreader API" 5 | authors = [ 6 | {name = "Linusp", email = "linusp1024@gmail.com"}, 7 | ] 8 | dependencies = [ 9 | "lxml", 10 | "requests", 11 | "PyYAML", 12 | "click", 13 | "requests-oauthlib", 14 | "flask", 15 | "tabulate", 16 | ] 17 | requires-python = ">=3.6" 18 | readme = "README.md" 19 | license = {text = "MIT"} 20 | classifiers = [ 21 | "License :: OSI Approved :: MIT License", 22 | "Programming Language :: Python", 23 | "Programming Language :: Python :: 3", 24 | ] 25 | 26 | [build-system] 27 | requires = ["setuptools", "wheel"] 28 | build-backend = "setuptools.build_meta" 29 | 30 | [project.scripts] 31 | inoreader = "inoreader.main:main" 32 | 33 | [project.urls] 34 | Homepage = "https://github.com/Linusp/python-inoreader" 35 | 36 | [tool.codespell] 37 | skip = "*.lock,./.tox,./.venv,./.git,./venv,./*.json,./*.jsonl,./*.yaml" 38 | quiet-level = 3 39 | ignore-words-list = "inoreader" 40 | count = "" 41 | 42 | [tool.ruff] 43 | exclude = [ 44 | ".bzr", 45 | ".direnv", 46 | ".eggs", 47 | ".git", 48 | ".git-rewrite", 49 | ".hg", 50 | ".ipynb_checkpoints", 51 | ".mypy_cache", 52 | ".nox", 53 | ".pants.d", 54 | ".pyenv", 55 | ".pytest_cache", 56 | ".pytype", 57 | ".ruff_cache", 58 | ".svn", 59 | ".tox", 60 | ".venv", 61 | ".vscode", 62 | "__pypackages__", 63 | "_build", 64 | "buck-out", 65 | "build", 66 | "dist", 67 | "node_modules", 68 | "site-packages", 69 | "venv", 70 | ] 71 | line-length = 100 72 | indent-width = 4 73 | 74 | [tool.ruff.lint] 75 | select = ["E", "F"] 76 | ignore = ["E201", "E202"] 77 | -------------------------------------------------------------------------------- /inoreader/filter.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | _FILTERS = {} 4 | 5 | 6 | def register_filter(name, override=False): 7 | def wrap(cls): 8 | global _FILTERS 9 | if name not in _FILTERS or override: 10 | _FILTERS[name] = cls 11 | 12 | return cls 13 | 14 | return wrap 15 | 16 | 17 | @register_filter("include_any") 18 | class IncludeAnyFilter(object): 19 | def __init__(self, rules): 20 | self.rules = [re.compile(regexp, re.IGNORECASE) for regexp in rules] 21 | 22 | def validate(self, text): 23 | for regexp in self.rules: 24 | if regexp.findall(text): 25 | return True 26 | 27 | return False 28 | 29 | 30 | @register_filter("include_all") 31 | class IncludeAllFilter(object): 32 | def __init__(self, rules): 33 | self.rules = [re.compile(regexp, re.IGNORECASE) for regexp in rules] 34 | 35 | def validate(self, text): 36 | for regexp in self.rules: 37 | if not regexp.findall(text): 38 | return False 39 | 40 | return True 41 | 42 | 43 | @register_filter("exclude") 44 | class ExcludeFilter(object): 45 | def __init__(self, rules): 46 | self.rules = [re.compile(regexp, re.IGNORECASE) for regexp in rules] 47 | 48 | def validate(self, text): 49 | for regexp in self.rules: 50 | if regexp.findall(text): 51 | return False 52 | 53 | return True 54 | 55 | 56 | def get_filter(config): 57 | filter_type = config["type"] 58 | if filter_type not in _FILTERS: 59 | raise ValueError("unsupported filter type: {}".format(filter_type)) 60 | 61 | filter_cls = _FILTERS[filter_type] 62 | params = {k: v for k, v in config.items() if k != "type"} 63 | return filter_cls(**params) 64 | -------------------------------------------------------------------------------- /inoreader/article.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import print_function, unicode_literals 3 | 4 | from .utils import extract_text, normalize_whitespace 5 | 6 | 7 | class Article(object): 8 | def __init__( 9 | self, 10 | id, 11 | title, 12 | categories, 13 | link, 14 | published=None, 15 | content=None, 16 | author=None, 17 | feed_id=None, 18 | feed_title=None, 19 | feed_link=None, 20 | ): 21 | self.id = id 22 | self.title = normalize_whitespace(title) 23 | self.categories = categories 24 | self.link = link 25 | self.published = published 26 | self.content = content.strip() if content else "" 27 | self.text = extract_text(self.content) 28 | self.author = author 29 | self.feed_id = feed_id 30 | self.feed_title = feed_title.strip() 31 | self.feed_link = feed_link 32 | 33 | @classmethod 34 | def from_json(cls, data): 35 | article_data = { 36 | "id": data["id"], 37 | "title": data["title"], 38 | "categories": data["categories"], 39 | "published": data["published"], 40 | "content": data.get("summary", {}).get("content"), 41 | "author": data.get("author"), 42 | } 43 | links = [item["href"] for item in data["canonical"]] 44 | article_data["link"] = links[0] if links else "" 45 | 46 | # feed info 47 | article_data.update( 48 | { 49 | "feed_id": data["origin"]["streamId"], 50 | "feed_title": normalize_whitespace(data["origin"]["title"]), 51 | "feed_link": data["origin"]["htmlUrl"], 52 | } 53 | ) 54 | 55 | return cls(**article_data) 56 | -------------------------------------------------------------------------------- /inoreader/utils.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import print_function, unicode_literals 3 | 4 | import os 5 | import re 6 | import shutil 7 | 8 | import requests 9 | from lxml import html 10 | 11 | 12 | def normalize_whitespace(text): 13 | text = re.sub(r"[\n\r\t]", " ", text) 14 | text = re.sub(r" +", " ", text) 15 | 16 | return text.strip() 17 | 18 | 19 | def extract_text(html_content): 20 | if not html_content: 21 | return html_content 22 | 23 | content = html.fromstring(html_content) 24 | for img in content.iter("img"): 25 | img_src = img.get("src") 26 | img_alt = img.get("alt") or img_src 27 | if not img_src: 28 | continue 29 | 30 | img.text = "![%s](%s)" % (img_alt, img_src) 31 | 32 | for link in content.iter("a"): 33 | url = link.get("href") 34 | text = link.text or url 35 | if not url: 36 | continue 37 | 38 | link.text = "[%s](%s)" % (text, url) 39 | try: 40 | return content.text_content().replace("\xa0", "").strip() 41 | except Exception: 42 | return "" 43 | 44 | 45 | def download_image(url, path, filename, proxies=None): 46 | response = requests.get(url, stream=True, proxies=proxies) 47 | if response.status_code not in (200, 201): 48 | return None 49 | 50 | content_type = response.headers.get("Content-Type", "") 51 | if not content_type or not content_type.startswith("image/"): 52 | return None 53 | 54 | content_length = int(response.headers.get("Content-Length") or "0") 55 | if content_length <= 0: 56 | return None 57 | 58 | suffix = content_type.replace("image/", "") 59 | if suffix == "svg+xml": 60 | suffix = "svg" 61 | 62 | image_filename = filename + "." + suffix 63 | with open(os.path.join(path, image_filename), "wb") as f: 64 | response.raw.decode_content = True 65 | shutil.copyfileobj(response.raw, f) 66 | 67 | return image_filename 68 | -------------------------------------------------------------------------------- /inoreader/config.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import print_function, unicode_literals 3 | 4 | import codecs 5 | import os 6 | from configparser import ConfigParser 7 | 8 | 9 | class InoreaderConfigManager: 10 | def __init__(self, config_file): 11 | self.config_file = config_file 12 | self.data = {} 13 | if os.path.exists(config_file): 14 | self.load() 15 | 16 | def load(self): 17 | config_parser = ConfigParser() 18 | config_parser.read(self.config_file) 19 | for section_name in config_parser.sections(): 20 | self.data[section_name] = dict(config_parser[section_name]) 21 | 22 | def save(self): 23 | with codecs.open(self.config_file, mode="w", encoding="utf-8") as f: 24 | config_parser = ConfigParser() 25 | config_parser.update(self.data) 26 | config_parser.write(f) 27 | 28 | @property 29 | def app_id(self): 30 | return self.data.get("auth", {}).get("appid") 31 | 32 | @app_id.setter 33 | def app_id(self, value): 34 | self.data.setdefault("auth", {})["appid"] = value 35 | 36 | @property 37 | def app_key(self): 38 | return self.data.get("auth", {}).get("appkey") 39 | 40 | @app_key.setter 41 | def app_key(self, value): 42 | self.data.setdefault("auth", {})["appkey"] = value 43 | 44 | @property 45 | def access_token(self): 46 | return self.data.get("auth", {}).get("access_token") 47 | 48 | @access_token.setter 49 | def access_token(self, value): 50 | self.data.setdefault("auth", {})["access_token"] = value 51 | 52 | @property 53 | def refresh_token(self): 54 | return self.data.get("auth", {}).get("refresh_token") 55 | 56 | @refresh_token.setter 57 | def refresh_token(self, value): 58 | self.data.setdefault("auth", {})["refresh_token"] = value 59 | 60 | @property 61 | def expires_at(self): 62 | return self.data.get("auth", {}).get("expires_at") 63 | 64 | @expires_at.setter 65 | def expires_at(self, value): 66 | self.data.setdefault("auth", {})["expires_at"] = value 67 | 68 | @property 69 | def proxies(self): 70 | return self.data.get("proxies", {}) 71 | 72 | @proxies.setter 73 | def proxies(self, value): 74 | self.data["proxies"] = value 75 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # CHANGELOG 2 | 3 | ## v0.7.1 4 | 5 | Changed 6 | 7 | - Optimized `InoreaderClient.fetch_articles` for less API calls when tags are specified 8 | 9 | ## v0.7.0 10 | 11 | Removed 12 | 13 | - Removed `InoreaderClient.get_stream_contents`. 14 | 15 | Changed 16 | 17 | - Add param `n` to `InoreaderClient.fetch_articles` to reduce the number of API calls, thanks to [tosborne-slalom](https://github.com/tosborne-slalom) 18 | - Supported `--batch-size` option in commands `fetch-articles`/`fetch-unread`/`fetch-starred` 19 | 20 | ## v0.6.0 21 | 22 | Publish to pypi! 23 | 24 | ## v0.5.0 25 | 26 | Added 27 | 28 | - New method: `Inoreader.edit_subscription` 29 | - New command `edit-subscription`, now you can do these with this command: 30 | 31 | - Subscribe a new feed 32 | - Unsubscribe a subscription 33 | - Rename a subscription 34 | - Add a subscription to a folder or remove it from a folder 35 | 36 | Thanks to [Robért](https://github.com/rsguhr) for his [advice](https://github.com/Linusp/python-inoreader/issues/25). 37 | 38 | ## v0.4.6 39 | 40 | Added 41 | 42 | - New methods: 43 | 44 | - `InoreaderClient.remove_general_label` 45 | - `InoreaderClient.remove_tag` 46 | - `InoreaderClient.remove_read` 47 | - `InoreaderClient.remove_starred` 48 | - `InoreaderClient.remove_liked` 49 | 50 | thanks to [tianchen zhong](https://github.com/cczhong11) 51 | 52 | Changed 53 | 54 | - Add param to `inoreader.main.get_client` for customizing the config file path, thanks to [tianchen zhong](https://github.com/cczhong11) 55 | - Command filter supported a new action `unstar` 56 | 57 | Fixed 58 | 59 | - Fix token in refresh_access_token method, thanks to [Torikova](https://github.com/Torikova) 60 | 61 | ## v0.4.5 62 | 63 | Changed 64 | 65 | - Fix `InoreaderClient.__get_stream_contents`, thanks to [BeautyYuYanli](https://github.com/BeautyYuYanli) 66 | 67 | ## v0.4.4 68 | 69 | Changed 70 | 71 | - Disable default app id and key due to abusion 72 | 73 | ## v0.4.3 74 | 75 | Fixed 76 | 77 | - Fix endless loop bug in `InoreaderClient.fetch_articles` 78 | 79 | ## v0.4.2 80 | 81 | Added 82 | 83 | - New functions: 84 | 85 | - `inoreader.utils.download_image` 86 | 87 | - New methods: 88 | 89 | - `InoreaderClient.fetch_articles` 90 | - `InoreaderClient.fetch_starred` 91 | 92 | - New command: `fetch-starred` 93 | 94 | Changed 95 | 96 | - Optimized article content parsing 97 | 98 | ## v0.4.1 99 | 100 | Added 101 | 102 | - New config `proxies` 103 | 104 | ## v0.4.0 105 | 106 | Added 107 | 108 | - New Class: `InoreaderConfigManager` for config management 109 | 110 | Changed 111 | 112 | - Use OAuth2.0 authentication instead of user authentication with password 113 | - Optimized code of `InoreaderClient` 114 | - Optimized results of commands 115 | 116 | ## v0.3.0 117 | 118 | Added 119 | 120 | - New Class: `Subscription` in `inoreader.subscription` 121 | - New methods: 122 | - `InoreaderClient.get_subscription_list` 123 | - `InoreaderClient.get_stream_contents` 124 | 125 | - New commands: `get-subscriptions`, `fetch-articles`, `dedupe` 126 | 127 | 128 | Changed 129 | 130 | - Supported new output formats in command `fetch-unread`: `markdown` and `org-mode` 131 | - Changed command `filter`, see `example/rules.example.yaml` for details 132 | - Use `logging` instead of `print` in cli 133 | 134 | 135 | ## v0.2.1 136 | 137 | Changed 138 | 139 | - Supported new output formats in command `fetch-unread`: `markdown` and `org-mode` 140 | - Changed command `filter`, see `example/rules.example.yaml` for details 141 | 142 | ## v0.2.0 143 | 144 | Added 145 | 146 | - New methods: 147 | - `InoreaderClient.add_tag` 148 | - `InoreaderClient.mark_as_read` 149 | - `InoreaderClient.mark_as_starred` 150 | - `InoreaderClient.mark_as_liked` 151 | - `InoreaderClient.broadcast` 152 | 153 | - New command `filter` 154 | 155 | Changed 156 | 157 | - add `userid` parameter to init method of `InoreaderClient` 158 | - update command line tool: save `userid` after login and use it in other commands 159 | 160 | ## v0.1.0 161 | 162 | Initialize this project 163 | 164 | - Implemented `InoreaderClient` with methods below: 165 | - `InoreaderClient.get_folders` 166 | - `InoreaderClient.get_tags` 167 | - `InoreaderClient.fetch_unread` 168 | 169 | - Implemented command line tool with commands below: 170 | - `login` 171 | - `list-folders` 172 | - `list-tags` 173 | - `fetch-unread` 174 | -------------------------------------------------------------------------------- /inoreader/sim.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import re 3 | from collections import Counter, defaultdict 4 | from difflib import SequenceMatcher 5 | from math import sqrt 6 | 7 | PUNCTS_PAT = re.compile( 8 | r'(?:[#\$&@.,;:!?,。!?、:;  \u3300\'`"~_\+\-\*\/\\|\\^=<>\[\]\(\)\{\}()“”‘’\s]|' 9 | r"[\u2000-\u206f]|" 10 | r"[\u3000-\u303f]|" 11 | r"[\uff30-\uff4f]|" 12 | r"[\uff00-\uff0f\uff1a-\uff20\uff3b-\uff40\uff5b-\uff65])+" 13 | ) 14 | 15 | 16 | def make_terms(text, term, ngram_range=None, lower=True, ignore_punct=True, gram_as_tuple=False): 17 | if lower: 18 | text = text.lower() 19 | if term == "word": 20 | # term_seq = [word.strip() for word in jieba.cut(text) if word.strip()] 21 | term_seq = [word.strip() for word in text.split() if word.strip()] 22 | elif term == "char": 23 | term_seq = list(re.sub(r"\s", "", text)) 24 | else: 25 | raise ValueError(f"unsupported term type: {term}") 26 | 27 | if ngram_range and not (len(ngram_range) == 2 and ngram_range[0] < ngram_range[1]): 28 | raise ValueError(f"wrong `ngram_range`: {ngram_range}") 29 | 30 | terms = [] 31 | min_ngram, max_ngram = ngram_range or (1, 2) 32 | for idx in range(0, max(1, len(term_seq) - min_ngram + 1)): 33 | cur_grams = [] 34 | for gram_level in range(min_ngram, max_ngram): 35 | if gram_as_tuple: 36 | gram = tuple(term_seq[idx : idx + gram_level]) 37 | else: 38 | gram = "".join(term_seq[idx : idx + gram_level]) 39 | if gram not in cur_grams: 40 | if ignore_punct and any(PUNCTS_PAT.match(item) for item in gram): 41 | pass 42 | else: 43 | cur_grams.append(gram) 44 | terms.extend(cur_grams) 45 | return terms 46 | 47 | 48 | def lcs_sim( 49 | s1, s2, term="char", ngram_range=None, ngram_weights=None, lower=True, ignore_punct=True 50 | ): 51 | s1_terms = make_terms(s1, "char", None, lower, ignore_punct) 52 | s2_terms = make_terms(s2, "char", None, lower, ignore_punct) 53 | return SequenceMatcher(a=s1_terms, b=s2_terms).ratio() 54 | 55 | 56 | def jaccard_sim( 57 | s1, s2, term="word", ngram_range=None, ngram_weights=None, lower=True, ignore_punct=True 58 | ): 59 | if not ngram_range or ngram_range[1] == ngram_range[0] + 1: 60 | first_term_set = set(make_terms(s1, term, ngram_range, lower, ignore_punct)) 61 | second_term_set = set(make_terms(s2, term, ngram_range, lower, ignore_punct)) 62 | if not first_term_set and not second_term_set: 63 | return 1.0 64 | return len(first_term_set & second_term_set) / len(first_term_set | second_term_set) 65 | else: 66 | weights = ngram_weights or list(range(*ngram_range)) 67 | weights_sum = sum(weights) 68 | weights = [weight / weights_sum for weight in weights] 69 | scores = [] 70 | for ngram_level in range(*ngram_range): 71 | score = jaccard_sim( 72 | s1, 73 | s2, 74 | term=term, 75 | ngram_range=(ngram_level, ngram_level + 1), 76 | lower=lower, 77 | ignore_punct=ignore_punct, 78 | ) 79 | scores.append(score) 80 | 81 | return sum([score * weight for score, weight in zip(scores, weights)]) 82 | 83 | 84 | def cosine_sim( 85 | s1, s2, term="word", ngram_range=None, ngram_weights=None, lower=True, ignore_punct=True 86 | ): 87 | if not ngram_range or ngram_range[1] == ngram_range[0] + 1: 88 | first_term_freq = Counter(make_terms(s1, term, ngram_range, lower, ignore_punct)) 89 | second_term_freq = Counter(make_terms(s2, term, ngram_range, lower, ignore_punct)) 90 | 91 | first_norm = 0 92 | second_norm = 0 93 | inner_product = 0 94 | 95 | for term, freq in first_term_freq.items(): 96 | first_norm += freq**2 97 | inner_product += freq * second_term_freq[term] 98 | 99 | for _, freq in second_term_freq.items(): 100 | second_norm += freq**2 101 | 102 | if first_norm == 0 and second_norm == 0: 103 | return 1.0 104 | if first_norm == 0 or second_norm == 0: 105 | return 0.0 106 | 107 | return inner_product / sqrt(first_norm * second_norm) 108 | else: 109 | weights = ngram_weights or list(range(*ngram_range)) 110 | weights_sum = sum(weights) 111 | weights = [weight / weights_sum for weight in weights] 112 | scores = [] 113 | for ngram_level in range(*ngram_range): 114 | score = cosine_sim( 115 | s1, 116 | s2, 117 | term=term, 118 | ngram_range=(ngram_level, ngram_level + 1), 119 | lower=lower, 120 | ignore_punct=ignore_punct, 121 | ) 122 | scores.append(score) 123 | 124 | return sum([score * weight for score, weight in zip(scores, weights)]) 125 | 126 | 127 | def sim_of(s1, s2, method="cosine", term="word", ngram_range=None, lower=True, ignore_punct=True): 128 | method_func = { 129 | "lcs": lcs_sim, 130 | "jaccard": jaccard_sim, 131 | "cosine": cosine_sim, 132 | }.get(method) 133 | if not method_func: 134 | raise ValueError("unsupported method: {}".format(method)) 135 | 136 | return method_func( 137 | s1, s2, term=term, ngram_range=ngram_range, lower=lower, ignore_punct=ignore_punct 138 | ) 139 | 140 | 141 | class InvIndex(object): 142 | def __init__(self): 143 | """build inverted index with ngram method""" 144 | self._id2doc = {} 145 | self._index = defaultdict(set) 146 | 147 | def add_doc(self, doc): 148 | if doc.id in self._id2doc: 149 | return False 150 | 151 | self._id2doc[doc.id] = doc.title 152 | terms = set(make_terms(doc.title, "char", (3, 4))) 153 | for term in terms: 154 | self._index[term].add(doc.id) 155 | 156 | return True 157 | 158 | def retrieve(self, query, k=10): 159 | related = Counter() 160 | terms = set(make_terms(query, "char", (3, 4))) 161 | for term in terms: 162 | for qid in self._index.get(term, []): 163 | related[qid] += 1 164 | 165 | return [(idx, self._id2doc[idx], score) for idx, score in related.most_common(k)] 166 | 167 | def save(self, fname): 168 | pickle.dump((self._id2doc, self._index), open(fname, "wb")) 169 | 170 | def load(self, fname): 171 | self._id2doc, self._index = pickle.load(open(fname, "rb")) 172 | -------------------------------------------------------------------------------- /inoreader/client.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import print_function, unicode_literals 3 | 4 | import logging 5 | from datetime import datetime 6 | from operator import itemgetter 7 | from uuid import uuid4 8 | 9 | try: # python2 10 | from urllib import quote_plus 11 | 12 | from urlparse import urljoin 13 | except ImportError: # python3 14 | from urllib.parse import urljoin, quote_plus 15 | 16 | import requests 17 | 18 | from .article import Article 19 | from .consts import BASE_URL 20 | from .exception import APIError, NotLoginError 21 | from .subscription import Subscription 22 | 23 | LOGGER = logging.getLogger(__name__) 24 | 25 | 26 | class InoreaderClient(object): 27 | # paths 28 | TOKEN_PATH = "/oauth2/token" 29 | USER_INFO_PATH = "user-info" 30 | TAG_LIST_PATH = "tag/list" 31 | SUBSCRIPTION_LIST_PATH = "subscription/list" 32 | STREAM_CONTENTS_PATH = "stream/contents/" 33 | EDIT_TAG_PATH = "edit-tag" 34 | EDIT_SUBSCRIPTION_PATH = "subscription/edit" 35 | 36 | # tags 37 | GENERAL_TAG_TEMPLATE = "user/-/label/{}" 38 | READ_TAG = "user/-/state/com.google/read" 39 | STARRED_TAG = "user/-/state/com.google/starred" 40 | LIKED_TAG = "user/-/state/com.google/like" 41 | BROADCAST_TAG = "user/-/state/com.google/broadcast" 42 | 43 | def __init__( 44 | self, app_id, app_key, access_token, refresh_token, expires_at, config_manager=None 45 | ): 46 | self.app_id = app_id 47 | self.app_key = app_key 48 | self.access_token = access_token 49 | self.refresh_token = refresh_token 50 | self.expires_at = float(expires_at) 51 | self.session = requests.Session() 52 | self.session.headers.update( 53 | { 54 | "AppId": self.app_id, 55 | "AppKey": self.app_key, 56 | "Authorization": "Bearer {}".format(self.access_token), 57 | } 58 | ) 59 | self.config_manager = config_manager 60 | self.proxies = self.config_manager.proxies if config_manager else None 61 | 62 | def check_token(self): 63 | now = datetime.now().timestamp() 64 | if now >= self.expires_at: 65 | self.refresh_access_token() 66 | 67 | @staticmethod 68 | def parse_response(response, json_data=True): 69 | if response.status_code == 401: 70 | raise NotLoginError 71 | elif response.status_code != 200: 72 | raise APIError(response.text) 73 | 74 | return response.json() if json_data else response.text 75 | 76 | def refresh_access_token(self): 77 | url = urljoin(BASE_URL, self.TOKEN_PATH) 78 | payload = { 79 | "client_id": self.app_id, 80 | "client_secret": self.app_key, 81 | "grant_type": "refresh_token", 82 | "refresh_token": self.refresh_token, 83 | } 84 | response = self.parse_response(requests.post(url, json=payload, proxies=self.proxies)) 85 | self.access_token = response["access_token"] 86 | self.refresh_token = response["refresh_token"] 87 | self.expires_at = datetime.now().timestamp() + response["expires_in"] 88 | self.session.headers["Authorization"] = "Bearer {}".format(self.access_token) 89 | 90 | if self.config_manager: 91 | self.config_manager.access_token = self.access_token 92 | self.config_manager.refresh_token = self.refresh_token 93 | self.config_manager.expires_at = self.expires_at 94 | self.config_manager.save() 95 | 96 | def userinfo(self): 97 | self.check_token() 98 | 99 | url = urljoin(BASE_URL, self.USER_INFO_PATH) 100 | return self.parse_response(self.session.post(url, proxies=self.proxies)) 101 | 102 | def get_folders(self): 103 | self.check_token() 104 | 105 | url = urljoin(BASE_URL, self.TAG_LIST_PATH) 106 | params = {"types": 1, "counts": 1} 107 | response = self.parse_response(self.session.post(url, params=params, proxies=self.proxies)) 108 | 109 | folders = [] 110 | for item in response["tags"]: 111 | if item.get("type") != "folder": 112 | continue 113 | 114 | folder_name = item["id"].split("/")[-1] 115 | folders.append({"name": folder_name, "unread_count": item["unread_count"]}) 116 | 117 | folders.sort(key=itemgetter("name")) 118 | return folders 119 | 120 | def get_tags(self): 121 | self.check_token() 122 | 123 | url = urljoin(BASE_URL, self.TAG_LIST_PATH) 124 | params = {"types": 1, "counts": 1} 125 | response = self.parse_response(self.session.post(url, params=params, proxies=self.proxies)) 126 | 127 | tags = [] 128 | for item in response["tags"]: 129 | if item.get("type") != "tag": 130 | continue 131 | 132 | folder_name = item["id"].split("/")[-1] 133 | tags.append({"name": folder_name, "unread_count": item["unread_count"]}) 134 | 135 | tags.sort(key=itemgetter("name")) 136 | return tags 137 | 138 | def get_subscription_list(self): 139 | self.check_token() 140 | 141 | url = urljoin(BASE_URL, self.SUBSCRIPTION_LIST_PATH) 142 | response = self.parse_response(self.session.get(url, proxies=self.proxies)) 143 | for item in response["subscriptions"]: 144 | yield Subscription.from_json(item) 145 | 146 | def __get_stream_contents( 147 | self, stream_id=None, n=50, r=None, ot=None, xt=None, it=None, c=None 148 | ): 149 | """reference: https://www.inoreader.com/developers/stream-contents""" 150 | self.check_token() 151 | 152 | url = urljoin(BASE_URL, self.STREAM_CONTENTS_PATH) 153 | if stream_id: 154 | url = urljoin(url, quote_plus(stream_id)) 155 | 156 | params = {"n": n, "r": r, "ot": ot, "xt": xt, "it": it, "c": c} 157 | params = {arg: val for arg, val in params.items() if val is not None} 158 | response = self.parse_response(self.session.post(url, params=params, proxies=self.proxies)) 159 | if "continuation" in response: 160 | return response["items"], response["continuation"] 161 | else: 162 | return response["items"], None 163 | 164 | def fetch_articles( 165 | self, stream_id=None, folder=None, tags=None, unread=True, starred=False, limit=None, n=50 166 | ): 167 | self.check_token() 168 | 169 | if not stream_id: 170 | if folder: 171 | stream_id = self.GENERAL_TAG_TEMPLATE.format(folder) 172 | elif tags: 173 | stream_id = self.GENERAL_TAG_TEMPLATE.format(tags[0]) 174 | 175 | params = {"stream_id": stream_id, "n": n, "c": str(uuid4())} 176 | if unread: 177 | params["xt"] = self.READ_TAG 178 | 179 | if starred: 180 | params["it"] = self.STARRED_TAG 181 | 182 | fetched_count = 0 183 | items, continuation = self.__get_stream_contents(**params) 184 | for data in items: 185 | categories = { 186 | category.split("/")[-1] 187 | for category in data.get("categories", []) 188 | if category.find("label") > 0 189 | } 190 | if tags and not categories.issuperset(set(tags)): 191 | continue 192 | 193 | yield Article.from_json(data) 194 | fetched_count += 1 195 | if limit and fetched_count >= limit: 196 | break 197 | 198 | while continuation and (not limit or fetched_count < limit): 199 | params["c"] = continuation 200 | items, continuation = self.__get_stream_contents(**params) 201 | for data in items: 202 | categories = { 203 | category.split("/")[-1] 204 | for category in data.get("categories", []) 205 | if category.find("label") > 0 206 | } 207 | if tags and not categories.issuperset(set(tags)): 208 | continue 209 | yield Article.from_json(data) 210 | fetched_count += 1 211 | if limit and fetched_count >= limit: 212 | break 213 | 214 | def fetch_unread(self, folder=None, tags=None, limit=None, n=None): 215 | for article in self.fetch_articles(folder=folder, tags=tags, unread=True, n=n): 216 | yield article 217 | 218 | def fetch_starred(self, folder=None, tags=None, limit=None, n=None): 219 | for article in self.fetch_articles( 220 | folder=folder, tags=tags, unread=False, starred=True, n=n 221 | ): 222 | yield article 223 | 224 | def add_general_label(self, articles, label): 225 | self.check_token() 226 | 227 | url = urljoin(BASE_URL, self.EDIT_TAG_PATH) 228 | for start in range(0, len(articles), 10): 229 | end = min(start + 10, len(articles)) 230 | params = {"a": label, "i": [articles[idx].id for idx in range(start, end)]} 231 | self.parse_response( 232 | self.session.post(url, params=params, proxies=self.proxies), json_data=False 233 | ) 234 | 235 | def remove_general_label(self, articles, label): 236 | self.check_token() 237 | 238 | url = urljoin(BASE_URL, self.EDIT_TAG_PATH) 239 | for start in range(0, len(articles), 10): 240 | end = min(start + 10, len(articles)) 241 | params = {"r": label, "i": [articles[idx].id for idx in range(start, end)]} 242 | self.parse_response( 243 | self.session.post(url, params=params, proxies=self.proxies), json_data=False 244 | ) 245 | 246 | def add_tag(self, articles, tag): 247 | self.add_general_label(articles, self.GENERAL_TAG_TEMPLATE.format(tag)) 248 | 249 | def mark_as_read(self, articles): 250 | self.add_general_label(articles, self.READ_TAG) 251 | 252 | def mark_as_starred(self, articles): 253 | self.add_general_label(articles, self.STARRED_TAG) 254 | 255 | def mark_as_liked(self, articles): 256 | self.add_general_label(articles, self.LIKED_TAG) 257 | 258 | def remove_tag(self, articles, tag): 259 | self.remove_general_label(articles, self.GENERAL_TAG_TEMPLATE.format(tag)) 260 | 261 | def remove_read(self, articles): 262 | self.remove_general_label(articles, self.READ_TAG) 263 | 264 | def remove_starred(self, articles): 265 | self.remove_general_label(articles, self.STARRED_TAG) 266 | 267 | def remove_liked(self, articles): 268 | self.remove_general_label(articles, self.LIKED_TAG) 269 | 270 | def broadcast(self, articles): 271 | self.add_general_label(articles, self.BROADCAST_TAG) 272 | 273 | def edit_subscription(self, stream_id, action, title=None, add_folder=None, remove_folder=None): 274 | self.check_token() 275 | url = urljoin(BASE_URL, self.EDIT_SUBSCRIPTION_PATH) 276 | # https://us.inoreader.com/developers/edit-subscription 277 | # The documentation looks a bit outdated, `follow`/`unfollow` don't work 278 | action = {"follow": "subscribe", "unfollow": "unsubscribe"}.get(action) or action 279 | params = {"ac": action, "s": stream_id} 280 | if title: 281 | params["t"] = title 282 | 283 | if add_folder: 284 | params["a"] = add_folder 285 | 286 | if remove_folder: 287 | params["r"] = remove_folder 288 | 289 | r = self.session.post(url, params=params, proxies=self.proxies) 290 | response = self.parse_response( 291 | r, 292 | # self.session.post(url, params=params, proxies=self.proxies), 293 | json_data=False, 294 | ) 295 | return response 296 | -------------------------------------------------------------------------------- /inoreader/main.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import print_function, unicode_literals 3 | 4 | import codecs 5 | import csv 6 | import json 7 | import logging 8 | import os 9 | import re 10 | import sys 11 | import threading 12 | from collections import Counter, defaultdict 13 | from functools import partial, wraps 14 | from logging.config import dictConfig 15 | from operator import itemgetter 16 | from queue import Queue 17 | from uuid import uuid4 18 | 19 | import click 20 | import yaml 21 | from flask import Flask, request 22 | from requests_oauthlib import OAuth2Session 23 | from tabulate import tabulate 24 | 25 | from inoreader import InoreaderClient 26 | from inoreader.config import InoreaderConfigManager 27 | from inoreader.consts import DEFAULT_APPID, DEFAULT_APPKEY 28 | from inoreader.exception import APIError, NotLoginError 29 | from inoreader.filter import get_filter 30 | from inoreader.sim import InvIndex, sim_of 31 | from inoreader.utils import download_image 32 | 33 | APPID_ENV_NAME = "INOREADER_APP_ID" 34 | APPKEY_ENV_NAME = "INOREADER_APP_KEY" 35 | TOKEN_ENV_NAME = "INOREADER_AUTH_TOKEN" 36 | ENV_NAMES = [APPID_ENV_NAME, APPKEY_ENV_NAME, TOKEN_ENV_NAME] 37 | 38 | CONFIG_FILE = os.path.join(os.environ.get("HOME"), ".inoreader") 39 | LOGGER = logging.getLogger(__name__) 40 | 41 | 42 | dictConfig( 43 | { 44 | "version": 1, 45 | "formatters": { 46 | "simple": { 47 | "format": "%(asctime)s - %(message)s", 48 | } 49 | }, 50 | "handlers": { 51 | "default": { 52 | "level": "DEBUG", 53 | "class": "logging.StreamHandler", 54 | "formatter": "simple", 55 | "stream": "ext://sys.stdout", 56 | }, 57 | }, 58 | "loggers": { 59 | "__main__": {"handlers": ["default"], "level": "DEBUG", "propagate": False}, 60 | "inoreader": {"handlers": ["default"], "level": "DEBUG", "propagate": True}, 61 | }, 62 | } 63 | ) 64 | 65 | 66 | def get_client(config_file=CONFIG_FILE): 67 | config = InoreaderConfigManager(config_file) 68 | if not config.data: 69 | LOGGER.error("Please login first") 70 | sys.exit(1) 71 | 72 | client = InoreaderClient( 73 | config.app_id, 74 | config.app_key, 75 | config.access_token, 76 | config.refresh_token, 77 | config.expires_at, 78 | config_manager=config, 79 | ) 80 | return client 81 | 82 | 83 | def catch_error(func): 84 | @wraps(func) 85 | def wrapper(*args, **kwargs): 86 | try: 87 | return func(*args, **kwargs) 88 | except NotLoginError: 89 | print("Error: Please login first!") 90 | sys.exit(1) 91 | except APIError as exception: 92 | print("Error:", str(exception)) 93 | sys.exit(1) 94 | 95 | return wrapper 96 | 97 | 98 | @click.group(context_settings={"help_option_names": ["-h", "--help"]}) 99 | def main(): 100 | pass 101 | 102 | 103 | @main.command() 104 | def login(): 105 | """Login to your inoreader account with OAuth 2.0""" 106 | # run simple daemon http server to handle callback 107 | app = Flask(__name__) 108 | 109 | # disable flask output 110 | app.logger.disabled = True 111 | logger = logging.getLogger("werkzeug") 112 | logger.setLevel(logging.ERROR) 113 | logger.disabled = True 114 | sys.modules["flask.cli"].show_server_banner = lambda *x: None 115 | 116 | # use queue to pass data between threads 117 | queue = Queue() 118 | 119 | config = InoreaderConfigManager(CONFIG_FILE) 120 | app_id = config.app_id or DEFAULT_APPID 121 | app_key = config.app_key or DEFAULT_APPKEY 122 | state = str(uuid4()) 123 | oauth = OAuth2Session( 124 | app_id, 125 | redirect_uri="http://localhost:8080/oauth/redirect", 126 | scope="read write", 127 | state=state, 128 | ) 129 | 130 | @app.route("/oauth/redirect") 131 | def redirect(): 132 | token = oauth.fetch_token( 133 | "https://www.inoreader.com/oauth2/token", 134 | authorization_response=request.url, 135 | client_secret=app_key, 136 | proxies=config.proxies, 137 | ) 138 | queue.put(token) 139 | queue.task_done() 140 | return "Done." 141 | 142 | func = partial(app.run, port=8080, debug=False) 143 | threading.Thread(target=func, daemon=True).start() 144 | 145 | os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1" 146 | authorization_url, ret_state = oauth.authorization_url("https://www.inoreader.com/oauth2/auth") 147 | if state != ret_state: 148 | LOGGER.error("Server return bad state") 149 | sys.exit(1) 150 | 151 | token = None 152 | print("Open the link to authorize access:", authorization_url) 153 | while True: 154 | token = queue.get() 155 | if token: 156 | break 157 | 158 | queue.join() 159 | if token: 160 | config.app_id = app_id 161 | config.app_key = app_key 162 | config.access_token = token["access_token"] 163 | config.refresh_token = token["refresh_token"] 164 | config.expires_at = token["expires_at"] 165 | config.save() 166 | LOGGER.info("Login successfully, tokens are saved in config file %s", config.config_file) 167 | else: 168 | LOGGER.warning("Login failed, please check your environment or try again later.") 169 | sys.exit(1) 170 | 171 | 172 | @main.command("list-folders") 173 | @catch_error 174 | def list_folders(): 175 | """List all folders""" 176 | client = get_client() 177 | res = client.get_folders() 178 | 179 | output_info = [["Folder", "Unread Count"]] 180 | for item in res: 181 | output_info.append([item["name"], item["unread_count"]]) 182 | 183 | print(tabulate(output_info, headers="firstrow", tablefmt="github")) 184 | 185 | 186 | @main.command("list-tags") 187 | @catch_error 188 | def list_tags(): 189 | """List all tags""" 190 | client = get_client() 191 | res = client.get_tags() 192 | 193 | output_info = [["Tag", "Unread Count"]] 194 | for item in res: 195 | output_info.append([item["name"], item["unread_count"]]) 196 | 197 | print(tabulate(output_info, headers="firstrow", tablefmt="github")) 198 | 199 | 200 | @main.command("fetch-unread") 201 | @click.option("-f", "--folder", required=True, help="Folder which articles belong to") 202 | @click.option("-t", "--tags", help="Tag(s) for filtering, separate with comma") 203 | @click.option( 204 | "--batch-size", type=int, default=50, help="Maximum number of articles per API request" 205 | ) 206 | @click.option("-o", "--outfile", required=True, help="Filename to save articles") 207 | @click.option( 208 | "--out-format", 209 | type=click.Choice(["json", "csv", "plain", "markdown", "org-mode"]), 210 | default="json", 211 | help="Format of output file, default: json", 212 | ) 213 | @catch_error 214 | def fetch_unread(folder, tags, batch_size, outfile, out_format): 215 | """Fetch unread articles""" 216 | client = get_client() 217 | 218 | tag_list = [] if not tags else tags.split(",") 219 | fout = codecs.open(outfile, mode="w", encoding="utf-8") 220 | writer = csv.writer(fout, delimiter=",") if out_format == "csv" else None 221 | for idx, article in enumerate(client.fetch_unread(folder=folder, tags=tag_list, n=batch_size)): 222 | if idx > 0 and (idx % 10) == 0: 223 | LOGGER.info("fetched %d articles", idx) 224 | title = article.title 225 | text = article.text 226 | link = article.link 227 | if out_format == "json": 228 | print( 229 | json.dumps({"title": title, "content": text, "url": link}, ensure_ascii=False), 230 | file=fout, 231 | ) 232 | elif out_format == "csv": 233 | writer.writerow([link, title, text]) 234 | elif out_format == "plain": 235 | print("TITLE: {}".format(title), file=fout) 236 | print("LINK: {}".format(link), file=fout) 237 | print("CONTENT: {}".format(text), file=fout) 238 | print(file=fout) 239 | elif out_format == "markdown": 240 | if link: 241 | print("# [{}]({})\n".format(title, link), file=fout) 242 | else: 243 | print("# {}\n".format(title), file=fout) 244 | print(text + "\n", file=fout) 245 | elif out_format == "org-mode": 246 | if link: 247 | title = title.replace("[", "_").replace("]", "_") 248 | print("* [[{}][{}]]\n".format(link, title), file=fout) 249 | else: 250 | print("* {}\n".format(title), file=fout) 251 | print(text + "\n", file=fout) 252 | 253 | LOGGER.info("fetched %d articles and saved them in %s", idx + 1, outfile) 254 | 255 | fout.close() 256 | 257 | 258 | def apply_action(articles, client, action, tags): 259 | if action == "tag": 260 | for tag in tags.split(","): 261 | client.add_tag(articles, tag) 262 | 263 | for article in articles: 264 | LOGGER.info("Add tags [%s] on article: %s", tags, article.title) 265 | elif action == "mark_as_read": 266 | client.mark_as_read(articles) 267 | for article in articles: 268 | LOGGER.info("Mark article as read: %s", article.title) 269 | elif action == "like": 270 | client.mark_as_liked(articles) 271 | for article in articles: 272 | LOGGER.info("Mark article as liked: %s", article.title) 273 | elif action == "broadcast": 274 | client.broadcast(articles) 275 | for article in articles: 276 | LOGGER.info("Broadcast article: %s", article.title) 277 | elif action == "star": 278 | client.mark_as_starred(articles) 279 | for article in articles: 280 | LOGGER.info("Starred article: %s", article.title) 281 | elif action == "unstar": 282 | client.remove_starred(articles) 283 | for article in articles: 284 | LOGGER.info("Unstarred article: %s", article.title) 285 | 286 | 287 | @main.command("filter") 288 | @click.option("-r", "--rules-file", required=True, help="YAML file with your rules") 289 | @catch_error 290 | def filter_articles(rules_file): 291 | """Select articles and do something""" 292 | client = get_client() 293 | matched_articles = defaultdict(list) 294 | for rule in yaml.load(open(rules_file), Loader=yaml.Loader): 295 | fields = [ 296 | field 297 | for field in rule.get("fields", ["title", "content"]) 298 | if field in ("title", "content") 299 | ] 300 | cur_filter = get_filter(rule["filter"]) 301 | actions = [] 302 | # only 'mark_as_read', 'like', 'star', 'broadcast', 'tag' is supported now 303 | for action in rule.get("actions", [{"type": "mark_as_read"}]): 304 | if action["type"] not in ( 305 | "mark_as_read", 306 | "like", 307 | "star", 308 | "broadcast", 309 | "tag", 310 | "unstar", 311 | ): 312 | continue 313 | actions.append(action) 314 | 315 | articles = [] 316 | if "folders" in rule: 317 | for folder in rule["folders"]: 318 | articles.extend(client.fetch_unread(folder=folder)) 319 | else: 320 | for articles_info in rule.get("articles", []): 321 | articles.extend(client.fetch_articles(**articles_info)) 322 | 323 | # FIXME: deduplicate 324 | count = 0 325 | for article in articles: 326 | matched = False 327 | if "title" in fields and cur_filter.validate(article.title): 328 | matched = True 329 | if "content" in fields and cur_filter.validate(article.text): 330 | matched = True 331 | 332 | if matched: 333 | for action in actions: 334 | matched_articles[action["type"]].append((article, action)) 335 | 336 | count += 1 337 | 338 | LOGGER.info( 339 | "matched %d articles with filter named '%s'", 340 | count, 341 | rule["name"], 342 | ) 343 | 344 | for action_name in matched_articles: 345 | articles, actions = zip(*matched_articles[action_name]) 346 | if action_name != "tag": 347 | apply_action(articles, client, action_name, None) 348 | else: 349 | for article, action in zip(articles, actions): 350 | apply_action([article], client, "tag", action["tags"]) 351 | 352 | 353 | @main.command("get-subscriptions") 354 | @click.option("-o", "--outfile", help="Filename to save results") 355 | @click.option("-f", "--folder", help="Folder which subscriptions belong to") 356 | @click.option( 357 | "--out-format", 358 | type=click.Choice(["json", "csv"]), 359 | default="csv", 360 | help="Format of output, default: csv", 361 | ) 362 | @catch_error 363 | def get_subscriptions(outfile, folder, out_format): 364 | """Get your subscriptions""" 365 | client = get_client() 366 | results = [] 367 | for sub in client.get_subscription_list(): 368 | sub_categories = {category["label"] for category in sub.categories} 369 | if folder and folder not in sub_categories: 370 | continue 371 | 372 | results.append( 373 | { 374 | "id": sub.id, 375 | "title": sub.title, 376 | "url": sub.url, 377 | "folders": ";".join(sub_categories), 378 | } 379 | ) 380 | 381 | fout = open(outfile, "w") if outfile else sys.stdout 382 | if out_format == "csv": 383 | headers = ["id", "title", "url", "folders"] 384 | writer = csv.DictWriter(fout, headers, quoting=csv.QUOTE_ALL, delimiter="\t") 385 | writer.writeheader() 386 | for item in results: 387 | writer.writerow(item) 388 | elif out_format == "json": 389 | json.dump(results, fout, ensure_ascii=False, indent=4) 390 | 391 | if outfile: 392 | fout.close() 393 | 394 | 395 | @main.command("fetch-articles") 396 | @click.option("-i", "--stream-id", required=True, help="Stream ID which you want to fetch") 397 | @click.option( 398 | "--batch-size", type=int, default=50, help="Maximum number of articles per API request" 399 | ) 400 | @click.option("--only-unread", is_flag=True, help="Fetch unread articles only") 401 | @click.option("-o", "--outfile", required=True, help="Filename to save results") 402 | @click.option( 403 | "--out-format", 404 | type=click.Choice(["json", "csv", "plain", "markdown", "org-mode"]), 405 | default="json", 406 | help="Format of output, default: json", 407 | ) 408 | @catch_error 409 | def fetch_articles(outfile, stream_id, batch_size, only_unread, out_format): 410 | """Fetch articles by stream id""" 411 | client = get_client() 412 | 413 | fout = codecs.open(outfile, mode="w", encoding="utf-8") 414 | writer = None 415 | if out_format == "csv": 416 | writer = csv.DictWriter(fout, ["title", "content"], delimiter=",", quoting=csv.QUOTE_ALL) 417 | writer.writeheader() 418 | 419 | for idx, article in enumerate( 420 | client.fetch_articles(stream_id=stream_id, n=batch_size, unread=only_unread) 421 | ): 422 | if idx > 0 and (idx % 10) == 0: 423 | LOGGER.info("fetched %d articles", idx) 424 | 425 | title = article.title 426 | text = article.text 427 | if out_format == "json": 428 | print(json.dumps({"title": title, "content": text}, ensure_ascii=False), file=fout) 429 | elif out_format == "csv": 430 | writer.writerow({"title": title, "content": text}) 431 | elif out_format == "plain": 432 | print("TITLE: {}".format(title), file=fout) 433 | print("CONTENT: {}".format(text), file=fout) 434 | print(file=fout) 435 | elif out_format == "markdown": 436 | print("# {}\n".format(title), file=fout) 437 | print(text + "\n", file=fout) 438 | elif out_format == "org-mode": 439 | print("* {}\n".format(title), file=fout) 440 | print(text + "\n", file=fout) 441 | 442 | LOGGER.info("fetched %d articles and saved them in %s", idx + 1, outfile) 443 | 444 | fout.close() 445 | 446 | 447 | @main.command() 448 | @click.option("-f", "--folder", help="Folder you want to deduplicate") 449 | @click.option("-t", "--thresh", type=float, default=0.8, help="Minimum similarity score") 450 | @catch_error 451 | def dedupe(folder, thresh): 452 | """Deduplicate articles""" 453 | client = get_client() 454 | matched_articles, index = [], InvIndex() 455 | for idx, article in enumerate(client.fetch_unread(folder=folder)): 456 | if idx > 0 and (idx % 10) == 0: 457 | LOGGER.info("fetched %d articles and found %d duplicate", idx, len(matched_articles)) 458 | 459 | related = index.retrieve(article.title, k=10) 460 | sims = Counter() 461 | for docid, doc, _ in related: 462 | if docid == article.id: 463 | continue 464 | sims[doc] = sim_of(doc, article.title, method="cosine", term="char", ngram_range=(2, 3)) 465 | 466 | if sims and max(sims.values()) >= thresh: 467 | top_doc, top_score = sims.most_common()[0] 468 | print("article 「{}」 is duplicate with -> 「{}」".format(article.title, top_doc)) 469 | matched_articles.append(article) 470 | continue 471 | 472 | index.add_doc(article) 473 | 474 | LOGGER.info("fetched %d articles and found %d duplicate", idx + 1, len(matched_articles)) 475 | apply_action(matched_articles, client, "mark_as_read", None) 476 | 477 | 478 | @main.command("fetch-starred") 479 | @click.option("-f", "--folder", help="Folder which articles belong to") 480 | @click.option("-t", "--tags", help="Tag(s) for filtering, separate with comma") 481 | @click.option( 482 | "--batch-size", type=int, default=50, help="Maximum number of articles per API request" 483 | ) 484 | @click.option( 485 | "-o", "--outfile", help="Filename to save articles, required when output format is `csv`" 486 | ) 487 | @click.option( 488 | "-d", "--outdir", help="Directory to save articles, required when output format is not `csv`" 489 | ) 490 | @click.option("-l", "--limit", type=int) 491 | @click.option("--save-image", is_flag=True) 492 | @click.option( 493 | "--out-format", 494 | type=click.Choice(["json", "csv", "markdown", "org-mode"]), 495 | default="json", 496 | help="Format of output file, default: json", 497 | ) 498 | @catch_error 499 | def fetch_starred(folder, tags, batch_size, outfile, outdir, limit, save_image, out_format): 500 | """Fetch starred articles""" 501 | client = get_client() 502 | 503 | if out_format == "csv" and not outfile: 504 | click.secho("`outfile` is required!", fg="red") 505 | return -1 506 | elif out_format != "csv" and not outdir: 507 | click.secho("`outdir` is required!", fg="red") 508 | return -1 509 | 510 | if out_format == "csv": 511 | fout = codecs.open(outfile, mode="w", encoding="utf-8") 512 | writer = ( 513 | csv.writer(fout, delimiter=",", quoting=csv.QUOTE_ALL) if out_format == "csv" else None 514 | ) 515 | elif not os.path.exists(outdir): 516 | os.makedirs(outdir) 517 | 518 | tag_list = [] if not tags else tags.split(",") 519 | url_to_image = {} 520 | fetched_count = 0 521 | for article in client.fetch_starred(folder=folder, tags=tag_list, limit=limit, n=batch_size): 522 | if limit and fetched_count >= limit: 523 | break 524 | 525 | fetched_count += 1 526 | title = article.title 527 | text = article.text 528 | link = article.link 529 | if out_format == "csv": 530 | writer.writerow([link, title, text]) 531 | continue 532 | 533 | filename = re.sub(r"\s+", "_", title) 534 | filename = re.sub(r"[\[\]\(\)()::,,/|]", "_", filename) 535 | filename = re.sub(r'[“”\'"]', "", filename) 536 | filename = re.sub(r"-+", "-", filename) 537 | filename = filename[:50] 538 | if out_format == "json": 539 | filename += ".json" 540 | elif out_format == "markdown": 541 | filename += ".md" 542 | elif out_format == "org-mode": 543 | filename += ".org" 544 | 545 | if save_image: 546 | image_contents = re.findall(r"!\[(?:[^\[\]]+)\]\((?:[^\(\)]+)\)", text) 547 | for image_content in image_contents: 548 | match = re.match(r"!\[(?P[^\[\]]+)\]\((?P[^\(\)]+)\)", image_content) 549 | image_alt, image_url = itemgetter("alt", "url")(match.groupdict()) 550 | if image_url in url_to_image: 551 | text = text.replace( 552 | image_content, "![{}]({})".format(image_alt, url_to_image[image_url]) 553 | ) 554 | continue 555 | 556 | image_filename = "" 557 | if not re.findall(r"[\?\!\/=\&]", image_alt): 558 | image_filename = re.sub(r"\.[a-z]+$", "", image_alt) 559 | else: 560 | image_filename = str(uuid4()).replace("-", "") 561 | 562 | return_image_file = download_image( 563 | image_url, outdir, image_filename, proxies=client.proxies 564 | ) 565 | if return_image_file: 566 | LOGGER.info('Download image as "%s" from "%s"', return_image_file, image_url) 567 | text = text.replace( 568 | image_content, "![{}]({})".format(image_alt, return_image_file) 569 | ) 570 | url_to_image[image_url] = return_image_file 571 | 572 | with open(os.path.join(outdir, filename), "w") as fout: 573 | if out_format == "json": 574 | json.dump( 575 | {"title": title, "content": text, "url": link}, 576 | fout, 577 | ensure_ascii=False, 578 | indent=4, 579 | ) 580 | elif out_format == "markdown": 581 | print(title + "\n=====\n\nLINK: " + link + "\n\n", file=fout) 582 | text = re.sub(r"!\[([^\[\]]+)\]\(([^\(\)]+)\)", r"\n![\1](\2)\n", text) 583 | print(text + "\n", file=fout) 584 | elif out_format == "org-mode": 585 | print("#+TITLE: " + title + "\n\nLINK: " + link + "\n\n", file=fout) 586 | text = re.sub(r"!\[([^\[\]]+)\]\(([^\(\)]+)\)", r"\n[[file:\2][\1]]\n", text) 587 | text = re.sub(r"\[([^\[\]]+)\]\(([^\(\)]+)\)", r"[[\2][\1]]", text) 588 | print(text + "\n", file=fout) 589 | 590 | LOGGER.info('saved article "%s" in directory "%s"', title, outdir) 591 | 592 | if out_format == "csv": 593 | fout.close() 594 | LOGGER.info("fetched %d articles and saved them in %s", fetched_count, outfile) 595 | else: 596 | LOGGER.info("fetched %d articles and saved them in %s", fetched_count, outdir) 597 | 598 | 599 | @main.command("edit-subscription") 600 | @click.option( 601 | "-a", 602 | "--action", 603 | required=True, 604 | type=click.Choice(["follow", "unfollow", "rename", "add-folder", "remove-folder"]), 605 | help="", 606 | ) 607 | @click.option("-i", "--stream-id", required=True, help="Stream ID which you want to fetch") 608 | @click.option("-n", "--name", help="The name of subscription, for action follow/rename(required)") 609 | @click.option("-f", "--folder", help="Folder which subscription belong to") 610 | @catch_error 611 | def edit_subscriptions(action, stream_id, name, folder): 612 | """Get your subscriptions""" 613 | edit_action = action 614 | if action in ("rename", "add-folder", "remove-folder"): 615 | edit_action = "edit" 616 | if action == "rename" and not name: 617 | click.secho("`name` is required for action `rename`!", fg="red") 618 | return -1 619 | elif action in ("add-folder", "remove_starred") and not folder: 620 | click.secho(f"`folder` is required for action `{action}`", fg="red") 621 | return -1 622 | 623 | client = get_client() 624 | stream_id = "feed/" + stream_id if not stream_id.startswith("feed/") else stream_id 625 | if folder and not folder.startswith("user/-/label/"): 626 | folder = client.GENERAL_TAG_TEMPLATE.format(folder) 627 | 628 | add_folder = folder if action in ("follow", "add-folder") else None 629 | remove_folder = folder if action == "remove-folder" else None 630 | try: 631 | response = client.edit_subscription( 632 | stream_id, edit_action, title=name, add_folder=add_folder, remove_folder=remove_folder 633 | ) 634 | click.secho(response, fg="green") 635 | except Exception as exception: 636 | print("Error:", str(exception)) 637 | return -1 638 | 639 | 640 | if __name__ == "__main__": 641 | main() 642 | --------------------------------------------------------------------------------