├── .flake8 ├── .gitattributes ├── .github ├── dependabot.yml └── workflows │ └── build.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── CHANGES.rst ├── CONTRIBUTING.rst ├── LICENSE ├── MANIFEST.in ├── README.rst ├── docs ├── Makefile ├── api.rst ├── app.rst ├── changelog.rst ├── cli.rst ├── conf.py ├── config.rst ├── contributing.rst ├── dev-app.rst ├── dev.rst ├── guide.rst ├── images │ └── redesign-01.png ├── index.rst ├── install.rst ├── internal.rst ├── make.bat ├── plugins.rst ├── screenshots │ ├── dillo.png │ ├── entries-feed.png │ ├── entries-v2-dark.png │ ├── entries-v2-filters-light.png │ ├── entries.png │ ├── entry-one.png │ ├── entry-two.png │ ├── feeds.png │ ├── lynx.png │ └── search.png ├── tutorial.rst └── why.rst ├── examples ├── config.yaml ├── custom_headers.py ├── feed_slugs.py ├── parser_only.py ├── podcast.py └── terminal.py ├── pyproject.toml ├── run.sh ├── scripts ├── backup.sh ├── bench.py ├── debug_storage_stats.py ├── generate_import_all.py ├── jscontrols.html ├── jscontrols.py ├── lines.sh └── release.py ├── src └── reader │ ├── __init__.py │ ├── __main__.py │ ├── _app │ ├── __init__.py │ ├── api_thing.py │ ├── cli.py │ ├── static │ │ ├── controls.js │ │ └── style.css │ ├── templates │ │ ├── add_entry.html │ │ ├── entries.html │ │ ├── entry.html │ │ ├── feeds.html │ │ ├── layout.html │ │ ├── macros.html │ │ ├── metadata.html │ │ └── tags.html │ ├── v2 │ │ ├── __init__.py │ │ ├── forms.py │ │ ├── static │ │ │ ├── style.css │ │ │ └── theme.js │ │ └── templates │ │ │ └── v2 │ │ │ ├── entries.html │ │ │ ├── layout.html │ │ │ └── macros.html │ └── wsgi.py │ ├── _cli.py │ ├── _config.py │ ├── _hash_utils.py │ ├── _parser │ ├── __init__.py │ ├── _http_utils.py │ ├── _lazy.py │ ├── _url_utils.py │ ├── feedparser.py │ ├── file.py │ ├── http.py │ ├── jsonfeed.py │ └── requests │ │ ├── __init__.py │ │ └── _lazy.py │ ├── _plugins │ ├── __init__.py │ ├── cli_status.py │ ├── enclosure_tags.py │ ├── preview_feed_list.py │ ├── share.py │ ├── sqlite_releases.py │ ├── templates │ │ └── preview_feed_list.html │ └── timer.py │ ├── _storage │ ├── __init__.py │ ├── _base.py │ ├── _changes.py │ ├── _entries.py │ ├── _feeds.py │ ├── _html_utils.py │ ├── _schema.py │ ├── _search.py │ ├── _sql_utils.py │ ├── _sqlite_utils.py │ └── _tags.py │ ├── _types.py │ ├── _update.py │ ├── _utils.py │ ├── _vendor │ ├── __init__.py │ └── feedparser │ │ ├── __init__.py │ │ ├── api.py │ │ ├── datetimes │ │ ├── __init__.py │ │ ├── asctime.py │ │ ├── greek.py │ │ ├── hungarian.py │ │ ├── iso8601.py │ │ ├── korean.py │ │ ├── perforce.py │ │ ├── rfc822.py │ │ └── w3dtf.py │ │ ├── encodings.py │ │ ├── exceptions.py │ │ ├── html.py │ │ ├── http.py │ │ ├── mixin.py │ │ ├── namespaces │ │ ├── __init__.py │ │ ├── _base.py │ │ ├── admin.py │ │ ├── cc.py │ │ ├── dc.py │ │ ├── georss.py │ │ ├── itunes.py │ │ ├── mediarss.py │ │ └── psc.py │ │ ├── parsers │ │ ├── __init__.py │ │ ├── json.py │ │ ├── loose.py │ │ └── strict.py │ │ ├── py.typed │ │ ├── sanitizer.py │ │ ├── sgml.py │ │ ├── urls.py │ │ └── util.py │ ├── core.py │ ├── exceptions.py │ ├── plugins │ ├── __init__.py │ ├── enclosure_dedupe.py │ ├── entry_dedupe.py │ ├── mark_as_read.py │ ├── readtime.py │ └── ua_fallback.py │ ├── py.typed │ ├── types.py │ └── utils.py ├── tests ├── conftest.py ├── data │ ├── 10.json │ ├── 10.json.py │ ├── custom │ ├── empty.atom │ ├── empty.atom.py │ ├── empty.json │ ├── empty.json.py │ ├── empty.rss │ ├── empty.rss.py │ ├── full.atom │ ├── full.atom.py │ ├── full.json │ ├── full.json.py │ ├── 
full.rss │ ├── full.rss.py │ ├── invalid.json │ ├── invalid.json.py │ ├── relative.atom │ ├── relative.atom.py │ ├── relative.rss │ ├── relative.rss.py │ ├── sqlite_releases.html │ ├── unknown.json │ └── unknown.json.py ├── fakeparser.py ├── reader_methods.py ├── reader_test_plugins │ ├── __init__.py │ ├── good.py │ ├── init_error.py │ ├── missing_dependency.py │ └── missing_entry_point.py ├── test__types.py ├── test__utils.py ├── test_app.py ├── test_app_wsgi.py ├── test_bench.py ├── test_changes.py ├── test_cli.py ├── test_config.py ├── test_exceptions.py ├── test_hash_utils.py ├── test_html_utils.py ├── test_lazy_imports.py ├── test_parser.py ├── test_plugins_cli_status.py ├── test_plugins_enclosure_dedupe.py ├── test_plugins_entry_dedupe.py ├── test_plugins_mark_as_read.py ├── test_plugins_preview_feed_list.py ├── test_plugins_readtime.py ├── test_plugins_sqlite_releases.py ├── test_plugins_ua_fallback.py ├── test_reader.py ├── test_reader_context.py ├── test_reader_counts.py ├── test_reader_deprecations.py ├── test_reader_filter.py ├── test_reader_hooks.py ├── test_reader_integration.py ├── test_reader_plugins.py ├── test_reader_private.py ├── test_reader_search.py ├── test_reader_sort.py ├── test_reader_update.py ├── test_reader_utils.py ├── test_search.py ├── test_sql_utils.py ├── test_sqlite_utils.py ├── test_storage.py ├── test_tags.py ├── test_test_utils.py ├── test_types.py └── utils.py └── tox.ini /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | # B = bugbear 3 | # E = pycodestyle errors 4 | # F = flake8 pyflakes 5 | # W = pycodestyle warnings 6 | # B9 = bugbear opinions 7 | select = B, E, F, W, B9 8 | ignore = 9 | # slice notation whitespace, invalid 10 | E203 11 | # line length, handled by bugbear B950 12 | E501 13 | # bugbear line length; too sensitive, triggered for comments 14 | # and docstrings (and adding "noqa" in comments is making things worse); 15 | # black taking care of line length for code should be good enough; 16 | # if enabled, we should set max-line-length = 80 (so up to 88 are allowed) 17 | B950 18 | # bare except, handled by bugbear B001 19 | E722 20 | # bin op line break, invalid 21 | W503 W504 22 | # string formatting opinion 23 | B907 24 | # multiple statements on one line, handled by black 25 | E704 26 | per-file-ignores = 27 | # __init__ modules export names 28 | **/__init__.py: F401 29 | exclude = tests/*, docs/*, scripts/*, src/reader/_vendor/* 30 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # https://github.com/actions/checkout/issues/135#issuecomment-613361104 2 | * text eol=lf 3 | *.png -text 4 | *.bat -text 5 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: pip 4 | directory: "/" 5 | schedule: 6 | interval: weekly 7 | open-pull-requests-limit: 10 8 | ignore: 9 | - dependency-name: mypy 10 | versions: 11 | - "0.800" 12 | -------------------------------------------------------------------------------- /.github/workflows/build.yaml: -------------------------------------------------------------------------------- 1 | 2 | name: build 3 | 4 | on: 5 | push: 6 | branches: [master] 7 | pull_request: 8 | branches: [master] 9 | workflow_dispatch: 10 | 11 | defaults: 12 | 
run: 13 | shell: bash 14 | 15 | jobs: 16 | tests: 17 | runs-on: ${{ matrix.os }} 18 | 19 | strategy: 20 | fail-fast: false 21 | 22 | matrix: 23 | python-version: [ 24 | "3.11", "3.12", "3.13", 25 | "pypy-3.11" 26 | ] 27 | os: [ubuntu-latest, macos-latest, windows-latest] 28 | 29 | steps: 30 | 31 | - uses: actions/checkout@v4 32 | 33 | - uses: actions/setup-python@v5 34 | with: 35 | python-version: ${{ matrix.python-version }} 36 | allow-prereleases: true 37 | 38 | - run: ./run.sh ci-install 39 | - run: ./run.sh ci-run 40 | 41 | - uses: codecov/codecov-action@v4 42 | with: 43 | token: ${{ secrets.CODECOV_TOKEN }} 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # dotenv 84 | .env 85 | 86 | # direnv 87 | .envrc 88 | 89 | # virtualenv 90 | .venv* 91 | venv/ 92 | ENV/ 93 | 94 | # Spyder project settings 95 | .spyderproject 96 | .spyproject 97 | 98 | # Rope project settings 99 | .ropeproject 100 | 101 | # mkdocs documentation 102 | /site 103 | 104 | # mypy 105 | .mypy_cache/ 106 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | exclude: ^src/reader/_vendor/.*$ 2 | 3 | repos: 4 | 5 | - repo: https://github.com/pycqa/isort 6 | rev: 6.0.1 7 | hooks: 8 | - id: isort 9 | 10 | - repo: https://github.com/asottile/pyupgrade 11 | rev: v3.19.1 12 | hooks: 13 | - id: pyupgrade 14 | args: ["--py311-plus"] 15 | 16 | - repo: https://github.com/psf/black 17 | rev: 25.1.0 18 | hooks: 19 | - id: black 20 | args: ["-S"] 21 | 22 | - repo: https://github.com/PyCQA/flake8 23 | rev: 7.2.0 24 | hooks: 25 | - id: flake8 26 | additional_dependencies: [flake8-bugbear] 27 | 28 | - repo: https://github.com/pre-commit/pre-commit-hooks 29 | rev: v5.0.0 30 | hooks: 31 | - id: check-byte-order-marker 32 | - id: trailing-whitespace 33 | - id: end-of-file-fixer 34 | 35 | ci: 36 | autoupdate_schedule: quarterly 37 | -------------------------------------------------------------------------------- /.readthedocs.yaml: 
-------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-22.04 5 | tools: 6 | python: "3" # (last stable CPython version) 7 | 8 | python: 9 | install: 10 | - method: pip 11 | path: . 12 | extra_requirements: 13 | - docs 14 | 15 | sphinx: 16 | configuration: docs/conf.py 17 | 18 | formats: all 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018 lemon24 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | 1. Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | 14 | 3. Neither the name of the copyright holder nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 21 | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 24 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 25 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 26 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 27 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include src/reader/_app/templates * 2 | recursive-include src/reader/_app/static * 3 | recursive-include src/reader/_app/v2 * 4 | recursive-include src/reader/_plugins/templates * 5 | recursive-include src/reader *.pyi 6 | recursive-include src/reader py.typed 7 | include run.sh CHANGES.rst LICENSE tox.ini 8 | recursive-include tests * 9 | recursive-include examples * 10 | recursive-include docs * 11 | recursive-include scripts * 12 | global-exclude *.py[cod] 13 | prune docs/_build 14 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. begin-intro 2 | 3 | **reader** is a Python feed reader library. 4 | 5 | It is designed to allow writing feed reader applications 6 | without any business code, 7 | and without depending on a particular framework. 8 | 9 | .. end-intro 10 | 11 | 12 | |build-status-github| |code-coverage| |documentation-status| |pypi-status| |type-checking| |code-style| 13 | 14 | 15 | .. 
|build-status-github| image:: https://github.com/lemon24/reader/workflows/build/badge.svg 16 | :target: https://github.com/lemon24/reader/actions?query=workflow%3Abuild 17 | :alt: build status (GitHub Actions) 18 | 19 | .. |code-coverage| image:: https://codecov.io/gh/lemon24/reader/branch/master/graph/badge.svg?token=lcLZaSFysf 20 | :target: https://codecov.io/gh/lemon24/reader 21 | :alt: code coverage 22 | 23 | .. |documentation-status| image:: https://readthedocs.org/projects/reader/badge/?version=latest&style=flat 24 | :target: https://reader.readthedocs.io/en/latest/?badge=latest 25 | :alt: documentation status 26 | 27 | .. |pypi-status| image:: https://img.shields.io/pypi/v/reader.svg 28 | :target: https://pypi.python.org/pypi/reader 29 | :alt: PyPI status 30 | 31 | .. |type-checking| image:: http://www.mypy-lang.org/static/mypy_badge.svg 32 | :target: http://mypy-lang.org/ 33 | :alt: checked with mypy 34 | 35 | .. |code-style| image:: https://img.shields.io/badge/code%20style-black-000000.svg 36 | :target: https://github.com/psf/black 37 | :alt: code style: black 38 | 39 | 40 | .. begin-features 41 | 42 | *reader* allows you to: 43 | 44 | * retrieve, store, and manage **Atom**, **RSS**, and **JSON** feeds 45 | * mark articles as read or important 46 | * add arbitrary tags/metadata to feeds and articles 47 | * filter feeds and articles 48 | * full-text search articles 49 | * get statistics on feed and user activity 50 | * write plugins to extend its functionality 51 | * skip all the low level stuff and focus on what makes your feed reader different 52 | 53 | ...all these with: 54 | 55 | * a stable, clearly documented API 56 | * excellent test coverage 57 | * fully typed Python 58 | 59 | What *reader* doesn't do: 60 | 61 | * provide an UI 62 | * provide a REST API (yet) 63 | * depend on a web framework 64 | * have an opinion of how/where you use it 65 | 66 | The following exist, but are optional (and frankly, a bit unpolished): 67 | 68 | * a minimal web interface 69 | 70 | * that works even with text-only browsers 71 | * with automatic tag fixing for podcasts (MP3 enclosures) 72 | 73 | * a command-line interface 74 | 75 | .. end-features 76 | 77 | 78 | Documentation: `reader.readthedocs.io`_ 79 | 80 | .. _reader.readthedocs.io: https://reader.readthedocs.io/ 81 | 82 | 83 | Usage: 84 | 85 | .. begin-usage 86 | 87 | .. code-block:: bash 88 | 89 | $ pip install reader 90 | 91 | .. code-block:: python 92 | 93 | >>> from reader import make_reader 94 | >>> 95 | >>> reader = make_reader('db.sqlite') 96 | >>> reader.add_feed('http://www.hellointernet.fm/podcast?format=rss') 97 | >>> reader.update_feeds() 98 | >>> 99 | >>> entries = list(reader.get_entries()) 100 | >>> [e.title for e in entries] 101 | ['H.I. #108: Project Cyclops', 'H.I. #107: One Year of Weird', ...] 102 | >>> 103 | >>> reader.mark_entry_as_read(entries[0]) 104 | >>> 105 | >>> [e.title for e in reader.get_entries(read=False)] 106 | ['H.I. #107: One Year of Weird', 'H.I. #106: Water on Mars', ...] 107 | >>> [e.title for e in reader.get_entries(read=True)] 108 | ['H.I. #108: Project Cyclops'] 109 | >>> 110 | >>> reader.update_search() 111 | >>> 112 | >>> for e in reader.search_entries('year', limit=3): 113 | ... title = e.metadata.get('.title') 114 | ... print(title.value, title.highlights) 115 | ... 116 | H.I. #107: One Year of Weird (slice(15, 19, None),) 117 | H.I. #52: 20,000 Years of Torment (slice(17, 22, None),) 118 | H.I. #83: The Best Kind of Prison () 119 | 120 | .. 
end-usage 121 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = reader 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/changelog.rst: -------------------------------------------------------------------------------- 1 | 2 | .. include:: ../CHANGES.rst 3 | -------------------------------------------------------------------------------- /docs/cli.rst: -------------------------------------------------------------------------------- 1 | 2 | Command-line interface 3 | ====================== 4 | 5 | *reader* comes with a command-line interface 6 | that exposes basic management functionality. 7 | 8 | 9 | .. warning:: 10 | 11 | The CLI is not fully stable, 12 | see the :ref:`roadmap <roadmap>` for details. 13 | 14 | .. note:: 15 | 16 | The command-line interface is optional, use the ``cli`` extra to install 17 | its :ref:`dependencies <optional dependencies>`. 18 | 19 | Most commands need a database to work. The following are equivalent: 20 | 21 | .. code-block:: bash 22 | 23 | python -m reader --db /path/to/db some-command 24 | READER_DB=/path/to/db python -m reader some-command 25 | 26 | If no database path is given, ``~/.config/reader/db.sqlite`` is used 27 | (at least on Linux). 28 | 29 | Add a feed: 30 | 31 | .. code-block:: bash 32 | 33 | python -m reader add http://www.example.com/atom.xml 34 | 35 | Update all feeds: 36 | 37 | .. code-block:: bash 38 | 39 | python -m reader update 40 | 41 | Serve the web application locally (at http://localhost:8080/): 42 | 43 | .. code-block:: bash 44 | 45 | python -m reader serve 46 | 47 | 48 | .. _cli-update: 49 | 50 | Updating feeds 51 | -------------- 52 | 53 | For *reader* to actually be useful as a feed reader, feeds need to get updated 54 | and, if full-text search is enabled, the search index needs to be updated. 55 | 56 | You can run the ``update`` command regularly to update feeds (e.g. every 57 | hour). Note that *reader* uses the ETag and Last-Modified headers, so, if 58 | supported by the server, feeds will only be downloaded if they changed. 59 | 60 | To avoid waiting too long for a new feed to be updated, you can run 61 | ``update --new`` more often (e.g. every minute); this will update 62 | only newly-added feeds. This is also a good time to update the search index.
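The same updates can also be run from Python instead of the CLI; a minimal sketch using the high-level API shown in the quickstart (the ``db.sqlite`` path is only an example):

.. code-block:: python

    from reader import make_reader

    reader = make_reader('db.sqlite')

    # update all feeds; only changed feeds are actually downloaded
    reader.update_feeds()

    # update the full-text search index (requires search to be enabled)
    reader.update_search()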
63 | 64 | You can achieve this using cron:: 65 | 66 | 42 * * * * reader update -v 2>&1 >>"/tmp/$LOGNAME.reader.update.hourly.log" 67 | * * * * * reader update -v --new 2>&1 >>"/tmp/$LOGNAME.reader.update.new.log"; reader search update 2>&1 >>"/tmp/$LOGNAME.reader.search.update.log" 68 | 69 | If you are running *reader* on a personal computer, it might also be convenient 70 | to run ``update`` once immediately after boot:: 71 | 72 | @reboot sleep 60; reader update -v 2>&1 >>"/tmp/$LOGNAME.reader.update.boot.log" 73 | 74 | 75 | Reference 76 | --------- 77 | 78 | .. click:: reader._cli:cli 79 | :prog: reader 80 | :show-nested: 81 | -------------------------------------------------------------------------------- /docs/config.rst: -------------------------------------------------------------------------------- 1 | 2 | Configuration 3 | ============= 4 | 5 | Both the :doc:`CLI <cli>` and the :doc:`web application <app>` can 6 | be configured from a file. 7 | 8 | .. warning:: 9 | 10 | The configuration file format is not stable yet 11 | and might change without any notice. 12 | 13 | .. note:: 14 | 15 | Configuration file loading dependencies get installed automatically when 16 | installing the CLI or the web application 17 | :ref:`extras <optional dependencies>`. 18 | 19 | 20 | The configuration file path can be specified either through the ``--config`` 21 | CLI option or through the ``READER_CONFIG`` environment variable 22 | (also usable with the web application). 23 | 24 | The config file is split into contexts; 25 | this allows having a set of global defaults 26 | and overriding them with CLI- or web-app-specific values. 27 | Use the ``config dump --merge`` command 28 | to see the final configuration for each context. 29 | 30 | The older ``READER_DB``, ``READER_PLUGIN``, and ``READER_APP_PLUGIN`` 31 | environment variables always *replace* the corresponding config values, 32 | so they should be used only for debugging. 33 | 34 | The following example shows the config file structure 35 | and the options currently available: 36 | 37 | .. literalinclude:: ../examples/config.yaml 38 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../CONTRIBUTING.rst 2 | -------------------------------------------------------------------------------- /docs/dev-app.rst: -------------------------------------------------------------------------------- 1 | 2 | Web interface design philosophy 3 | ------------------------------- 4 | 5 | The web interface should be as minimal as possible. 6 | 7 | The web interface should work with text-only browsers, modern browsers, and 8 | everything in-between. Some may be nicer to use, but all functionality should 9 | be available everywhere. 10 | 11 | Fast and ugly is better than slow and pretty. 12 | 13 | It should be possible to build a decent web interface (at least for reader) 14 | using only HTML forms with a few JavaScript enhancements added on top. 15 | 16 | 17 | 2023 update: `Hypermedia Systems`_ and `htmx`_ seem to embody these ideas 18 | in a much better way than I could; 19 | a potential web app re-design will likely use them. 20 | 21 | 22 | .. _Hypermedia Systems: https://hypermedia.systems/ 23 | .. _htmx: https://htmx.org/ 24 | 25 | 26 | User interactions 27 | ~~~~~~~~~~~~~~~~~ 28 | 29 | .. note:: 30 | 31 | This list might lag behind reality; anyway, it all started from here.
32 | 33 | User interactions, by logical groups: 34 | 35 | * entry 36 | 37 | * mark an entry as read 38 | * mark an entry as unread 39 | * go to an entry's link 40 | * go to an entry's feed 41 | * go to an entry's feed link 42 | 43 | * entry list 44 | 45 | * see the latest unread entries 46 | * see the latest read entries 47 | * see the latest entries 48 | 49 | * entry list (feed) 50 | 51 | * mark all the entries as read 52 | * mark all the entries as unread 53 | 54 | * feed 55 | 56 | * add a feed 57 | * delete a feed 58 | * change a feed's title 59 | * go to a feed's entries 60 | * go to a feed's link 61 | 62 | * feed list 63 | 64 | * see a list of all the feeds 65 | 66 | * other 67 | 68 | * be notified of the success/failure of a previous action 69 | 70 | Controls (below), mapped to user interactions: 71 | 72 | * link 73 | 74 | * go to ... 75 | * see ... 76 | 77 | * simple button 78 | 79 | * mark an entry as read 80 | * mark an entry as unread 81 | 82 | * button with input 83 | 84 | * add a feed 85 | * change a feed's title 86 | 87 | * button with checkbox 88 | 89 | * mark all the entries as read 90 | * mark all the entries as unread 91 | * delete a feed 92 | 93 | 94 | Controls 95 | ~~~~~~~~ 96 | 97 | There are three interaction modes, HTML-only, HTML+CSS, and HTML+CSS+JS. 98 | Each mode adds enhancements on top of the previous one. 99 | 100 | In the HTML-only mode, all elements of a control are visible. Clicking the 101 | element that triggers the action (e.g. a button) submits a form and, if 102 | possible, redirects back to the source page, with any error messages shown 103 | after the action element. 104 | 105 | In the HTML+CSS mode, some elements might be hidden so that only the action 106 | element is visible; in its inert state it should look like text. On hover, 107 | the other elements of the control should become visible. 108 | 109 | In the HTML+CSS+JS mode, clicking the action element results in an asynchronous 110 | call, with the status of the action displayed after it. 111 | 112 | Links are just links. 113 | 114 | Simple buttons consist of a single button. 115 | 116 | Buttons with input consist of a text input element followed by a button. 117 | The text input is hidden when not hovered. 118 | 119 | Buttons with checkbox consist of a checkbox, a label for the checkbox, and 120 | a button. The checkbox and label are hidden when not hovered. 121 | 122 | 123 | Page structure 124 | ~~~~~~~~~~~~~~ 125 | 126 | Text TBD. 127 | 128 | .. figure:: images/redesign-01.png 129 | :width: 240px 130 | :alt: page structure, controls 131 | 132 | page structure, controls 133 | 134 | 135 | Pages 136 | ~~~~~ 137 | 138 | Text TBD. 139 | -------------------------------------------------------------------------------- /docs/images/redesign-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lemon24/reader/bcc46c54d304d693a9031666ac68b6fa1fe5f6c1/docs/images/redesign-01.png -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | 2 | reader 3 | ====== 4 | 5 | .. include:: ../README.rst 6 | :start-after: begin-intro 7 | :end-before: end-intro 8 | 9 | 10 | Features 11 | -------- 12 | 13 | .. include:: ../README.rst 14 | :start-after: begin-features 15 | :end-before: end-features 16 | 17 | 18 | Quickstart 19 | ---------- 20 | 21 | What does it look like? Here is an example of *reader* in use: 22 | 23 | .. 
include:: ../README.rst 24 | :start-after: begin-usage 25 | :end-before: end-usage 26 | 27 | 28 | User guide 29 | ---------- 30 | 31 | This part of the documentation guides you through all of the library’s usage patterns. 32 | 33 | .. toctree:: 34 | :maxdepth: 2 35 | 36 | why 37 | install 38 | tutorial 39 | guide 40 | 41 | 42 | API reference 43 | ------------- 44 | 45 | If you are looking for information on a specific function, class, or method, 46 | this part of the documentation is for you. 47 | 48 | .. toctree:: 49 | :maxdepth: 2 50 | 51 | api 52 | internal 53 | 54 | 55 | Unstable features 56 | ----------------- 57 | 58 | The following are optional features that are still being worked on. 59 | They may become their own packages, get merged into the main library, 60 | or be removed in the future. 61 | 62 | .. toctree:: 63 | :maxdepth: 2 64 | 65 | cli 66 | app 67 | config 68 | plugins 69 | 70 | 71 | Project information 72 | ------------------- 73 | 74 | *reader* is released under the :gh:`BSD ` license, 75 | its documentation lives at `Read the Docs`_, 76 | the code on `GitHub`_, 77 | and the latest release on `PyPI`_. 78 | It is rigorously tested on Python |min_python|\+ and PyPy. 79 | 80 | 81 | .. _Read the Docs: https://reader.readthedocs.io/ 82 | .. _GitHub: https://github.com/lemon24/reader 83 | .. _PyPI: https://pypi.org/project/reader/ 84 | 85 | 86 | .. toctree:: 87 | :maxdepth: 2 88 | 89 | contributing 90 | dev 91 | changelog 92 | 93 | 94 | Indices and tables 95 | ================== 96 | 97 | * :ref:`genindex` 98 | * :ref:`search` 99 | -------------------------------------------------------------------------------- /docs/install.rst: -------------------------------------------------------------------------------- 1 | 2 | Installation 3 | ============ 4 | 5 | Python versions 6 | --------------- 7 | 8 | *reader* supports Python |min_python| and newer, and PyPy. 9 | 10 | 11 | Dependencies 12 | ------------ 13 | 14 | These packages will be installed automatically when installing *reader*: 15 | 16 | * `feedparser`_ parses feeds; *reader* is essentially feedparser + state. 17 | * `requests`_ retrieves feeds from the internet; 18 | it replaces feedparser's default use of :mod:`urllib` 19 | to make it easier to write plugins. 20 | * `werkzeug`_ provides HTTP utilities. 21 | * `iso8601`_ parses dates in ISO 8601 / RFC 3339; used for JSON Feed parsing. 22 | * `beautifulsoup4`_ is used to strip HTML tags before adding entries 23 | to the search index. 24 | * `typing-extensions`_ is used for :mod:`typing` backports. 25 | 26 | *reader* also depends on the :mod:`sqlite3` standard library module 27 | (at least SQLite 3.18 with the `JSON1`_ and `FTS5`_ extensions). 28 | 29 | 30 | .. _no-vendored-feedparser: 31 | 32 | .. note:: 33 | 34 | Because `feedparser`_ makes PyPI releases at a lower cadence, 35 | *reader* uses a vendored version of feedparser's `develop`_ branch 36 | by default since :ref:`version 2.9`. 37 | To opt out of this behavior, and make *reader* use 38 | the installed ``feedparser`` package, 39 | set the ``READER_NO_VENDORED_FEEDPARSER`` environment variable to ``1``. 40 | 41 | .. _develop: https://github.com/kurtmckee/feedparser 42 | 43 | 44 | .. _optional dependencies: 45 | 46 | Optional dependencies 47 | ~~~~~~~~~~~~~~~~~~~~~ 48 | 49 | Despite coming with a CLI and web application, *reader* is primarily a library. 50 | As such, most dependencies are optional, and can be installed as `extras`_. 
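For example, to install *reader* together with the CLI and web app extras listed below, you would run something like:

.. code-block:: bash

    pip install 'reader[cli,app]'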
51 | 52 | As of version |version|, *reader* has the following extras: 53 | 54 | * ``cli`` installs the dependencies needed for the 55 | :doc:`command-line interface `. 56 | * ``app`` installs the dependencies needed for the 57 | :doc:`web application `. 58 | * Specific plugins may require additional dependencies; 59 | refer to their documentation for details. 60 | 61 | 62 | .. _beautifulsoup4: https://www.crummy.com/software/BeautifulSoup/ 63 | .. _feedparser: https://feedparser.readthedocs.io/en/latest/ 64 | .. _requests: https://requests.readthedocs.io/ 65 | .. _werkzeug: https://werkzeug.palletsprojects.com/ 66 | .. _iso8601: http://pyiso8601.readthedocs.org/ 67 | .. _typing-extensions: https://pypi.org/project/typing-extensions/ 68 | .. _JSON1: https://www.sqlite.org/json1.html 69 | .. _FTS5: https://www.sqlite.org/fts5.html 70 | 71 | .. _extras: https://www.python.org/dev/peps/pep-0508/#extras 72 | 73 | 74 | Virtual environments 75 | -------------------- 76 | 77 | You should probably install *reader* inside a virtual environment; 78 | see `this `_ for how and why to do it. 79 | 80 | .. _venv: https://flask.palletsprojects.com/en/1.1.x/installation/#virtual-environments 81 | 82 | 83 | Install reader 84 | -------------- 85 | 86 | Use the following command to install *reader*, 87 | along with its required dependencies: 88 | 89 | .. code-block:: bash 90 | 91 | pip install reader 92 | 93 | Use the following command to install *reader* 94 | with `optional dependencies `_: 95 | 96 | .. code-block:: bash 97 | 98 | pip install 'reader[some-extra,...]' 99 | 100 | 101 | Update reader 102 | ~~~~~~~~~~~~~ 103 | 104 | Use the following command to update *reader* 105 | (add any extras as needed): 106 | 107 | .. code-block:: bash 108 | 109 | pip install --upgrade reader 110 | 111 | 112 | Living on the edge 113 | ~~~~~~~~~~~~~~~~~~ 114 | 115 | If you want to use the latest *reader* code before it’s released, 116 | install or update from the master branch: 117 | 118 | .. code-block:: bash 119 | 120 | pip install --upgrade https://github.com/lemon24/reader/archive/master.tar.gz 121 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | set SPHINXPROJ=reader 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 
24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /docs/screenshots/dillo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lemon24/reader/bcc46c54d304d693a9031666ac68b6fa1fe5f6c1/docs/screenshots/dillo.png -------------------------------------------------------------------------------- /docs/screenshots/entries-feed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lemon24/reader/bcc46c54d304d693a9031666ac68b6fa1fe5f6c1/docs/screenshots/entries-feed.png -------------------------------------------------------------------------------- /docs/screenshots/entries-v2-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lemon24/reader/bcc46c54d304d693a9031666ac68b6fa1fe5f6c1/docs/screenshots/entries-v2-dark.png -------------------------------------------------------------------------------- /docs/screenshots/entries-v2-filters-light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lemon24/reader/bcc46c54d304d693a9031666ac68b6fa1fe5f6c1/docs/screenshots/entries-v2-filters-light.png -------------------------------------------------------------------------------- /docs/screenshots/entries.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lemon24/reader/bcc46c54d304d693a9031666ac68b6fa1fe5f6c1/docs/screenshots/entries.png -------------------------------------------------------------------------------- /docs/screenshots/entry-one.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lemon24/reader/bcc46c54d304d693a9031666ac68b6fa1fe5f6c1/docs/screenshots/entry-one.png -------------------------------------------------------------------------------- /docs/screenshots/entry-two.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lemon24/reader/bcc46c54d304d693a9031666ac68b6fa1fe5f6c1/docs/screenshots/entry-two.png -------------------------------------------------------------------------------- /docs/screenshots/feeds.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lemon24/reader/bcc46c54d304d693a9031666ac68b6fa1fe5f6c1/docs/screenshots/feeds.png -------------------------------------------------------------------------------- /docs/screenshots/lynx.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lemon24/reader/bcc46c54d304d693a9031666ac68b6fa1fe5f6c1/docs/screenshots/lynx.png -------------------------------------------------------------------------------- /docs/screenshots/search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lemon24/reader/bcc46c54d304d693a9031666ac68b6fa1fe5f6c1/docs/screenshots/search.png 
-------------------------------------------------------------------------------- /docs/why.rst: -------------------------------------------------------------------------------- 1 | 2 | 3 | Why *reader*? 4 | ============= 5 | 6 | 7 | Why use a feed reader library? 8 | ------------------------------ 9 | 10 | Have you been unhappy with existing feed readers and wanted to make your own, but: 11 | 12 | * never knew where to start? 13 | * it seemed like too much work? 14 | * you don't like writing backend code? 15 | 16 | Are you already working with `feedparser`_, but: 17 | 18 | * want an easier way to store, filter, sort and search feeds and entries? 19 | * want to get back type-annotated objects instead of dicts? 20 | * want to restrict or deny file-system access? 21 | * want to change the way feeds are retrieved by using the more familiar `requests`_ library? 22 | * want to also support `JSON Feed`_? 23 | * want to support custom information sources? 24 | 25 | ... while still supporting all the feed types feedparser does? 26 | 27 | If you answered yes to any of the above, *reader* can help. 28 | 29 | 30 | .. _feedparser: https://feedparser.readthedocs.io/en/latest/ 31 | .. _requests: https://requests.readthedocs.io 32 | .. _JSON Feed: https://jsonfeed.org/ 33 | 34 | 35 | 36 | .. _philosophy: 37 | 38 | The *reader* philosophy 39 | ----------------------- 40 | 41 | * *reader* is a library 42 | * *reader* is for the long term 43 | * *reader* is extensible 44 | * *reader* is stable (within reason) 45 | * *reader* is simple to use; API matters 46 | * *reader* features work well together 47 | * *reader* is tested 48 | * *reader* is documented 49 | * *reader* has minimal dependencies 50 | 51 | 52 | 53 | Why make your own feed reader? 54 | ------------------------------ 55 | 56 | So you can: 57 | 58 | * have full control over your data 59 | * control what features it has or doesn't have 60 | * decide how much you pay for it 61 | * make sure it doesn't get closed while you're still using it 62 | * really, it's `easier than you think`_ 63 | 64 | Obviously, this may not be your cup of tea, but if it is, *reader* can help. 65 | 66 | 67 | .. _easier than you think: https://rachelbythebay.com/w/2011/10/26/fred/ 68 | 69 | 70 | 71 | Why make a feed reader library? 72 | ------------------------------- 73 | 74 | I wanted a feed reader that is: 75 | 76 | * accessible from multiple devices 77 | * fast 78 | * with a simple UI 79 | * self-hosted (for privacy reasons) 80 | * modular / easy to extend (so I can change stuff I don't like) 81 | * written in Python (see above) 82 | 83 | The fact that I couldn't find one extensible enough bugged me so much that I decided to make my own; a few years later, I ended up with what I would've liked to use when I first started. 84 | -------------------------------------------------------------------------------- /examples/config.yaml: -------------------------------------------------------------------------------- 1 | # Contexts are values of the top level map. 2 | # There are 3 known contexts: default, cli, and app. 3 | # 4 | # The default context can also be implicit: top level keys that don't 5 | # correspond to a known context are assumed to belong to the default context. 6 | # 7 | # Thus, the following are equivalent: 8 | # 9 | # default: 10 | # reader: ... 11 | # something else: ... 12 | # 13 | # --- 14 | # 15 | # reader: ... 16 | # something else: ... 17 | # 18 | # However, mixing them is an error: 19 | # 20 | # default: 21 | # reader: ... 22 | # something else: ... 
23 | 24 | 25 | # default context. 26 | # 27 | # Provides default settings for the other contexts. 28 | 29 | default: 30 | # The reader section contains make_reader() keyword arguments: 31 | reader: 32 | url: /path/to/db.sqlite 33 | feed_root: /path/to/feeds 34 | 35 | # Additionally, it's possible to specify reader plugins, as a 36 | # <plugin name>: <plugin options> 37 | # map; options are ignored at the moment. 38 | # Note that unlike other settings, plugins are merged, not replaced. 39 | plugins: 40 | reader._plugins.sqlite_releases:init: 41 | reader.ua_fallback: 42 | 43 | 44 | # CLI context. 45 | 46 | cli: 47 | # When using the CLI, we want to use some additional reader plugins. 48 | reader: 49 | plugins: 50 | reader.mark_as_read: 51 | reader.entry_dedupe: 52 | 53 | # The cli context also allows changing the CLI defaults. 54 | defaults: 55 | # Note that while the --db and --plugin CLI options could appear here, 56 | # doing so isn't very useful, since the CLI values (including defaults) 57 | # always override the corresponding config file values. 58 | 59 | # Options that can be passed multiple times take a list of values: 60 | # --plugin reader._plugins.enclosure_dedupe:enclosure_dedupe 61 | # plugin: [reader._plugins.enclosure_dedupe:enclosure_dedupe] 62 | 63 | # Subcommand defaults can be given as nested maps: 64 | 65 | # add --update 66 | add: 67 | # Flags take a boolean value: 68 | update: yes 69 | 70 | # update --workers 10 -vv 71 | update: 72 | workers: 10 73 | # Flags that can be repeated take an integer: 74 | verbose: 2 75 | 76 | search: 77 | # search update -v 78 | update: 79 | verbose: 1 80 | 81 | # serve --port 8888 82 | serve: 83 | port: 8888 84 | 85 | 86 | # Web application context. 87 | # 88 | # Used for both the serve command (`python -m reader serve`) 89 | # and when using the WSGI application (reader._app.wsgi:app) directly. 90 | 91 | app: 92 | # When using the web app, we want to use an additional reader plugin. 93 | reader: 94 | plugins: 95 | reader.enclosure_dedupe: 96 | 97 | # ... and some app plugins. 98 | plugins: 99 | reader._plugins.enclosure_tags:init: 100 | reader._plugins.preview_feed_list:init: 101 | -------------------------------------------------------------------------------- /examples/custom_headers.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adding custom headers when retrieving feeds 3 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 4 | 5 | Example of adding custom request headers with :attr:`.SessionFactory.request_hooks`: 6 | 7 | .. code-block:: console 8 | 9 | $ python examples/custom_headers.py 10 | updating... 11 | server: Hello, world! 12 | updated!
13 | 14 | """ 15 | 16 | # fmt: off 17 | # flake8: noqa 18 | 19 | import http.server 20 | import threading 21 | from reader import make_reader 22 | 23 | # start a background server that logs the received header 24 | 25 | class Handler(http.server.BaseHTTPRequestHandler): 26 | def log_message(self, *_): pass 27 | def do_GET(self): 28 | print("server:", self.headers.get('my-header')) 29 | self.send_error(304) 30 | 31 | server = http.server.HTTPServer(('localhost', 8080), Handler) 32 | threading.Thread(target=server.handle_request).start() 33 | 34 | # create a reader object 35 | 36 | reader = make_reader(':memory:') 37 | reader.add_feed('http://localhost:8080') 38 | 39 | # set up a hook that adds the header to each request 40 | 41 | def hook(session, request, **kwargs): 42 | request.headers.setdefault('my-header', 'Hello, world!') 43 | 44 | reader._parser.session_factory.request_hooks.append(hook) 45 | 46 | # updating the feed sends the modified request to the server 47 | 48 | print("updating...") 49 | reader.update_feeds() 50 | print("updated!") 51 | -------------------------------------------------------------------------------- /examples/feed_slugs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Feed slugs 3 | ~~~~~~~~~~ 4 | 5 | This is a recipe of what a "get feed by slug" plugin may look like 6 | (e.g. for user-defined short URLs). 7 | 8 | Usage:: 9 | 10 | >>> from reader import make_reader 11 | >>> import feed_slugs 12 | >>> reader = make_reader('db.sqlite', plugins=[feed_slugs.init_reader]) 13 | >>> reader.set_feed_slug('https://death.andgravity.com/_feed/index.xml', 'andgravity') 14 | >>> reader.get_feed_by_slug('andgravity') 15 | Feed(url='https://death.andgravity.com/_feed/index.xml', ...) 16 | >>> reader.get_feed_slug(_.url) 17 | 'andgravity' 18 | 19 | .. 20 | Originally implemented for https://github.com/lemon24/reader/issues/358. 
21 | 22 | """ 23 | 24 | # fmt: off 25 | # flake8: noqa 26 | 27 | def init_reader(reader): 28 | # __get__() allows help(reader.get_feed_by_slug) to work 29 | reader.get_feed_by_slug = get_feed_by_slug.__get__(reader) 30 | reader.get_feed_slug = get_feed_slug.__get__(reader) 31 | reader.set_feed_slug = set_feed_slug.__get__(reader) 32 | 33 | def get_feed_by_slug(reader, slug): 34 | tag = _make_tag(reader, slug) 35 | return next(reader.get_feeds(tags=[tag], limit=1), None) 36 | 37 | def get_feed_slug(reader, feed): 38 | if tag := next(_get_tags(reader, feed), None): 39 | return tag.removeprefix(_make_tag(reader, '')) 40 | return None 41 | 42 | def set_feed_slug(reader, feed, slug: str | None): 43 | feed = reader.get_feed(feed) 44 | tag = _make_tag(reader, slug) 45 | 46 | if not slug: 47 | reader.delete_tag(feed, tag, missing_ok=True) 48 | return 49 | 50 | reader.set_tag(feed, tag) 51 | 52 | # ensure only one feed has the slug; technically a race condition, 53 | # when it happens no feed will have the tag 54 | for other_feed in reader.get_feeds(tags=[tag]): 55 | if feed.url != other_feed.url: 56 | reader.delete_tag(other_feed, tag, missing_ok=True) 57 | 58 | # ensure feed has only one slug; technically a race condition, 59 | # when it happens the feed will have no slug 60 | for other_tag in _get_tags(reader, feed): 61 | if tag != other_tag: 62 | reader.delete_tag(feed, other_tag, missing_ok=True) 63 | 64 | def _make_tag(reader, slug): 65 | return reader.make_plugin_reserved_name('slug', slug) 66 | 67 | def _get_tags(reader, resource): 68 | prefix = _make_tag(reader, '') 69 | # filter tags by prefix would make this faster, 70 | # https://github.com/lemon24/reader/issues/309 71 | return (t for t in reader.get_tag_keys(resource) if t.startswith(prefix)) 72 | 73 | if __name__ == '__main__': 74 | from reader import make_reader 75 | 76 | reader = make_reader('db.sqlite', plugins=[init_reader]) 77 | url = 'https://death.andgravity.com/_feed/index.xml' 78 | 79 | reader.set_feed_slug(url, 'one') 80 | print( 81 | reader.get_feed_slug(url), 82 | getattr(reader.get_feed_by_slug('one'), 'url', None), 83 | ) 84 | 85 | reader.set_feed_slug(url, 'two') 86 | print( 87 | reader.get_feed_slug(url), 88 | getattr(reader.get_feed_by_slug('two'), 'url', None), 89 | ) 90 | 91 | reader.set_feed_slug('https://xkcd.com/atom.xml', 'two') 92 | print( 93 | reader.get_feed_slug(url), 94 | getattr(reader.get_feed_by_slug('two'), 'url', None), 95 | ) 96 | -------------------------------------------------------------------------------- /examples/parser_only.py: -------------------------------------------------------------------------------- 1 | """ 2 | Parsing a feed retrieved with something other than *reader* 3 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 4 | 5 | Example of using the *reader* internal API to parse a feed 6 | retrieved asynchronously with `HTTPX `_: 7 | 8 | .. code-block:: console 9 | 10 | $ python examples/parser_only.py 11 | death and gravity 12 | Has your password been pwned? 
Or, how I almost failed to search a 37 GB text file in under 1 millisecond (in Python) 13 | 14 | """ 15 | 16 | import asyncio 17 | import io 18 | import httpx 19 | from reader._parser import default_parser 20 | from werkzeug.http import parse_options_header 21 | 22 | url = "https://death.andgravity.com/_feed/index.xml" 23 | meta_parser = default_parser() 24 | 25 | 26 | async def main(): 27 | async with httpx.AsyncClient() as client: 28 | response = await client.get(url) 29 | 30 | # to select the parser, we need the MIME type of the response 31 | content_type = response.headers.get('content-type') 32 | if content_type: 33 | mime_type, _ = parse_options_header(content_type) 34 | else: 35 | mime_type = None 36 | 37 | # select the parser (raises ParseError if none found) 38 | parser, _ = meta_parser.get_parser(url, mime_type) 39 | 40 | # wrap the content in a readable binary file 41 | file = io.BytesIO(response.content) 42 | 43 | # parse the feed; not doing parser(url, file, response.headers) directly 44 | # because parsing is CPU-intensive and would block the event loop 45 | feed, entries = await asyncio.to_thread(parser, url, file, response.headers) 46 | 47 | print(feed.title) 48 | print(entries[0].title) 49 | 50 | 51 | if __name__ == '__main__': 52 | asyncio.run(main()) 53 | -------------------------------------------------------------------------------- /examples/podcast.py: -------------------------------------------------------------------------------- 1 | """ 2 | Use *reader* to download all the episodes of a podcast, 3 | and then each new episode as they come up. 4 | 5 | Part of https://reader.readthedocs.io/en/latest/tutorial.html 6 | 7 | """ 8 | 9 | import os 10 | import os.path 11 | import shutil 12 | 13 | import requests 14 | 15 | from reader import make_reader 16 | 17 | 18 | feed_url = "http://www.hellointernet.fm/podcast?format=rss" 19 | podcasts_dir = "podcasts" 20 | 21 | reader = make_reader("db.sqlite") 22 | 23 | 24 | def add_and_update_feed(): 25 | reader.add_feed(feed_url, exist_ok=True) 26 | reader.update_feeds() 27 | 28 | 29 | def download_everything(): 30 | entries = reader.get_entries(feed=feed_url, has_enclosures=True, read=False) 31 | 32 | for entry in entries: 33 | print(entry.feed.title, '-', entry.title) 34 | 35 | for enclosure in entry.enclosures: 36 | filename = enclosure.href.rpartition('/')[2] 37 | print(" *", filename) 38 | download_file(enclosure.href, os.path.join(podcasts_dir, filename)) 39 | 40 | reader.mark_entry_as_read(entry) 41 | 42 | 43 | def download_file(src_url, dst_path): 44 | part_path = dst_path + '.part' 45 | with requests.get(src_url, stream=True) as response: 46 | response.raise_for_status() 47 | try: 48 | with open(part_path, 'wb') as file: 49 | shutil.copyfileobj(response.raw, file) 50 | os.rename(part_path, dst_path) 51 | except BaseException: 52 | try: 53 | os.remove(part_path) 54 | except Exception: 55 | pass 56 | raise 57 | 58 | 59 | add_and_update_feed() 60 | 61 | feed = reader.get_feed(feed_url) 62 | print(f"updated {feed.title} (last changed at {feed.updated})\n") 63 | 64 | os.makedirs(podcasts_dir, exist_ok=True) 65 | download_everything() 66 | -------------------------------------------------------------------------------- /examples/terminal.py: -------------------------------------------------------------------------------- 1 | """ 2 | A simple terminal feed reader that shows a screenful of articles 3 | and updates every 10 minutes. 
4 | 5 | Run with:: 6 | 7 | python examples/terminal.py db.sqlite 8 | 9 | To add feeds, run:: 10 | 11 | python -m reader --db db.sqlite add http://example.com/feed.xml 12 | 13 | 14 | """ 15 | 16 | import logging 17 | import os 18 | import sys 19 | import textwrap 20 | import time 21 | import itertools 22 | 23 | from reader import make_reader 24 | 25 | 26 | def get_lines(reader): 27 | size = os.get_terminal_size() 28 | 29 | # Only take as many entries as we have lines. 30 | entries = reader.get_entries(limit=size.lines - 1) 31 | 32 | lines = ( 33 | line 34 | for entry in entries 35 | for line in textwrap.wrap( 36 | f"{(entry.published or entry.updated or entry.added).date()} - " 37 | f"{entry.feed.title} - {entry.title}", 38 | width=size.columns, 39 | ) 40 | ) 41 | return itertools.islice(lines, size.lines - 1) 42 | 43 | 44 | def print_status_line(message, seconds): 45 | print(message, end="", flush=True) 46 | time.sleep(seconds) 47 | length = len(message) 48 | print("\b" * length, " " * length, "\b" * length, sep="", end="", flush=True) 49 | 50 | 51 | reader = make_reader(sys.argv[1]) 52 | 53 | # Prevent update errors from showing. 54 | logging.basicConfig(level=logging.CRITICAL) 55 | 56 | update_interval = 60 * 10 57 | last_updated = time.monotonic() - update_interval 58 | 59 | while True: 60 | # Clear screen; should be cross-platform. 61 | os.system("cls || clear") 62 | 63 | print(*get_lines(reader), sep="\n") 64 | 65 | # Keep sleeping until we need to update. 66 | while True: 67 | now = time.monotonic() 68 | if now - last_updated > update_interval: 69 | break 70 | to_sleep = update_interval - (now - last_updated) 71 | message = f"updating in {int(to_sleep // 60) + 1} minutes ..." 72 | print_status_line(message, 60) 73 | 74 | print("updating ...", end="", flush=True) 75 | last_updated = time.monotonic() 76 | reader.update_feeds(workers=10) 77 | -------------------------------------------------------------------------------- /scripts/backup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # back up a SQLite database 4 | # 5 | # usage: 6 | # ./backup.sh src dst 7 | # ./backup.sh src 8 | # 9 | # example: 10 | # "./backup.sh /src/db.sqlite" -> ./db.sqlite.2023-01-28.gz 11 | # 12 | 13 | set -o nounset 14 | set -o pipefail 15 | set -o errexit 16 | 17 | if (( $# == 1 )); then 18 | src=$1 19 | dst=$( pwd )/$( basename "$src" ).$( date -u +%Y-%m-%d ) 20 | elif (( $# == 2 )); then 21 | src=$1 22 | dst=$2 23 | else 24 | exit 1 25 | fi 26 | 27 | tmpdir=$( mktemp -d ) 28 | trap 'rm -rf '"$tmpdir" EXIT 29 | 30 | tmp=$tmpdir/$( basename "$src" ) 31 | 32 | du -sh "$src" 33 | sqlite3 "$src" "VACUUM INTO '$tmp'" 34 | du -sh "$tmp" 35 | gzip -c "$tmp" > "$dst.gz" 36 | du -sh "$dst.gz" 37 | -------------------------------------------------------------------------------- /scripts/generate_import_all.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import random 3 | 4 | 5 | context = {} 6 | exec('from reader import *', context) 7 | context.pop('__builtins__') 8 | 9 | print("# importing stuff from reader should type check") 10 | print("# force mypy to check this every time:", random.random()) 11 | 12 | for name, value in context.items(): 13 | if inspect.ismodule(value): 14 | continue 15 | print('from reader import', name) 16 | -------------------------------------------------------------------------------- /scripts/jscontrols.py: 
-------------------------------------------------------------------------------- 1 | import os.path 2 | import sys 3 | 4 | import werkzeug 5 | from flask import flash 6 | from flask import Flask 7 | from flask import jsonify 8 | from flask import redirect 9 | from flask import request 10 | 11 | 12 | root_dir = os.path.dirname(__file__) 13 | sys.path.insert(0, os.path.join(root_dir, '../src')) 14 | 15 | from reader._app.api_thing import APIError 16 | from reader._app.api_thing import APIThing 17 | 18 | 19 | app = Flask( 20 | __name__, 21 | template_folder='../src/reader/_app/templates', 22 | static_folder='../src/reader/_app/static', 23 | ) 24 | app.secret_key = 'secret' 25 | 26 | 27 | @app.route('/') 28 | def root(): 29 | with open(os.path.join(root_dir, 'jscontrols.html')) as f: 30 | template_string = f.read() 31 | return app.jinja_env.from_string(template_string).render() 32 | 33 | 34 | form = APIThing(app, '/form', 'form') 35 | 36 | 37 | @form 38 | def simple(data): 39 | return 'simple' 40 | 41 | 42 | @form 43 | def simple_next(data): 44 | return 'simple-next: %s' % data['next'] 45 | 46 | 47 | @form(really=True) 48 | def confirm(data): 49 | return 'confirm' 50 | 51 | 52 | @form 53 | def text(data): 54 | text = data['text'] 55 | if text.startswith('err'): 56 | raise APIError(text, 'category') 57 | return 'text: %s' % text 58 | 59 | 60 | @form(really=True) 61 | def text_confirm(data): 62 | text = data['text'] 63 | if text.startswith('err'): 64 | raise APIError(text, 'category') 65 | return 'text confirm: %s' % text 66 | -------------------------------------------------------------------------------- /scripts/lines.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Print various Python and wc lines of code. 4 | 5 | function sloc { 6 | coverage report \ 7 | | grep -A9999 ^--- \ 8 | | grep -B9999 ^--- \ 9 | | grep -v ^-- \ 10 | | awk '{ print $1 "\t" $2 }' 11 | } 12 | 13 | function count { 14 | sloc | grep "$@" | cut -f2 | paste -sd+ - | bc 15 | sloc | grep "$@" | cut -f1 | xargs wc -l | tail -n-1 | awk '{ print $1 }' 16 | } 17 | 18 | # cache sloc output 19 | _sloc=$( sloc ) 20 | function sloc { 21 | echo "$_sloc" 22 | } 23 | 24 | { 25 | echo '' stmts lines 26 | echo src $( count ^src/ ) 27 | echo core $( count -e ^src/reader/core/ -e ^src/reader/__init__.py ) 28 | echo cli $( count ^src/reader/cli ) 29 | echo app $( count ^src/reader/app/ ) 30 | echo plugins $( count ^src/reader/plugins/ ) 31 | echo tests $( count ^tests/ ) 32 | echo total $( count '.' ) 33 | } \ 34 | | tr ' ' '\t' 35 | -------------------------------------------------------------------------------- /src/reader/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | reader 3 | ====== 4 | 5 | A minimal feed reader. 6 | 7 | Usage 8 | ----- 9 | 10 | Here is small example of using reader. 
11 | 12 | Create a Reader object:: 13 | 14 | reader = make_reader('db.sqlite') 15 | 16 | Add a feed:: 17 | 18 | reader.add_feed('http://www.hellointernet.fm/podcast?format=rss') 19 | 20 | Update all the feeds:: 21 | 22 | reader.update_feeds() 23 | 24 | Get all the entries, both read and unread:: 25 | 26 | entries = list(reader.get_entries()) 27 | 28 | Mark the first entry as read:: 29 | 30 | reader.mark_entry_as_read(entries[0]) 31 | 32 | Print the titles of the unread entries:: 33 | 34 | for e in reader.get_entries(read=False): 35 | print(e.title) 36 | 37 | 38 | """ 39 | 40 | __version__ = '3.19.dev0' 41 | 42 | # isort: off 43 | 44 | from .core import ( 45 | Reader as Reader, 46 | make_reader as make_reader, 47 | ) 48 | 49 | from .types import ( 50 | Feed as Feed, 51 | ExceptionInfo as ExceptionInfo, 52 | Entry as Entry, 53 | Content as Content, 54 | Enclosure as Enclosure, 55 | EntrySource as EntrySource, 56 | EntrySearchResult as EntrySearchResult, 57 | HighlightedString as HighlightedString, 58 | FeedCounts as FeedCounts, 59 | EntryCounts as EntryCounts, 60 | EntrySearchCounts as EntrySearchCounts, 61 | FeedSort as FeedSort, 62 | EntrySort as EntrySort, 63 | EntrySearchSort as EntrySearchSort, 64 | UpdateResult as UpdateResult, 65 | UpdatedFeed as UpdatedFeed, 66 | EntryUpdateStatus as EntryUpdateStatus, 67 | ) 68 | 69 | from .exceptions import ( 70 | ReaderError as ReaderError, 71 | FeedError as FeedError, 72 | FeedExistsError as FeedExistsError, 73 | FeedNotFoundError as FeedNotFoundError, 74 | InvalidFeedURLError as InvalidFeedURLError, 75 | EntryError as EntryError, 76 | EntryExistsError as EntryExistsError, 77 | EntryNotFoundError as EntryNotFoundError, 78 | UpdateError as UpdateError, 79 | ParseError as ParseError, 80 | UpdateHookError as UpdateHookError, 81 | SingleUpdateHookError as SingleUpdateHookError, 82 | UpdateHookErrorGroup as UpdateHookErrorGroup, 83 | StorageError as StorageError, 84 | SearchError as SearchError, 85 | SearchNotEnabledError as SearchNotEnabledError, 86 | InvalidSearchQueryError as InvalidSearchQueryError, 87 | TagError as TagError, 88 | TagNotFoundError as TagNotFoundError, 89 | ResourceNotFoundError as ResourceNotFoundError, 90 | PluginError as PluginError, 91 | InvalidPluginError as InvalidPluginError, 92 | PluginInitError as PluginInitError, 93 | ReaderWarning as ReaderWarning, 94 | ) 95 | 96 | 97 | # For internal use only. 98 | 99 | _CONFIG_ENVVAR = 'READER_CONFIG' 100 | _DB_ENVVAR = 'READER_DB' 101 | _PLUGIN_ENVVAR = 'READER_PLUGIN' 102 | _APP_PLUGIN_ENVVAR = 'READER_APP_PLUGIN' 103 | _CLI_PLUGIN_ENVVAR = 'READER_CLI_PLUGIN' 104 | 105 | 106 | # Constants. 107 | 108 | USER_AGENT = f'python-reader/{__version__} (+https://github.com/lemon24/reader)' 109 | 110 | 111 | # Prevent any logging output by default. If no handler is set, 112 | # the messages bubble up to the root logger and get printed on stderr. 113 | # https://docs.python.org/3/howto/logging.html#library-config 114 | import logging # noqa: E402 115 | 116 | logging.getLogger('reader').addHandler(logging.NullHandler()) 117 | -------------------------------------------------------------------------------- /src/reader/__main__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | 4 | CANNOT_IMPORT = """\ 5 | Error: cannot import reader._cli 6 | 7 | This might be due to missing dependencies. 
The command-line interface is 8 | optional, use the 'cli' extra to install its dependencies: 9 | 10 | pip install reader[cli] 11 | """ 12 | 13 | try: 14 | from reader._cli import cli 15 | 16 | cli(prog_name='python -m reader') 17 | except ImportError: 18 | print(CANNOT_IMPORT, file=sys.stderr) 19 | raise 20 | -------------------------------------------------------------------------------- /src/reader/_app/cli.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | import reader 4 | from reader._cli import setup_logging 5 | 6 | 7 | def make_add_response_headers_middleware(wsgi_app, headers): 8 | def wsgi_app_wrapper(environ, start_response): 9 | def start_response_wrapper(status, response_headers, exc_info=None): 10 | response_headers.extend(headers) 11 | return start_response(status, response_headers, exc_info) 12 | 13 | return wsgi_app(environ, start_response_wrapper) 14 | 15 | return wsgi_app_wrapper 16 | 17 | 18 | @click.command() 19 | @click.pass_obj 20 | @click.option('-h', '--host', default='localhost', help="The interface to bind to.") 21 | @click.option('-p', '--port', default=8080, type=int, help="The port to bind to.") 22 | @click.option( 23 | '--plugin', 24 | multiple=True, 25 | envvar=reader._APP_PLUGIN_ENVVAR, 26 | help="Import path to a web app plug-in. Can be passed multiple times.", 27 | ) 28 | @click.option('-v', '--verbose', count=True) 29 | def serve(config, host, port, plugin, verbose): 30 | """Start a local HTTP reader server.""" 31 | setup_logging(verbose) 32 | from werkzeug.serving import run_simple 33 | 34 | from . import create_app 35 | 36 | if plugin: 37 | config['app']['plugins'] = dict.fromkeys(plugin) 38 | 39 | app = create_app(config) 40 | app.wsgi_app = make_add_response_headers_middleware( 41 | app.wsgi_app, 42 | [('Referrer-Policy', 'same-origin')], 43 | ) 44 | 45 | run_simple(host, port, app) 46 | -------------------------------------------------------------------------------- /src/reader/_app/templates/add_entry.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% import "macros.html" as macros %} 4 | 5 | 6 | {% block page_title %}Add entry to {{ macros.feed_title(feed) }}{% endblock %} 7 | {% block main_title %}Add entry to {{ macros.feed_title(feed) }}{% endblock %} 8 | 9 | 10 | {% block body %} 11 | 12 |
13 | 14 | 15 |
16 | 17 |

18 |

19 |

20 | 21 | 22 | 23 | {# TODO: maybe redirect to entry page? how? – we don't know the entry url yet #} 24 | 25 | 26 |

27 | 28 | 29 |
    30 | 31 | {% for message in get_flashed_messages_by_prefix( 32 | ('add-entry', feed.url), 33 | ) %} 34 |
  • {{ message }} 35 | {% endfor %} 36 | 37 |
38 | 39 | 40 |
41 | 42 | {% endblock %} 43 | -------------------------------------------------------------------------------- /src/reader/_app/templates/entry.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% import "macros.html" as macros %} 4 | 5 | {% block page_title %}Entry: {{ (entry.title or entry.link or entry.id) | trim | striptags }}{% endblock %} 6 | {% block main_title %}Entry: {{ (entry.title or entry.link or entry.id) | striptags }}{% endblock %} 7 | 8 | 9 | {% block body %} 10 | 11 | {% set feed = entry.feed %} 12 | {% set content = entry.get_content() %} 13 | 14 | 15 |
16 | 17 | 18 |
    19 | 20 |
  • 21 | {% if entry.author %} by {{ entry.author }}{% endif %} 22 | in {{ entry.feed_resolved_title or feed.url }} 23 |
  • 24 | {%- set published = entry.published or entry.updated_not_none -%} 25 | {{ published | humanize_naturaltime }} 26 | 27 | {% set next = url_for('.entry', **request.args) %} 28 | {% set context = {'feed-url': feed.url, 'entry-id': entry.id} %} 29 | 30 | {% if entry.read %} 31 | {{ macros.simple_button('.form_api', 'mark-as-unread', 'unread', leave_disabled=true, next=next, context=context, title=entry.read_modified or "not modified") }} 32 | {% else %} 33 | {{ macros.simple_button('.form_api', 'mark-as-read', 'read', leave_disabled=true, next=next, context=context, title=entry.read_modified or "not modified") }} 34 | {% endif %} 35 | 36 | {% if not entry.important %} 37 | {{ macros.simple_button('.form_api', 'mark-as-important', 'important', leave_disabled=true, next=next, context=context, title=entry.important_modified or "not modified") }} 38 | {% endif %} 39 | {% if entry.important is not none %} 40 | {{ macros.simple_button('.form_api', 'clear-important', "clear " + ("important" if entry.important else "don't care"), leave_disabled=true, next=next, context=context) }} 41 | {% endif %} 42 | {% if entry.important is not false %} 43 | {{ macros.simple_button('.form_api', 'mark-as-unimportant', "don't care", leave_disabled=true, next=next, context=context, title=entry.important_modified or "not modified") }} 44 | {% endif %} 45 | 46 | {% if entry.added_by == 'user' %} 47 | {{ macros.confirm_button('.form_api', 'delete-entry', 'delete', leave_disabled=true, next=url_for('.entries', **request.args), context=context) }} 48 | {% endif %} 49 | 50 |
  • 51 | update metadata 52 | 53 | {{ macros.readtime(tags) }} 54 | 55 | 56 | {% for message in get_flashed_messages_by_prefix( 57 | ('mark-as-read', feed.url, entry.id), 58 | ('mark-as-unread', feed.url, entry.id), 59 | ('mark-as-important', feed.url, entry.id), 60 | ('clear-important', feed.url, entry.id), 61 | ('mark-as-unimportant', feed.url, entry.id), 62 | ('delete-entry', feed.url, entry.id), 63 | ) %} 64 |
  • {{ message }} 65 | {% endfor %} 66 | 67 |
68 | 69 | 70 | {# TODO: Also show summary. #} 71 | {# TODO: This allows iframes to show; is it safe? #} 72 | {# TODO: This should be styled somehow. #} 73 | {# TODO: h1 inside article is as big as the h1 in the header. #} 74 | {# TODO: Tables look wonky if they're too wide; hard to fix without cooperation from the html. #} 75 | 76 | 77 | {% if content %} 78 |
79 | {% if not content.is_html -%} 80 |
{{ content.value }}
81 | {%- else -%} 82 | {{ content.value | safe }} 83 | {%- endif %} 84 | 85 |
86 | {% else %} 87 |

no content

88 | {% endif %} 89 | 90 | {{ macros.entry_enclosures(entry) }} 91 | 92 | 93 |

Links: 94 | {% for title, href in additional_links(entry) %} 95 | {{ title }}  96 | {% endfor %} 97 | 98 | 99 | 100 |

101 | {% endblock %} 102 | -------------------------------------------------------------------------------- /src/reader/_app/templates/layout.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 14 | 15 | {% block page_title %}{% endblock %} 16 | 17 |
18 |
    19 |
  • 20 | entries 21 | feeds 22 | tags 23 | metadata 24 | v2 25 | 26 | {{ macros.text_input_button_get( 27 | 'reader.preview', 'add feed', 'url', 'url', 28 | ) }} 29 | 30 | 31 | {# TODO: this is likely not needed since add-feed became a GET button #} 32 | {% for message in get_flashed_messages_by_prefix('add-feed') %} 33 |
  • {{ message }} 34 | {% endfor %} 35 |
36 | 37 |
38 | 39 | 40 |

{% block main_title %}{% endblock %}

41 | 42 | {% block body %}{% endblock %} 43 | 44 | 45 | 62 | 63 | 64 | {% if config.DEBUG %} 65 |

74 | 75 | 76 | {{ config.READER_CONFIG.merged('app').reader.url }} 77 | 78 | 79 | {% set maxrss = debug_maxrss_mib() %} 80 | 81 | rss={{ maxrss | round(1) }} 82 | 83 | 84 |

85 | {% endif %} 86 | -------------------------------------------------------------------------------- /src/reader/_app/templates/metadata.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% import "macros.html" as macros %} 4 | 5 | 6 | {% macro make_title() %} 7 | {% if feed %} 8 | {% if not entry %} 9 | Metadata for 10 | 12 | {{ macros.feed_title(feed) }} 13 | {% else %} 14 | Metadata for 15 | 16 | {{ entry.title or "untitled" }} 17 | {% endif %} 18 | {% else %} 19 | Global metadata 20 | {% endif %} 21 | {% endmacro %} 22 | 23 | {% block page_title %}{{ make_title() | striptags }}{% endblock %} 24 | {% block main_title %}{{ make_title() }}{% endblock %} 25 | 26 | 27 | {% block body %} 28 | 29 | 30 |
31 |
32 | 33 | 34 | {% if feed %} 35 | 36 | {% if entry %} 37 | 38 | {% endif %} 39 | {% endif %} 40 | 41 |
42 | 43 | 44 |
    45 | 46 | {% set resource_id = entry.resource_id or feed.resource_id or () %} 47 | 48 | {% for message in get_flashed_messages_by_prefix( 49 | ('add-metadata',) + resource_id, 50 | ) %} 51 |
  • {{ message }} 52 | {% endfor %} 53 | 54 |
55 | 56 |
57 | 58 | 59 | {% for key, value in metadata | sort %} 60 | 91 | 92 | {% else %} 93 |

no metadata for this resource

94 | {% endfor %} 95 | 96 | {% endblock %} 97 | -------------------------------------------------------------------------------- /src/reader/_app/templates/tags.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% import "macros.html" as macros %} 4 | 5 | 6 | {% block page_title %}Tags{% endblock %} 7 | {% block main_title %}Tags{% endblock %} 8 | 9 | 10 | {% block body %} 11 | 12 |
13 |
    14 | 15 | {{ macros.toggle_link('counts', [ 16 | ('yes', 'counts'), 17 | ('no', 'no counts'), 18 | ], 'no', '.tags') }} 19 | 20 | {% if error %} 21 |
  • error: {{ error }} 22 | {% endif %} 23 | 24 |
25 |
26 | 27 | 28 |
29 | 30 | {% for tag, feed_counts, entry_counts in tags %} 31 |
32 | {% if tag == none %} all 33 | {% elif tag == true %} any tags 34 | {% elif tag == false %} no tags 35 | {% else %} {{ tag }} 36 | {% endif %} 37 |
38 | 39 |
40 | {% set url_kwargs = {'tags': [tag] | tojson } if tag is not none else {} %} 41 | 42 | {% if feed_counts %} 43 | {{ feed_counts.total }} 44 | {% endif %} 45 | feeds 46 | {%- if feed_counts %}{% endif -%} 47 | , 48 | {% if entry_counts %} 49 | {{ macros.entry_counts(entry_counts, url_for('.entries', **url_kwargs)) }} 50 | {% else %} 51 | entries 52 | {% endif %} 53 | {% if entry_counts %}{% endif %} 54 |
55 | 56 | {% else %} 57 | 58 | {% if not error %} 59 |

no tags

60 | {% endif %} 61 | 62 | {% endfor %} 63 |
64 | 65 | 66 | 67 | {% endblock %} 68 | -------------------------------------------------------------------------------- /src/reader/_app/v2/__init__.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | from functools import partial 3 | 4 | from flask import abort 5 | from flask import Blueprint 6 | from flask import current_app 7 | from flask import redirect 8 | from flask import request 9 | from flask import url_for 10 | from jinja2_fragments.flask import render_block 11 | 12 | from reader import InvalidSearchQueryError 13 | 14 | from .. import EntryProxy 15 | from .. import get_reader 16 | from .. import stream_template 17 | from .forms import EntryFilter 18 | from .forms import SearchEntryFilter 19 | 20 | 21 | blueprint = Blueprint( 22 | 'v2', __name__, template_folder='templates', static_folder='static' 23 | ) 24 | 25 | 26 | @blueprint.route('/') 27 | def entries(): 28 | reader = get_reader() 29 | 30 | # TODO: search improvements 31 | # TODO: paqgination 32 | # TODO: read time 33 | 34 | if request.args.get('q', '').strip(): 35 | form = SearchEntryFilter(request.args) 36 | else: 37 | form = EntryFilter(request.args) 38 | 39 | form_args = form.args 40 | if q := form_args.pop('Q', ''): 41 | form_args['q'] = q 42 | return redirect(url_for('.entries', **form_args)) 43 | if form_args != request.args.to_dict(): 44 | return redirect(url_for('.entries', **form_args)) 45 | 46 | feed = None 47 | if form.feed.data: 48 | feed = reader.get_feed(form.feed.data, None) 49 | if not feed: 50 | abort(404) 51 | 52 | kwargs = dict(form.data) 53 | if query := kwargs.pop('search', None): 54 | 55 | def get_entries(**kwargs): 56 | for sr in reader.search_entries(query, **kwargs): 57 | yield EntryProxy(sr, reader.get_entry(sr)) 58 | 59 | else: 60 | get_entries = reader.get_entries 61 | 62 | entries = [] 63 | if form.validate(): 64 | try: 65 | entries = eager_iterator(get_entries(**kwargs, limit=64)) 66 | except StopIteration: 67 | pass 68 | except InvalidSearchQueryError as e: 69 | form.search.errors.append(f"invalid query: {e}") 70 | 71 | return stream_template( 72 | 'v2/entries.html', 73 | form=form, 74 | entries=entries, 75 | feed=feed, 76 | ) 77 | 78 | 79 | def eager_iterator(it): 80 | it = iter(it) 81 | try: 82 | return itertools.chain([next(it)], it) 83 | except StopIteration: 84 | return it 85 | 86 | 87 | @blueprint.route('/mark-as', methods=['POST']) 88 | def mark_as(): 89 | reader = get_reader() 90 | 91 | entry = request.form['feed-url'], request.form['entry-id'] 92 | 93 | if 'read' in request.form: 94 | match request.form['read']: 95 | case 'true': 96 | reader.set_entry_read(entry, True) 97 | case 'false': 98 | reader.set_entry_read(entry, False) 99 | case _: 100 | abort(422) 101 | 102 | if 'important' in request.form: 103 | match request.form['important']: 104 | case 'true': 105 | reader.set_entry_important(entry, True) 106 | case 'false': 107 | reader.set_entry_important(entry, False) 108 | case 'none': 109 | reader.set_entry_important(entry, None) 110 | case _: 111 | abort(422) 112 | 113 | if request.headers.get('hx-request') == 'true': 114 | return render_block( 115 | 'v2/entries.html', 116 | 'entry_form', 117 | entry=reader.get_entry(entry), 118 | next=request.form['next'], 119 | # equivalent to {% import "v2/macros.html" as macros %} 120 | macros=current_app.jinja_env.get_template('v2/macros.html').module, 121 | ) 122 | 123 | return redirect(request.form['next'], code=303) 124 | 
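A minimal sketch of why ``entries()`` above wraps its results in ``eager_iterator()``: forcing the first ``next()`` call makes errors raised by a lazy generator (such as an invalid search query) surface inside the view, before any of the streamed template has been sent. The ``failing_entries`` generator below is hypothetical; only ``eager_iterator`` mirrors the helper defined above::

    import itertools

    def eager_iterator(it):
        # same idea as the helper above: force the first item eagerly
        it = iter(it)
        try:
            return itertools.chain([next(it)], it)
        except StopIteration:
            return it

    def failing_entries():
        # hypothetical stand-in for reader.search_entries("bad query")
        raise ValueError("invalid query")
        yield  # never reached; only makes this a generator

    try:
        entries = eager_iterator(failing_entries())
    except ValueError:
        entries = []  # fall back here, before streaming starts
    print(list(entries))  # -> []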
-------------------------------------------------------------------------------- /src/reader/_app/v2/static/style.css: -------------------------------------------------------------------------------- 1 | 2 | .navbar { 3 | --bs-navbar-padding-y: 0.25rem; 4 | --bs-navbar-toggler-padding-y: 0.25rem; 5 | --bs-navbar-toggler-padding-x: 0.25rem; 6 | --bs-navbar-toggler-font-size: 1rem; 7 | --bs-navbar-toggler-border-color: rgba(0, 0, 0, 0); 8 | } 9 | 10 | .nav.controls { 11 | --bs-nav-link-padding-x: 0; 12 | --bs-nav-link-padding-y: 0; 13 | gap: 1rem; 14 | } 15 | .nav.controls .nav-link.active { 16 | color: var(--bs-navbar-active-color); 17 | } 18 | 19 | .htmx-indicator { 20 | display: none; 21 | } 22 | .htmx-request .htmx-indicator, .htmx-request.htmx-indicator { 23 | display: inline-block; 24 | } 25 | .htmx-request .label, .htmx-request.label { 26 | display: none; 27 | } 28 | -------------------------------------------------------------------------------- /src/reader/_app/v2/static/theme.js: -------------------------------------------------------------------------------- 1 | /*! 2 | * Color mode toggler for Bootstrap's docs (https://getbootstrap.com/) 3 | * Copyright 2011-2024 The Bootstrap Authors 4 | * Licensed under the Creative Commons Attribution 3.0 Unported License. 5 | */ 6 | 7 | /* 8 | * Modified to use the Bootstrap Icons font, instead of SVG sprites. 9 | */ 10 | 11 | (() => { 12 | 'use strict' 13 | 14 | const getStoredTheme = () => localStorage.getItem('theme') 15 | const setStoredTheme = theme => localStorage.setItem('theme', theme) 16 | 17 | const getPreferredTheme = () => { 18 | const storedTheme = getStoredTheme() 19 | if (storedTheme) { 20 | return storedTheme 21 | } 22 | 23 | return window.matchMedia('(prefers-color-scheme: dark)').matches ? 'dark' : 'light' 24 | } 25 | 26 | const setTheme = theme => { 27 | if (theme === 'auto') { 28 | document.documentElement.setAttribute('data-bs-theme', (window.matchMedia('(prefers-color-scheme: dark)').matches ? 
'dark' : 'light')) 29 | } else { 30 | document.documentElement.setAttribute('data-bs-theme', theme) 31 | } 32 | } 33 | 34 | const getIconCls = btn => { 35 | return btn.querySelector('.bi').classList.values().find(x => x.startsWith('bi-')) 36 | } 37 | 38 | setTheme(getPreferredTheme()) 39 | 40 | const showActiveTheme = (theme, focus = false) => { 41 | const themeSwitcher = document.querySelector('#theme') 42 | 43 | if (!themeSwitcher) { 44 | return 45 | } 46 | 47 | const themeSwitcherText = document.querySelector('#theme-text') 48 | const activeThemeIcon = document.querySelector('.theme-icon-active') 49 | const btnToActive = document.querySelector(`[data-bs-theme-value="${theme}"]`) 50 | const clsOfActiveBtn = btnToActive.querySelector('.bi').classList.values().find(x => x.startsWith('bi-')) 51 | 52 | document.querySelectorAll('[data-bs-theme-value]').forEach(element => { 53 | element.classList.remove('active') 54 | element.setAttribute('aria-pressed', 'false') 55 | }) 56 | 57 | btnToActive.classList.add('active') 58 | btnToActive.setAttribute('aria-pressed', 'true') 59 | activeThemeIcon.classList.remove( 60 | activeThemeIcon.classList.values().find(x => x.startsWith('bi-')) 61 | ) 62 | activeThemeIcon.classList.add(clsOfActiveBtn) 63 | const themeSwitcherLabel = `${themeSwitcherText.textContent} (${btnToActive.dataset.bsThemeValue})` 64 | themeSwitcher.setAttribute('aria-label', themeSwitcherLabel) 65 | 66 | if (focus) { 67 | themeSwitcher.focus() 68 | } 69 | } 70 | 71 | window.matchMedia('(prefers-color-scheme: dark)').addEventListener('change', () => { 72 | const storedTheme = getStoredTheme() 73 | if (storedTheme !== 'light' && storedTheme !== 'dark') { 74 | setTheme(getPreferredTheme()) 75 | } 76 | }) 77 | 78 | window.addEventListener('DOMContentLoaded', () => { 79 | showActiveTheme(getPreferredTheme()) 80 | 81 | document.querySelectorAll('[data-bs-theme-value]') 82 | .forEach(toggle => { 83 | toggle.addEventListener('click', () => { 84 | const theme = toggle.getAttribute('data-bs-theme-value') 85 | setStoredTheme(theme) 86 | setTheme(theme) 87 | showActiveTheme(theme, true) 88 | }) 89 | }) 90 | }) 91 | })() 92 | -------------------------------------------------------------------------------- /src/reader/_app/v2/templates/v2/macros.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | {%- macro input(field, icon=none, class=none) %} 4 |
5 |
6 | {% if icon -%} 7 | 8 | {%- endif %} 9 | {{ field( 10 | class="form-control" + (' is-invalid' if field.errors else ''), 11 | placeholder=field.label.text) }} 12 | {%- if field.errors %} 13 |
14 | {%- for error in field.errors %} 15 | {{ error }} 16 | {%- endfor %} 17 |
18 | {%- endif %} 19 |
20 |
21 | {%- endmacro %} 22 | 23 | 24 | {%- macro radio(field) %} 25 |
26 | {{ field.label.text }} 27 |
28 | {%- for option in field %} 29 |
30 | {{ option(class="form-check-input") }} 31 | {{ option.label(class="form-check-label") }} 32 |
33 | {%- endfor %} 34 |
35 |
36 | {%- endmacro %} 37 | 38 | 39 | {%- macro bs_file_icon(mimetype) -%} 40 | {%- set type = (mimetype or '').partition('/')[0] -%} 41 | {%- if type == 'audio' -%} file-earmark-music 42 | {%- elif type == 'image' -%} file-earmark-image 43 | {%- elif type == 'video' -%} file-earmark-play 44 | {%- elif type == 'text' -%} file-earmark-text 45 | {%- else -%} file-earmark 46 | {%- endif -%} 47 | {%- endmacro -%} 48 | -------------------------------------------------------------------------------- /src/reader/_app/wsgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | To run a local development server: 4 | 5 | FLASK_DEBUG=1 FLASK_TRAP_BAD_REQUEST_ERRORS=1 \ 6 | FLASK_APP=src/reader/_app/wsgi.py \ 7 | READER_CONFIG=examples/config.yaml READER_DB=db.sqlite \ 8 | flask run -h 0.0.0.0 -p 8000 9 | 10 | """ 11 | 12 | import os 13 | 14 | import yaml 15 | 16 | import reader._app 17 | import reader._config 18 | 19 | 20 | # TODO: the other envvars except _CONFIG_ENVVAR are for compatibility only 21 | 22 | if reader._CONFIG_ENVVAR in os.environ: 23 | with open(os.environ[reader._CONFIG_ENVVAR]) as file: 24 | config = reader._config.make_reader_config(yaml.safe_load(file)) 25 | else: 26 | config = reader._config.make_reader_config({}) 27 | 28 | if reader._DB_ENVVAR in os.environ: 29 | config.all['reader']['url'] = os.environ[reader._DB_ENVVAR] 30 | if reader._PLUGIN_ENVVAR in os.environ: 31 | config.all['reader']['plugins'] = dict.fromkeys( 32 | os.environ[reader._PLUGIN_ENVVAR].split() 33 | ) 34 | if reader._APP_PLUGIN_ENVVAR in os.environ: 35 | config.data['app']['plugins'] = dict.fromkeys( 36 | os.environ[reader._APP_PLUGIN_ENVVAR].split() 37 | ) 38 | 39 | app = reader._app.create_app(config) 40 | app.config['TRAP_BAD_REQUEST_ERRORS'] = bool( 41 | os.environ.get('FLASK_TRAP_BAD_REQUEST_ERRORS', '') 42 | ) 43 | -------------------------------------------------------------------------------- /src/reader/_hash_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generate stable hashes for Python data objects. 3 | Contains no business logic. 4 | 5 | The hashes should be stable across interpreter implementations and versions. 6 | 7 | Supports dataclass instances, datetimes, and JSON-serializable objects. 8 | 9 | Empty dataclass fields are ignored, to allow adding new fields without 10 | the hash changing. Empty means one of: None, '', (), [], or {}. 11 | 12 | The dataclass type is ignored: two instances of different types 13 | will have the same hash if they have the same attribute/value pairs. 14 | 15 | Design choices explained in https://death.andgravity.com/stable-hashing 16 | 17 | Implemented for https://github.com/lemon24/reader/issues/179 18 | 19 | """ 20 | 21 | from __future__ import annotations 22 | 23 | import dataclasses 24 | import datetime 25 | import hashlib 26 | import json 27 | from collections.abc import Collection 28 | from typing import Any 29 | 30 | 31 | # The first byte of the hash contains its version, 32 | # to allow upgrading the implementation without changing existing hashes. 33 | # (In practice, it's likely we'll just let the hash change and update 34 | # the affected objects again; nevertheless, it's good to have the option.) 35 | # 36 | # A previous version recommended using a check_hash(thing, hash) -> bool 37 | # function instead of direct equality checking; it was removed because 38 | # it did not allow objects to cache the hash. 
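# Illustrative sketch, not part of this module: because empty fields are
# ignored and the dataclass type does not matter, adding a new field with an
# empty default leaves existing hashes unchanged. The dataclasses below are
# hypothetical:
#
#     >>> from dataclasses import dataclass
#     >>> @dataclass
#     ... class Old:
#     ...     one: object
#     >>> @dataclass
#     ... class New:
#     ...     one: object
#     ...     two: object = None  # newly added, empty by default
#     >>> get_hash(Old(1)) == get_hash(New(1))
#     True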
39 | 40 | _VERSION = 0 41 | _EXCLUDE = '_hash_exclude_' 42 | 43 | 44 | def get_hash(thing: object) -> bytes: 45 | prefix = _VERSION.to_bytes(1, 'big') 46 | digest = hashlib.md5(_json_dumps(thing).encode('utf-8')).digest() 47 | return prefix + digest[:-1] 48 | 49 | 50 | def _json_dumps(thing: object) -> str: 51 | return json.dumps( 52 | thing, 53 | default=_json_default, 54 | # force formatting-related options to known values 55 | ensure_ascii=False, 56 | sort_keys=True, 57 | indent=None, 58 | separators=(',', ':'), 59 | ) 60 | 61 | 62 | def _json_default(thing: object) -> Any: 63 | try: 64 | return _dataclass_dict(thing) 65 | except TypeError: 66 | pass 67 | if isinstance(thing, datetime.datetime): 68 | return thing.isoformat(timespec='microseconds') 69 | raise TypeError(f"Object of type {type(thing).__name__} is not JSON serializable") 70 | 71 | 72 | def _dataclass_dict(thing: object) -> dict[str, Any]: 73 | # we could have used dataclasses.asdict() 74 | # with a dict_factory that drops empty values, 75 | # but asdict() is recursive and we need to intercept and check 76 | # the _hash_exclude_ of nested dataclasses; 77 | # this way, json.dumps() does the recursion instead of asdict() 78 | 79 | # raises TypeError for non-dataclasses 80 | fields = dataclasses.fields(thing) # type: ignore[arg-type] 81 | # ... but doesn't for dataclass *types* 82 | if isinstance(thing, type): 83 | raise TypeError("got type, expected instance") 84 | 85 | exclude = getattr(thing, _EXCLUDE, ()) 86 | 87 | rv = {} 88 | for field in fields: 89 | if field.name in exclude: 90 | continue 91 | 92 | value = getattr(thing, field.name) 93 | if value is None or not value and isinstance(value, Collection): 94 | continue 95 | 96 | rv[field.name] = value 97 | 98 | return rv 99 | -------------------------------------------------------------------------------- /src/reader/_parser/_http_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | HTTP utilities. Contains no business logic. 3 | 4 | """ 5 | 6 | from collections.abc import Iterable 7 | 8 | import werkzeug.http 9 | 10 | 11 | parse_options_header = werkzeug.http.parse_options_header 12 | parse_accept_header = werkzeug.http.parse_accept_header 13 | parse_date = werkzeug.http.parse_date 14 | 15 | 16 | def unparse_accept_header(values: Iterable[tuple[str, float]]) -> str: 17 | return werkzeug.datastructures.MIMEAccept(values).to_header() 18 | -------------------------------------------------------------------------------- /src/reader/_parser/file.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import pathlib 4 | from collections.abc import Iterator 5 | from contextlib import contextmanager 6 | from dataclasses import dataclass 7 | from typing import Any 8 | from typing import IO 9 | 10 | from ..exceptions import ParseError 11 | from . import wrap_exceptions 12 | from ._url_utils import extract_path 13 | from ._url_utils import resolve_root 14 | 15 | 16 | @dataclass(frozen=True) 17 | class FileRetriever: 18 | """Bare path and file:// URI parser. 19 | 20 | Allows restricting file-system access to a single directory; 21 | see :func:`~reader.make_reader` for details. 
22 | 23 | """ 24 | 25 | feed_root: str 26 | 27 | def __post_init__(self) -> None: 28 | # give feed_root checks a chance to fail early 29 | self._normalize_url('known-good-feed-url') 30 | 31 | @contextmanager 32 | def __call__(self, url: str, *args: Any, **kwargs: Any) -> Iterator[IO[bytes]]: 33 | try: 34 | normalized_url = self._normalize_url(url) 35 | except ValueError as e: 36 | raise ParseError(url, message=str(e)) from None 37 | 38 | with wrap_exceptions(url, "while reading feed"): 39 | with open(normalized_url, 'rb') as file: 40 | yield file 41 | 42 | def validate_url(self, url: str) -> None: 43 | self._normalize_url(url) 44 | 45 | def _normalize_url(self, url: str) -> str: 46 | path = extract_path(url) 47 | if self.feed_root: 48 | path = resolve_root(self.feed_root, path) 49 | if pathlib.PurePath(path).is_reserved(): 50 | raise ValueError("path must not be reserved") 51 | return path 52 | -------------------------------------------------------------------------------- /src/reader/_plugins/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Plug-in infrastructure. Not stable. 3 | 4 | Also package containing **unstable** plugins shipped with reader. 5 | 6 | Note that while the plugin entry points (import names) are relatively stable, 7 | the contents of the actual plugins is not. 8 | 9 | """ 10 | 11 | import functools 12 | from contextlib import contextmanager 13 | from pkgutil import resolve_name 14 | 15 | 16 | class LoaderError(Exception): 17 | pass 18 | 19 | 20 | def raise_exception(message, cause): 21 | raise LoaderError(message) from cause 22 | 23 | 24 | class Loader: 25 | """Plugin loader. 26 | 27 | Allows customizing plugin import/initialization failure behavior. 28 | 29 | The load(name, wrap=True) allows any plugin initialization errors 30 | to raise a single exception type, 31 | since make_reader(plugins=...) just lets the exception propagate. 32 | 33 | """ 34 | 35 | def load(self, name, *, wrap=False): 36 | try: 37 | plugin = resolve_name(name) 38 | except (ImportError, AttributeError, ValueError) as e: 39 | self.handle_import_error(f"could not import plugin {name}", e) 40 | return None 41 | 42 | if wrap: 43 | plugin = self._wrap_init(name)(plugin) 44 | 45 | return plugin 46 | 47 | @contextmanager 48 | def _wrap_init(self, name): 49 | try: 50 | yield 51 | except Exception as e: 52 | self.handle_init_error(f"while initializing plugin {name}", e) 53 | 54 | def init(self, target, names): 55 | for name in names: 56 | plugin = self.load(name) 57 | 58 | if not plugin: 59 | continue 60 | 61 | with self._wrap_init(name): 62 | plugin(target) 63 | 64 | handle_import_error = staticmethod(raise_exception) 65 | handle_init_error = staticmethod(raise_exception) 66 | -------------------------------------------------------------------------------- /src/reader/_plugins/share.py: -------------------------------------------------------------------------------- 1 | """ 2 | share 3 | ~~~~~ 4 | 5 | Add social sharing links at the end of the entry page. 
6 | 7 | To load:: 8 | 9 | READER_APP_PLUGIN='reader._plugins.share:init' \\ 10 | python -m reader serve 11 | 12 | """ 13 | 14 | from urllib.parse import quote 15 | from urllib.parse import urlparse 16 | 17 | 18 | TEMPLATES = { 19 | 'Twitter': "https://twitter.com/share?text={title}&url={url}", 20 | 'HN': "https://news.ycombinator.com/submitlink?u={url}&t={title}", 21 | 'Reddit': "https://www.reddit.com/submit?url={url}&title={title}", 22 | } 23 | 24 | 25 | def percent_encode(s, encoding="ascii"): 26 | return ''.join([f'%{b:0>2x}' for b in s.encode(encoding)]) 27 | 28 | 29 | def share(entry): 30 | if not entry.link: 31 | return 32 | link = quote(entry.link) 33 | title = quote(entry.title or '') 34 | 35 | for name, template in TEMPLATES.items(): 36 | url = template.format(url=link, title=title) 37 | 38 | # prevent ad blockers from messing with these 39 | url = urlparse(url) 40 | url = url._replace( 41 | netloc=percent_encode(url.netloc), 42 | path='/'.join( 43 | percent_encode(c) if 'share' in c.lower() else c 44 | for c in url.path.split('/') 45 | ), 46 | ) 47 | url = url.geturl() 48 | 49 | yield name, url 50 | 51 | 52 | def init(app): 53 | app.reader_additional_links.append(share) 54 | -------------------------------------------------------------------------------- /src/reader/_plugins/sqlite_releases.py: -------------------------------------------------------------------------------- 1 | """ 2 | sqlite_releases 3 | ~~~~~~~~~~~~~~~ 4 | 5 | Create a feed out of the SQLite release history pages at: 6 | 7 | * https://www.sqlite.org/changes.html 8 | * https://www.sqlite.org/chronology.html 9 | 10 | Also serves as an example of how to write custom parsers. 11 | 12 | This plugin needs additional dependencies, use the ``unstable-plugins`` extra 13 | to install them: 14 | 15 | .. code-block:: bash 16 | 17 | pip install reader[unstable-plugins] 18 | 19 | To load:: 20 | 21 | READER_PLUGIN='reader._plugins.sqlite_releases:init' \\ 22 | python -m reader ... 
23 | 24 | """ 25 | 26 | import warnings 27 | from datetime import datetime 28 | from datetime import timezone 29 | from urllib.parse import urlparse 30 | from urllib.parse import urlunparse 31 | 32 | import bs4 33 | 34 | from reader._parser import wrap_exceptions 35 | from reader._types import EntryData 36 | from reader._types import FeedData 37 | 38 | 39 | warnings.filterwarnings( 40 | 'ignore', 41 | message='No parser was explicitly specified', 42 | module='reader._plugins.sqlite_releases', 43 | ) 44 | 45 | 46 | FULL_URL = 'https://www.sqlite.org/changes.html' 47 | URLS = [FULL_URL, 'https://www.sqlite.org/chronology.html'] 48 | 49 | 50 | def extract_text(soup): 51 | for h3 in soup.select('body h3'): 52 | a_name = None 53 | for element, _ in zip(h3.previous_siblings, range(3), strict=False): 54 | if element.name == 'h3': 55 | break 56 | if element.name == 'a' and 'name' in element.attrs: 57 | a_name = element 58 | break 59 | 60 | content = [] 61 | last_a_name_index = None 62 | for i, element in enumerate(h3.next_siblings): 63 | if element.name == 'h3': 64 | break 65 | if element.name == 'a' and 'name' in element.attrs: 66 | last_a_name_index = i 67 | content.append(element) 68 | if last_a_name_index and len(content) - last_a_name_index <= 3: 69 | content = content[:last_a_name_index] 70 | 71 | yield h3.text, a_name['name'] if a_name else None, ''.join(map(str, content)) 72 | 73 | 74 | def make_entries(feed_url, url, soup): 75 | for title, fragment, content in extract_text(soup): 76 | try: 77 | updated = datetime.strptime(title.split()[0], '%Y-%m-%d').replace( 78 | tzinfo=timezone.utc 79 | ) 80 | except (ValueError, IndexError): 81 | continue 82 | 83 | link = urlunparse(urlparse(url)._replace(fragment=fragment)) 84 | 85 | yield EntryData( 86 | feed_url=feed_url, 87 | id=title, 88 | updated=updated, 89 | title=title, 90 | link=link, 91 | summary=content, 92 | ) 93 | 94 | 95 | def make_feed(feed_url, url, soup): 96 | return FeedData(url=feed_url, title=soup.title and soup.title.text, link=url) 97 | 98 | 99 | def parse(url, file, headers): 100 | with wrap_exceptions(url, "while reading feed"): 101 | soup = bs4.BeautifulSoup(file) 102 | with wrap_exceptions(url, "while parsing page"): 103 | feed = make_feed(url, FULL_URL, soup) 104 | entries = list(make_entries(url, FULL_URL, soup)) 105 | feed = feed._replace(updated=max(e.updated for e in entries)) 106 | return feed, entries 107 | 108 | 109 | def init(reader): 110 | for url in URLS: 111 | reader._parser.mount_parser_by_url(url, parse) 112 | -------------------------------------------------------------------------------- /src/reader/_plugins/templates/preview_feed_list.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% import "macros.html" as macros %} 4 | 5 | 6 | {% macro make_title() %} 7 | Feeds for {{ url }} 8 | {% endmacro %} 9 | 10 | 11 | {% block page_title %}{{ make_title() | striptags }}{% endblock %} 12 | {% block main_title %}{{ make_title() }}{% endblock %} 13 | 14 | 15 | {% block body %} 16 | 17 | 18 | {% if errors %} 19 |
    20 | {% for message in errors %} 21 |
  • error: {{ message }} 22 | {% endfor %} 23 |
24 | 25 | 26 | {# style similar to macros.entry_enclosures #} 27 | 28 | {% elif alternates %} 29 | 42 | {% else %} 43 |

no feeds for this page 44 | 45 | {% endif %} 46 | 47 | {% endblock %} 48 | -------------------------------------------------------------------------------- /src/reader/_storage/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Any 4 | 5 | from .._types import ChangeTrackerType 6 | from .._types import SearchType 7 | from ._base import StorageBase 8 | from ._changes import Changes 9 | from ._entries import EntriesMixin 10 | from ._feeds import FeedsMixin 11 | from ._tags import TagsMixin 12 | 13 | 14 | # Row value support was added in 3.15. 15 | # pragma_*() tabled-valued functions were added in 3.16. 16 | # last_insert_rowid() support for FTS5 was added in 3.18. 17 | MINIMUM_SQLITE_VERSION = (3, 18) 18 | 19 | # Both storage and search use the JSON1 extension. 20 | REQUIRED_SQLITE_FUNCTIONS = ['json'] 21 | 22 | 23 | class Storage(FeedsMixin, EntriesMixin, TagsMixin, StorageBase): 24 | """Data access object used for all storage (except search). 25 | 26 | This class is split into per-domain mixins, add new methods accordingly. 27 | 28 | Add a test_storage.py::test_errors_locked test for each new public method. 29 | 30 | """ 31 | 32 | def __init__(self, path: str, timeout: float | None = None): 33 | super().__init__(path, timeout) 34 | self.changes: ChangeTrackerType = Changes(self) 35 | 36 | def make_search(self) -> SearchType: 37 | from ._search import Search 38 | 39 | return Search(self) 40 | -------------------------------------------------------------------------------- /src/reader/_storage/_base.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import json 4 | import os 5 | import sqlite3 6 | import sys 7 | from collections.abc import Callable 8 | from collections.abc import Iterable 9 | from functools import partial 10 | from typing import Any 11 | from typing import TypeVar 12 | 13 | from ..exceptions import StorageError 14 | from . import _sqlite_utils 15 | from ._sql_utils import paginated_query 16 | from ._sql_utils import Query 17 | 18 | 19 | APPLICATION_ID = b'read' 20 | 21 | _T = TypeVar('_T') 22 | 23 | 24 | # also used by tests 25 | CONNECTION_CLS = sqlite3.Connection 26 | 27 | debug = os.environ.get('READER_DEBUG_STORAGE', '') 28 | assert set(debug) <= {'m', 't', 'T', 'i'}, f"invalid READER_DEBUG_STORAGE={debug}" 29 | 30 | if debug: # pragma: no cover 31 | 32 | class CONNECTION_CLS(_sqlite_utils.DebugConnection): # type: ignore # noqa: F811 33 | _set_trace = 't' or 'T' in debug 34 | _io_counters = 'i' in debug 35 | _pid = os.getpid() 36 | 37 | def _log_method(self, data): # type: ignore 38 | data['pid'] = self._pid 39 | stmt = None 40 | if 'T' in debug: 41 | stmt = data.pop('stmt', None) 42 | print('STORAGE_DEBUG', json.dumps(data), file=sys.stderr) 43 | if stmt: 44 | print(stmt, file=sys.stderr) 45 | 46 | 47 | wrap_exceptions = partial(_sqlite_utils.wrap_exceptions, StorageError) 48 | 49 | 50 | class StorageBase: 51 | # Private API, used by tests. 
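    # Number of rows fetched per query chunk by paginated_query() below
    # (2**8 == 256).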
52 | chunk_size = 2**8 53 | 54 | @wrap_exceptions(message="while opening database") 55 | def __init__(self, path: str, timeout: float | None = None): 56 | kwargs: dict[str, Any] = {'factory': CONNECTION_CLS} 57 | if timeout is not None: 58 | kwargs['timeout'] = timeout 59 | 60 | # at least the "PRAGMA foreign_keys = ON" part of setup_db 61 | # has to run for every connection (in every thread), 62 | # since it's not persisted across connections 63 | self.factory = _sqlite_utils.LocalConnectionFactory( 64 | path, self.setup_db, **kwargs 65 | ) 66 | 67 | def get_db(self) -> sqlite3.Connection: 68 | return self.factory() 69 | 70 | @staticmethod 71 | def setup_db(db: sqlite3.Connection) -> None: 72 | # Private API, used by tests. 73 | 74 | from . import MINIMUM_SQLITE_VERSION 75 | from . import REQUIRED_SQLITE_FUNCTIONS 76 | from ._schema import MIGRATION 77 | 78 | return _sqlite_utils.setup_db( 79 | db, 80 | migration=MIGRATION, 81 | id=APPLICATION_ID, 82 | minimum_sqlite_version=MINIMUM_SQLITE_VERSION, 83 | required_sqlite_functions=REQUIRED_SQLITE_FUNCTIONS, 84 | ) 85 | 86 | @wrap_exceptions() 87 | def __enter__(self) -> None: 88 | self.factory.__enter__() 89 | 90 | @wrap_exceptions() 91 | def __exit__(self, *_: Any) -> None: 92 | self.factory.__exit__() 93 | 94 | @wrap_exceptions() 95 | def close(self) -> None: 96 | self.factory.close() 97 | 98 | def paginated_query( 99 | self, 100 | make_query: Callable[[], tuple[Query, dict[str, Any]]], 101 | limit: int | None = None, 102 | last: tuple[Any, ...] | None = None, 103 | row_factory: Callable[[tuple[Any, ...]], _T] | None = None, 104 | ) -> Iterable[_T]: 105 | with wrap_exceptions(): 106 | yield from paginated_query( 107 | self.get_db(), 108 | make_query, 109 | self.chunk_size, 110 | limit or 0, 111 | last, 112 | row_factory, 113 | ) 114 | -------------------------------------------------------------------------------- /src/reader/_storage/_html_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | HTML utilities. Contains no business logic. 3 | 4 | """ 5 | 6 | from __future__ import annotations 7 | 8 | import warnings 9 | from typing import TYPE_CHECKING 10 | 11 | 12 | if TYPE_CHECKING: # pragma: no cover 13 | import bs4 14 | 15 | 16 | # BeautifulSoup warns if not giving it a parser explicitly; full text: 17 | # 18 | # No parser was explicitly specified, so I'm using the best available 19 | # HTML parser for this system ("..."). This usually isn't a problem, 20 | # but if you run this code on another system, or in a different virtual 21 | # environment, it may use a different parser and behave differently. 22 | # 23 | # We are ok with any parser, and with how BeautifulSoup picks the best one if 24 | # available. Explicitly using generic features (e.g. `('html', 'fast')`, 25 | # the default) instead of a specific parser still warns. 26 | # 27 | # Currently there's no way to allow users to pick a parser, and we don't want 28 | # to force a specific parser, so there's no point in warning. 29 | # 30 | # When changing this, also change the equivalent pytest.filterwarnings config. 31 | # 32 | # TODO: Expose BeautifulSoup(features=...) when we have a config system. 
33 | # 34 | warnings.filterwarnings( 35 | 'ignore', 36 | message='No parser was explicitly specified', 37 | module='reader._storage._html_utils', 38 | ) 39 | 40 | 41 | def strip_html(html: str, features: str | None = None) -> str: 42 | soup = get_soup(html) 43 | remove_nontext_elements(soup) 44 | return soup.get_text(separator=' ') 45 | 46 | 47 | def get_soup(html: str, features: str | None = None) -> bs4.BeautifulSoup: 48 | # lazy import (https://github.com/lemon24/reader/issues/297) 49 | import bs4 50 | 51 | return bs4.BeautifulSoup(html, features=features) 52 | 53 | 54 | def remove_nontext_elements(soup: bs4.BeautifulSoup) -> None: 55 | # content', type='text/plain', language=None 25 | ), 26 | Content(value='content', type='text/html', language=None), 27 | ), 28 | enclosures=( 29 | # the text/html type comes from feedparser 30 | Enclosure(href=f'{rel_base}enclosure?q=a#fragment', type='text/html'), 31 | ), 32 | ) 33 | ] 34 | -------------------------------------------------------------------------------- /tests/data/relative.rss: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | file.html 5 | 6 | 7 | blog/post/1 8 | 7bd204c6-1655-4c27-aeee-53f933c5395f 9 | 10 | 11 | one <a href="target">two</a> three 12 | 13 | <script>evil</script> content 14 | <script>evil</script> content 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /tests/data/relative.rss.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | from reader import Content 4 | from reader import Enclosure 5 | from reader._types import EntryData 6 | from reader._types import FeedData 7 | 8 | 9 | feed = FeedData( 10 | url=f'{url_base}relative.rss', 11 | link=f'{rel_base}file.html', 12 | version='rss20', 13 | ) 14 | 15 | entries = [ 16 | EntryData( 17 | feed_url=feed.url, 18 | id=f'{rel_base}7bd204c6-1655-4c27-aeee-53f933c5395f', 19 | updated=None, 20 | link=f'{rel_base}blog/post/1', 21 | summary=f'one two three', 22 | content=( 23 | Content( 24 | value=' content', type='text/plain', language=None 25 | ), 26 | Content(value='content', type='text/html', language=None), 27 | ), 28 | enclosures=( 29 | # for RSS feedparser doesn't make relative links absolute 30 | # (it does for Atom) 31 | Enclosure(href='enclosure?q=a#fragment'), 32 | ), 33 | ) 34 | ] 35 | -------------------------------------------------------------------------------- /tests/data/sqlite_releases.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Release History Of SQLite 5 | 6 | 7 | 8 | 9 |

Release History

10 | 11 |

12 | This page provides a high-level summary of changes to SQLite. 13 |

14 | 15 | 16 |

2021-01-20 (3.34.1)

17 | Fix a potential use-after-free bug. 18 | 19 | 20 |

2020-12-01 (3.34.0)

21 | Added the sqlite3_txn_state() interface. 22 |

23 | 24 |

2000-05-30

25 | Added the LIKE operator. 26 |

2000-05-29

27 | Initial Public Release of Alpha code 28 | -------------------------------------------------------------------------------- /tests/data/unknown.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "https://jsonfeed.org/version/1234", 3 | "items": [] 4 | } 5 | -------------------------------------------------------------------------------- /tests/data/unknown.json.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | from reader import Content 4 | from reader import Enclosure 5 | from reader._types import EntryData 6 | from reader._types import FeedData 7 | 8 | 9 | feed = FeedData( 10 | url=f'{url_base}unknown.json', 11 | version='json', 12 | ) 13 | 14 | entries = [] 15 | -------------------------------------------------------------------------------- /tests/reader_test_plugins/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lemon24/reader/bcc46c54d304d693a9031666ac68b6fa1fe5f6c1/tests/reader_test_plugins/__init__.py -------------------------------------------------------------------------------- /tests/reader_test_plugins/good.py: -------------------------------------------------------------------------------- 1 | def init_reader(reader): 2 | pass 3 | -------------------------------------------------------------------------------- /tests/reader_test_plugins/init_error.py: -------------------------------------------------------------------------------- 1 | def init_reader(reader): 2 | raise ValueError('someerror') 3 | -------------------------------------------------------------------------------- /tests/reader_test_plugins/missing_dependency.py: -------------------------------------------------------------------------------- 1 | import some_module_that_doesnt_exist_random_number_to_avoid_collisions_4 2 | -------------------------------------------------------------------------------- /tests/reader_test_plugins/missing_entry_point.py: -------------------------------------------------------------------------------- 1 | # no init_reader() here 2 | -------------------------------------------------------------------------------- /tests/test__utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from reader._utils import deprecated 4 | from reader._utils import deprecated_wrapper 5 | 6 | 7 | # Normally, the stuff in _utils is tested by tests for higher level code, 8 | # but some of the things aren't always used. 9 | 10 | 11 | def test_deprecated_wrapper(): 12 | def new(arg): 13 | raise ValueError(arg) 14 | 15 | old = deprecated_wrapper('old', new, '1.0', '2.0') 16 | 17 | _check_deprecated(old) 18 | 19 | 20 | def test_deprecated(): 21 | @deprecated('new', '1.0', '2.0') 22 | def old(arg): 23 | "docstring" 24 | raise ValueError(arg) 25 | 26 | assert '\n\ndocstring\n\n' in old.__doc__ 27 | 28 | _check_deprecated(old) 29 | 30 | 31 | def test_deprecated_property(): 32 | class Class: 33 | @property 34 | @deprecated('new', '1.0', '2.0', property=True) 35 | def old(self): 36 | "docstring" 37 | raise ValueError() 38 | 39 | with pytest.raises(ValueError), pytest.deprecated_call() as warnings: 40 | Class().old 41 | 42 | assert Class.old.fget.__name__ == 'old' 43 | assert Class.old.fget.__doc__ == ( 44 | 'Deprecated variant of :attr:`new`.\n\n' 45 | 'docstring\n' 46 | '\n' 47 | '.. 
deprecated:: 1.0\n' 48 | ' This property will be removed in *reader* 2.0.\n' 49 | ' Use :attr:`new` instead.\n\n' 50 | ) 51 | 52 | warning = warnings.pop() 53 | 54 | assert ( 55 | str(warning.message) 56 | == 'old is deprecated and will be removed in reader 2.0. Use new instead.' 57 | ) 58 | 59 | 60 | def _check_deprecated(old): 61 | with pytest.raises(ValueError) as excinfo, pytest.deprecated_call() as warnings: 62 | old('whatever') 63 | 64 | assert excinfo.value.args[0] == 'whatever' 65 | 66 | assert old.__name__ == 'old' 67 | assert old.__doc__.startswith('Deprecated alias for :meth:`new`.\n\n') 68 | assert old.__doc__.endswith( 69 | '\n' 70 | '.. deprecated:: 1.0\n' 71 | ' This method will be removed in *reader* 2.0.\n' 72 | ' Use :meth:`new` instead.\n\n' 73 | ) 74 | 75 | assert len(warnings.list) == 1 76 | warning = warnings.pop() 77 | 78 | assert warning.category is DeprecationWarning 79 | assert ( 80 | str(warning.message) 81 | == 'old() is deprecated and will be removed in reader 2.0. Use new() instead.' 82 | ) 83 | 84 | 85 | def test_better_str_partial(): 86 | from reader._utils import BetterStrPartial as partial 87 | 88 | def fn(): 89 | pass 90 | 91 | assert str(partial(fn, 1, two=2)) == "fn(1, two=2)" 92 | 93 | fn.__name__ = '' 94 | assert str(partial(fn, 1)) == "(1)" 95 | 96 | class Cls: 97 | def meth(self): 98 | pass 99 | 100 | assert str(partial(Cls.meth, two=2)) == 'meth(two=2)' 101 | assert str(partial(Cls().meth, two=2)) == 'meth(two=2)' 102 | -------------------------------------------------------------------------------- /tests/test_app_wsgi.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def dummy_plugin(reader): 5 | reader._dummy_was_here = True 6 | 7 | 8 | def test_app_wsgi(monkeypatch, db_path): 9 | # This assumes no-one else imports reader._app.wsgi.app. 10 | # Also, further imports will yield the same app from this test. 
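    # wsgi.py reads these environment variables at import time,
    # so they must be set before reader._app.wsgi is imported below.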
11 | monkeypatch.setitem(os.environ, 'READER_DB', db_path) 12 | monkeypatch.setitem(os.environ, 'READER_PLUGIN', 'test_app_wsgi:dummy_plugin') 13 | monkeypatch.setitem(os.environ, 'READER_APP_PLUGIN', 'test_app_wsgi:dummy_plugin') 14 | 15 | from reader._app import get_reader 16 | from reader._app.wsgi import app 17 | 18 | with app.app_context(): 19 | assert get_reader()._dummy_was_here 20 | 21 | assert app._dummy_was_here 22 | -------------------------------------------------------------------------------- /tests/test_bench.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import sys 3 | 4 | import pytest 5 | from click.testing import CliRunner 6 | 7 | from reader import make_reader 8 | from test_cli import patch_app_dir 9 | from test_reader_filter import setup_reader_for_tags 10 | 11 | 12 | root_dir = os.path.dirname(__file__) 13 | sys.path.insert(0, os.path.join(root_dir, '../scripts')) 14 | import bench 15 | from bench import cli 16 | 17 | 18 | pytestmark = pytest.mark.slow 19 | pytest.importorskip("numpy") 20 | 21 | 22 | @pytest.fixture(scope='module') 23 | def db_path(tmp_path_factory): 24 | dir = tmp_path_factory.mktemp("data") 25 | db_path = str(dir.joinpath('db.sqlite')) 26 | with make_reader(db_path) as reader: 27 | setup_reader_for_tags(reader) 28 | return db_path 29 | 30 | 31 | @pytest.mark.parametrize('command', [['time', '-n1'], ['profile']]) 32 | def test_commands_work(command, db_path): 33 | runner = CliRunner() 34 | result = runner.invoke( 35 | cli, command + ['--db', db_path] + ['get_entries_all', 'show'] 36 | ) 37 | assert result.exit_code == 0, result.exception 38 | 39 | 40 | def test_list(): 41 | runner = CliRunner() 42 | result = runner.invoke(cli, ['list']) 43 | assert 'get_entries_all' in result.output.splitlines() 44 | assert 'show' in result.output.split() 45 | -------------------------------------------------------------------------------- /tests/test_config.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from reader._config import Config 4 | 5 | 6 | CONFIG_INIT_DATA = [ 7 | (Config({}), {'default': {}}), 8 | (Config({}, sections={'cli', 'app'}), {'default': {}, 'cli': {}, 'app': {}}), 9 | ( 10 | Config({'reader': {'k': 'v'}}, sections={'cli', 'app'}), 11 | {'default': {'reader': {'k': 'v'}}, 'cli': {}, 'app': {}}, 12 | ), 13 | ( 14 | Config({'default': {'reader': {'k': 'v'}}}, sections={'cli', 'app'}), 15 | {'default': {'reader': {'k': 'v'}}, 'cli': {}, 'app': {}}, 16 | ), 17 | ] 18 | 19 | 20 | @pytest.mark.parametrize('config, data', CONFIG_INIT_DATA) 21 | def test_config_init(config, data): 22 | assert config.data == data 23 | 24 | 25 | def test_config_init_error(): 26 | with pytest.raises(ValueError): 27 | Config({'default': {'reader': {}}, 'reader': {}}) 28 | 29 | 30 | def test_config_merged(): 31 | config = Config( 32 | { 33 | 'url': 'default-url', 34 | 'plugins': {'default-plugin': None, 'another-plugin': 1}, 35 | 'cli': {'url': 'cli-url'}, 36 | 'app': {'plugins': {'app-plugin': None, 'another-plugin': 2}}, 37 | }, 38 | sections={'cli', 'app'}, 39 | merge_keys={ 40 | 'plugins', 41 | }, 42 | ) 43 | 44 | assert config.merged('cli') == { 45 | 'url': 'cli-url', 46 | 'plugins': {'default-plugin': None, 'another-plugin': 1}, 47 | } 48 | 49 | assert config.merged('app') == { 50 | 'url': 'default-url', 51 | 'plugins': {'default-plugin': None, 'another-plugin': 2, 'app-plugin': None}, 52 | } 53 | 54 | 55 | def test_config_merged_recursive(): 56 | 
config = Config( 57 | { 58 | 'reader': {'plugins': {'default-reader-plugin': None}}, 59 | 'plugins': {'default-plugin': None}, 60 | 'app': { 61 | 'reader': {'plugins': {'app-reader-plugin': None}}, 62 | 'plugins': {'app-plugin': None}, 63 | }, 64 | }, 65 | sections={ 66 | 'app', 67 | }, 68 | merge_keys={'reader', 'plugins'}, 69 | ) 70 | assert config.merged('app') == { 71 | 'reader': { 72 | 'plugins': {'default-reader-plugin': None, 'app-reader-plugin': None} 73 | }, 74 | 'plugins': {'default-plugin': None, 'app-plugin': None}, 75 | } 76 | 77 | 78 | def test_config_all(): 79 | config = Config( 80 | { 81 | 'url': 'default-url', 82 | 'nested': {'default-key': 'default-nested'}, 83 | 'cli': { 84 | 'url': 'cli-url', 85 | 'nested': {'cli-key': 'cli-nested'}, 86 | }, 87 | }, 88 | sections={'cli', 'app'}, 89 | merge_keys={ 90 | 'nested', 91 | }, 92 | ) 93 | 94 | config.all['url'] = 'new-url' 95 | assert config.data == { 96 | 'default': { 97 | 'url': 'new-url', 98 | 'nested': {'default-key': 'default-nested'}, 99 | }, 100 | 'cli': { 101 | 'url': 'new-url', 102 | 'nested': {'cli-key': 'cli-nested'}, 103 | }, 104 | 'app': { 105 | 'url': 'new-url', 106 | }, 107 | } 108 | 109 | config.all['nested'] = {'new-key': 'new-value'} 110 | assert config.data == dict.fromkeys( 111 | ('default', 'cli', 'app'), 112 | { 113 | 'url': 'new-url', 114 | 'nested': {'new-key': 'new-value'}, 115 | }, 116 | ) 117 | -------------------------------------------------------------------------------- /tests/test_exceptions.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import pytest 4 | 5 | from reader import EntryError 6 | from reader import FeedError 7 | from reader import SingleUpdateHookError 8 | from reader import TagError 9 | from reader import UpdateHookErrorGroup 10 | from reader.exceptions import _FancyExceptionBase 11 | 12 | 13 | def test_fancy_exception_base(): 14 | exc = _FancyExceptionBase('message') 15 | assert str(exc) == 'message' 16 | 17 | exc = _FancyExceptionBase(message='message') 18 | assert str(exc) == 'message' 19 | 20 | cause = Exception('cause') 21 | 22 | exc = _FancyExceptionBase('message') 23 | exc.__cause__ = cause 24 | pickled_exc = pickle.dumps(exc) 25 | assert str(exc) == 'message: builtins.Exception: cause' 26 | assert str(exc) == str(pickle.loads(pickled_exc)) 27 | 28 | class WithURL(_FancyExceptionBase): 29 | _default_message = 'default message' 30 | 31 | def __init__(self, url, **kwargs): 32 | super().__init__(**kwargs) 33 | self.url = url 34 | 35 | @property 36 | def _str(self): 37 | return self.url.upper() 38 | 39 | exc = WithURL('url') 40 | assert str(exc) == 'default message: URL' 41 | 42 | exc = WithURL('url', message='another message') 43 | exc.__cause__ = cause 44 | assert str(exc) == 'another message: URL: builtins.Exception: cause' 45 | 46 | 47 | def _all_classes(cls): 48 | yield cls 49 | for subclass in cls.__subclasses__(): 50 | yield from _all_classes(subclass) 51 | 52 | 53 | def all_classes(*args, **kwargs): 54 | return list(_all_classes(*args, **kwargs)) 55 | 56 | 57 | @pytest.mark.parametrize('exc_type', all_classes(FeedError)) 58 | def test_feed_error_str(exc_type): 59 | exc = exc_type('url') 60 | assert repr('url') in str(exc) 61 | 62 | 63 | @pytest.mark.parametrize('exc_type', all_classes(EntryError)) 64 | def test_entry_error_str(exc_type): 65 | exc = exc_type('url', 'id') 66 | assert repr(('url', 'id')) in str(exc) 67 | 68 | 69 | @pytest.mark.parametrize('exc_type', all_classes(TagError)) 70 | def 
test_tag_error_str(exc_type): 71 | exc = exc_type(('object',), 'key') 72 | assert "'object': 'key'" in str(exc) 73 | 74 | 75 | @pytest.mark.parametrize( 76 | 'args, expected', 77 | [ 78 | ( 79 | ('before_feeds_update', 'myhook'), 80 | "unexpected hook error: before_feeds_update: 'myhook'", 81 | ), 82 | ( 83 | ('before_feeds_update', 'myhook', ()), 84 | "unexpected hook error: before_feeds_update: 'myhook': ()", 85 | ), 86 | ( 87 | ('before_feed_update', 'myhook', ('feed',)), 88 | "unexpected hook error: before_feed_update: 'myhook': 'feed'", 89 | ), 90 | ( 91 | ('after_entry_update', 'myhook', ('feed', 'entry')), 92 | "unexpected hook error: after_entry_update: 'myhook': ('feed', 'entry')", 93 | ), 94 | ], 95 | ) 96 | def test_single_update_hook_error_str(args, expected): 97 | exc = SingleUpdateHookError(*args) 98 | assert str(exc) == expected 99 | exc = SingleUpdateHookError(*args) 100 | exc.__cause__ = Exception('cause') 101 | assert str(exc) == expected + ": builtins.Exception: cause" 102 | 103 | 104 | def test_update_hook_error_group(): 105 | one = SingleUpdateHookError('before_feeds_update', 'one') 106 | two = SingleUpdateHookError('before_feeds_update', 'two') 107 | 108 | group = UpdateHookErrorGroup('message', [one]) 109 | assert group.message == 'message' 110 | assert group.exceptions == (one,) 111 | 112 | derived = group.derive([two]) 113 | assert derived.message == 'message' 114 | assert derived.exceptions == (two,) 115 | 116 | with pytest.raises(TypeError): 117 | UpdateHookErrorGroup('message', [Exception()]) 118 | with pytest.raises(TypeError): 119 | group.derive([Exception()]) 120 | -------------------------------------------------------------------------------- /tests/test_hash_utils.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from datetime import datetime 3 | 4 | import pytest 5 | 6 | from reader._hash_utils import get_hash 7 | 8 | 9 | @dataclass 10 | class DataOne: 11 | one: object 12 | two: object = None 13 | 14 | 15 | @dataclass 16 | class DataTwo: 17 | one: object 18 | two: object = None 19 | three: object = None 20 | 21 | 22 | def two_factory(one, value): 23 | return DataTwo(one, three=value) 24 | 25 | 26 | @dataclass 27 | class DataThree: 28 | one: object 29 | two: object = None 30 | _hash_exclude_ = frozenset( 31 | { 32 | 'one', 33 | } 34 | ) 35 | 36 | 37 | @pytest.mark.parametrize('value', ['', [], (), {}, None]) 38 | @pytest.mark.parametrize('factory', [DataOne, DataTwo, two_factory]) 39 | def test_empty(value, factory): 40 | assert get_hash(DataOne(1)) == get_hash(factory(1, value)) 41 | assert get_hash(DataOne(1, factory(2, value))) == get_hash( 42 | DataOne(1, factory(2, value)) 43 | ) 44 | 45 | 46 | @pytest.mark.parametrize( 47 | 'thing, hash', 48 | [ 49 | (None, b'\x007\xa6%\x9c\xc0\xc1\xda\xe2\x99\xa7\x86d\x89\xdf\xf0'), 50 | (True, b'\x00\xb3&\xb5\x06+/\x0ei\x04h\x10qu4\xcb'), 51 | (1, b'\x00\xc4\xcaB8\xa0\xb9#\x82\r\xccP\x9aou\x84'), 52 | ('str', b'\x00v~-y\x12\xeb\xef\xdf\xe1\x84\x95\xedSc_'), 53 | (['list'], b'\x00\xe1y\x01T;\x817\x06\x03\xeb\x03\x07\xf4\xed\xc5'), 54 | (('tuple',), b'\x00\x95\xab\xbex\xc6\xff@\xdd\x02\xd5N\\\\\xbbY'), 55 | ({'key': 'value'}, b"\x00\xa75?|\xdd\xce\x80\x8d\xe0\x03'G\xa0\xb7\xbe"), 56 | (DataOne(1, 2), b'\x00\xbd]\x03\xe5\x0c\xca\xc3\xae\x17\xf1\x84\x01R@c'), 57 | (DataTwo(1, 2), b'\x00\xbd]\x03\xe5\x0c\xca\xc3\xae\x17\xf1\x84\x01R@c'), 58 | (DataOne(1, DataTwo(2)), b'\x00\xc4[\xfcY0\xffJ--\xb6\xd1M\xd7(\x8f'), 59 | ( 60 | DataOne(1, 
[DataTwo(2), 3, datetime(2021, 1, 2)]), 61 | b'\x00uU\xb7\xf7\x18\xfa\x06\x98h\x82\xeb\xfd\xdc\xbd.', 62 | ), 63 | ( 64 | DataOne(1, {'key': DataTwo(datetime(2021, 1, 2))}), 65 | b'\x00\xc82CV\xed\xff.\x8d\x9e5&\xbc\xd4e/', 66 | ), 67 | ], 68 | ) 69 | def test_hash(thing, hash): 70 | assert get_hash(thing) == hash 71 | 72 | 73 | @pytest.mark.parametrize('thing', [object(), str, {1, 2}, b'ab']) 74 | def test_hash_error(thing): 75 | with pytest.raises(TypeError): 76 | get_hash(DataOne(thing)) 77 | with pytest.raises(TypeError): 78 | get_hash(DataOne) 79 | 80 | 81 | def test_exclude(): 82 | assert get_hash(DataTwo(None, 2)) == get_hash(DataThree(1, 2)) 83 | assert get_hash(DataTwo(1, 2)) != get_hash(DataThree(1, 2)) 84 | assert get_hash(DataOne(DataTwo(None, 2), 'one')) == get_hash( 85 | DataOne(DataThree(1, 2), 'one') 86 | ) 87 | -------------------------------------------------------------------------------- /tests/test_html_utils.py: -------------------------------------------------------------------------------- 1 | import bs4 2 | import pytest 3 | 4 | from reader._storage._html_utils import strip_html 5 | 6 | 7 | STRIP_HTML_DATA = [ 8 | ('', ''), 9 | ('
', ''), 10 | ('aabb', 'aabb'), 11 | ('aa
bb', 'aa\nbb'), 12 | ('aa

bb', 'aa\nbb'), 13 | ('bb', 'bb'), 14 | ('bb', 'bb'), 15 | ('bb', 'bb'), 16 | ('ssbb', 'bb'), 17 | ('aabb', 'aa\nbb'), 18 | ('aabb', 'aa\nbb'), 19 | ('aabb', 'aa\nbb'), 20 | ('aattbb', 'aa\nbb'), 21 | ('bb', 'bb'), 22 | ('bb', 'bb'), 23 | ('bb', 'bb'), 24 | ('ttbb', 'bb'), 25 | ('aabb', 'aa\nbb'), 26 | ('aabb', 'aa\nbb'), 27 | ('aabb', 'aa\nbb'), 28 | ('aattbb', 'aa\nbb'), 29 | ( 30 | """ 31 | 32 | aa 33 | tt 34 |

bb 35 | 36 | cc 37 | 38 | 39 | dd 40 | 41 | ee 42 | """, 43 | 'aa\nbb\ncc\ndd\nee', 44 | ), 45 | ] 46 | 47 | 48 | # We test all bs4 parsers, since we don't know/care what the user has installed. 49 | @pytest.mark.parametrize( 50 | 'features', 51 | [ 52 | None, 53 | pytest.param('lxml', marks=pytest.mark.requires_lxml), 54 | 'html.parser', 55 | 'html5lib', 56 | ], 57 | ) 58 | @pytest.mark.parametrize('input, expected_output', STRIP_HTML_DATA) 59 | def test_strip_html(input, expected_output, features): 60 | output = strip_html(input, features) 61 | if isinstance(output, str): 62 | output = '\n'.join(output.split()) 63 | 64 | # Special-case different

\nAdded the sqlite3_txn_state() interface.\n

', 59 | ), 60 | ( 61 | '2000-05-30', 62 | datetime(2000, 5, 30, 0, 0), 63 | '2000-05-30', 64 | 'https://www.sqlite.org/changes.html', 65 | 'Added the LIKE operator.', 66 | ), 67 | ( 68 | '2000-05-29', 69 | datetime(2000, 5, 29, 0, 0), 70 | '2000-05-29', 71 | 'https://www.sqlite.org/changes.html', 72 | 'Initial Public Release of Alpha code', 73 | ), 74 | ] 75 | -------------------------------------------------------------------------------- /tests/test_plugins_ua_fallback.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from reader import ParseError 4 | 5 | 6 | def test_fallback(requests_mock, make_reader): 7 | url = 'http://www.example.com/' 8 | 9 | reader = make_reader(':memory:', plugins=('reader.ua_fallback',)) 10 | reader.add_feed(url) 11 | 12 | matcher = requests_mock.get(url, status_code=403) 13 | 14 | with pytest.raises(ParseError) as exc_info: 15 | reader.update_feed(url) 16 | 17 | assert '403' in str(exc_info.value) 18 | 19 | assert len(matcher.request_history) == 2 20 | first_ua, second_ua = (r.headers['User-Agent'] for r in matcher.request_history) 21 | 22 | assert first_ua.startswith('python-reader/') 23 | assert second_ua.startswith('feedparser/') 24 | assert second_ua.endswith(first_ua) 25 | 26 | 27 | def test_noop(requests_mock, make_reader): 28 | url = 'http://www.example.com/' 29 | 30 | reader = make_reader(':memory:', plugins=('reader.ua_fallback',)) 31 | reader.add_feed(url) 32 | 33 | matcher = requests_mock.get(url, status_code=404) 34 | 35 | with pytest.raises(ParseError) as exc_info: 36 | reader.update_feed(url) 37 | 38 | assert '404' in str(exc_info.value) 39 | assert len(matcher.request_history) == 1 40 | -------------------------------------------------------------------------------- /tests/test_reader_deprecations.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | import pytest 4 | 5 | from fakeparser import Parser 6 | 7 | 8 | # Nothing here (yet). 
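# A minimal sketch of the shape such a test usually takes once something is
# actually deprecated (the method name below is hypothetical, shown only for
# illustration; pytest.warns() and the make_reader fixture are the only real
# pieces assumed here):
#
#     def test_deprecated_thing_warns(make_reader):
#         reader = make_reader(':memory:')
#         with pytest.warns(DeprecationWarning):
#             reader.deprecated_thing()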
9 | -------------------------------------------------------------------------------- /tests/test_reader_plugins.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from reader import InvalidPluginError 4 | from reader import PluginInitError 5 | 6 | 7 | @pytest.fixture(autouse=True) 8 | def set_module_prefix(monkeypatch): 9 | monkeypatch.setattr('reader.plugins._MODULE_PREFIX', 'reader_test_plugins.') 10 | 11 | 12 | def test_good(monkeypatch, make_reader): 13 | def one(reader): 14 | one.reader = reader 15 | 16 | def two(reader): 17 | two.reader = reader 18 | 19 | monkeypatch.setattr('reader_test_plugins.good.init_reader', one) 20 | 21 | reader = make_reader(':memory:', plugins=['reader.good', two]) 22 | 23 | assert one.reader is reader 24 | assert two.reader is reader 25 | 26 | 27 | @pytest.mark.parametrize( 28 | 'plugin_name', 29 | ['reader_test_plugins.good:init_reader', 'reader_test_plugins.good.init_reader'], 30 | ) 31 | def test_good_full_path(monkeypatch, make_reader, plugin_name): 32 | monkeypatch.setattr('reader.plugins._PLUGIN_PREFIX', 'reader_test_plugins.') 33 | 34 | def one(reader): 35 | one.reader = reader 36 | 37 | monkeypatch.setattr('reader_test_plugins.good.init_reader', one) 38 | 39 | reader = make_reader(':memory:', plugins=[plugin_name]) 40 | 41 | assert one.reader is reader 42 | 43 | 44 | def test_init_error_built_in(make_reader): 45 | with pytest.raises(PluginInitError) as exc_info: 46 | reader = make_reader(':memory:', plugins=['reader.init_error']) 47 | 48 | message = str(exc_info.value) 49 | assert 'reader_test_plugins.init_error:init_reader' in message 50 | assert 'someerror' in message 51 | 52 | 53 | def test_init_error_callable(make_reader): 54 | from reader_test_plugins.init_error import init_reader as plugin 55 | 56 | with pytest.raises(PluginInitError) as exc_info: 57 | reader = make_reader(':memory:', plugins=[plugin]) 58 | 59 | message = str(exc_info.value) 60 | assert 'reader_test_plugins.init_error:init_reader' in message 61 | assert 'someerror' in message 62 | 63 | 64 | def test_non_built_in(monkeypatch, make_reader): 65 | with pytest.raises(InvalidPluginError) as exc_info: 66 | make_reader(':memory:', plugins=['reader_test_plugins.good:init_reader']) 67 | 68 | assert "no such built-in plugin: 'reader_test_plugins.good:init_reader'" in str( 69 | exc_info.value 70 | ) 71 | 72 | 73 | def test_missing_plugin(make_reader): 74 | with pytest.raises(InvalidPluginError) as exc_info: 75 | make_reader(':memory:', plugins=['reader.unknown']) 76 | 77 | assert "no such built-in plugin: 'reader.unknown'" in str(exc_info.value) 78 | 79 | 80 | def test_missing_entry_point(make_reader): 81 | with pytest.raises(AttributeError) as exc_info: 82 | make_reader(':memory:', plugins=['reader.missing_entry_point']) 83 | 84 | 85 | def test_missing_dependency(make_reader): 86 | with pytest.raises(ImportError) as exc_info: 87 | make_reader(':memory:', plugins=['reader.missing_dependency']) 88 | -------------------------------------------------------------------------------- /tests/test_reader_utils.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import Mock 2 | 3 | import pytest 4 | 5 | from reader import EntryNotFoundError 6 | from reader.utils import archive_entries 7 | 8 | 9 | def test_archive_entries(reader, parser): 10 | reader.copy_entry = Mock(wraps=reader.copy_entry) 11 | 12 | feed = parser.feed(1) 13 | one = parser.entry(1, 'one', title='one') 14 | two = 
parser.entry(1, '&?:/', title='not URL safe') 15 | reader.add_feed(feed) 16 | reader.update_feeds() 17 | 18 | # archive an entry, archived does not exist 19 | 20 | reader.copy_entry.reset_mock() 21 | archive_entries(reader, [one]) 22 | 23 | assert len(reader.copy_entry.call_args_list) == 1 24 | assert {e.resource_id + (e.title,) for e in reader.get_entries()} == { 25 | ('1', 'one', 'one'), 26 | ('1', '&?:/', 'not URL safe'), 27 | ('reader:archived', 'reader:archived?feed=1&entry=one', 'one'), 28 | } 29 | archived = reader.get_feed('reader:archived') 30 | assert archived.updates_enabled is False 31 | assert archived.user_title == 'Archived' 32 | 33 | # archive two entries (one already archived), archived exists 34 | 35 | one = parser.entry(1, 'one', title='new one') 36 | reader.update_feeds() 37 | 38 | reader.copy_entry.reset_mock() 39 | archive_entries(reader, [one, two]) 40 | 41 | # 3 because one is copied (exists error), deleted, and then copied again 42 | assert len(reader.copy_entry.call_args_list) == 3 43 | assert {e.resource_id + (e.title,) for e in reader.get_entries()} == { 44 | ('1', 'one', 'new one'), 45 | ('1', '&?:/', 'not URL safe'), 46 | ('reader:archived', 'reader:archived?feed=1&entry=one', 'new one'), 47 | ( 48 | 'reader:archived', 49 | 'reader:archived?feed=1&entry=%26%3F%3A%2F', 50 | 'not URL safe', 51 | ), 52 | } 53 | 54 | # archive inexistent entry 55 | 56 | with pytest.raises(EntryNotFoundError): 57 | archive_entries(reader, [('1', 'inexistent')]) 58 | -------------------------------------------------------------------------------- /tests/test_test_utils.py: -------------------------------------------------------------------------------- 1 | from utils import reload_module 2 | 3 | 4 | def test_reload_module(monkeypatch, reload_module): 5 | import ntpath 6 | import os 7 | import os.path 8 | import posixpath 9 | import urllib.request 10 | 11 | os_path_by_name = {'nt': ntpath, 'posix': posixpath} 12 | 13 | # on Windows, url2pathname is imported from nturl2path; 14 | # on POSIX, url2pathname is defined in urllib.request; 15 | # this is decided at urllib.request's import time, based on os.name 16 | url2pathname_module_by_name = {'nt': 'nturl2path', 'posix': 'urllib.request'} 17 | 18 | the_other_os_name = {'nt': 'posix', 'posix': 'nt'}[os.name] 19 | 20 | before = os.name, os.path.__name__, urllib.request.url2pathname.__module__ 21 | 22 | monkeypatch.setattr('os.name', the_other_os_name) 23 | monkeypatch.setattr('os.path', os_path_by_name[the_other_os_name]) 24 | reload_module(urllib.request) 25 | 26 | # sanity check 27 | assert os.name == the_other_os_name 28 | assert os.path.__name__ == os_path_by_name[os.name].__name__ 29 | 30 | assert ( 31 | urllib.request.url2pathname.__module__ == url2pathname_module_by_name[os.name] 32 | ) 33 | 34 | reload_module.undo() 35 | 36 | assert before == (os.name, os.path.__name__, urllib.request.url2pathname.__module__) 37 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = coverage-clean,py{313,312,311,py311},coverage-report,typing,docs 3 | skip_missing_interpreters = true 4 | 5 | [testenv] 6 | extras = 7 | cli 8 | app 9 | tests 10 | unstable-plugins 11 | 12 | allowlist_externals = 13 | ./run.sh 14 | 15 | commands = {posargs:./run.sh coverage-run --cov-append -v} 16 | 17 | depends = 18 | py{313,312,311,py311}: coverage-clean 19 | coverage-report: py{313,312,311,py311} 20 | 21 | 
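# Example invocations (a sketch; assumes a recent tox is installed and that
# arguments after "--" replace the default command via {posargs} as configured above):
#   tox                      # run the whole envlist
#   tox -e py312             # a single interpreter
#   tox -e typing,docs       # type checking and the docs build only
#   tox -e py312 -- ./run.sh coverage-run -v   # override the default command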
[testenv:coverage-clean] 22 | deps = coverage 23 | skip_install = true 24 | commands = coverage erase 25 | 26 | [testenv:coverage-report] 27 | deps = coverage 28 | skip_install = true 29 | commands = ./run.sh coverage-report 30 | 31 | [testenv:typing] 32 | extras = 33 | search 34 | tests 35 | commands = ./run.sh typing 36 | 37 | [testenv:docs] 38 | extras = 39 | docs 40 | commands = sphinx-build -W -b html -d {envtmpdir}/doctrees docs {envtmpdir}/html 41 | --------------------------------------------------------------------------------