├── .flake8 ├── .gitattributes ├── .github ├── dependabot.yml └── workflows │ └── build.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── CHANGES.rst ├── CONTRIBUTING.rst ├── LICENSE ├── MANIFEST.in ├── README.rst ├── docs ├── Makefile ├── api.rst ├── app.rst ├── changelog.rst ├── cli.rst ├── conf.py ├── config.rst ├── contributing.rst ├── dev-app.rst ├── dev.rst ├── guide.rst ├── images │ └── redesign-01.png ├── index.rst ├── install.rst ├── internal.rst ├── make.bat ├── plugins.rst ├── screenshots │ ├── dillo.png │ ├── entries-feed.png │ ├── entries-v2-dark.png │ ├── entries-v2-filters-light.png │ ├── entries.png │ ├── entry-one.png │ ├── entry-two.png │ ├── feeds.png │ ├── lynx.png │ └── search.png ├── tutorial.rst └── why.rst ├── examples ├── config.yaml ├── custom_headers.py ├── feed_slugs.py ├── parser_only.py ├── podcast.py └── terminal.py ├── pyproject.toml ├── run.sh ├── scripts ├── backup.sh ├── bench.py ├── debug_storage_stats.py ├── generate_import_all.py ├── jscontrols.html ├── jscontrols.py ├── lines.sh └── release.py ├── src └── reader │ ├── __init__.py │ ├── __main__.py │ ├── _app │ ├── __init__.py │ ├── api_thing.py │ ├── cli.py │ ├── static │ │ ├── controls.js │ │ └── style.css │ ├── templates │ │ ├── add_entry.html │ │ ├── entries.html │ │ ├── entry.html │ │ ├── feeds.html │ │ ├── layout.html │ │ ├── macros.html │ │ ├── metadata.html │ │ └── tags.html │ ├── v2 │ │ ├── __init__.py │ │ ├── forms.py │ │ ├── static │ │ │ ├── style.css │ │ │ └── theme.js │ │ └── templates │ │ │ └── v2 │ │ │ ├── entries.html │ │ │ ├── layout.html │ │ │ └── macros.html │ └── wsgi.py │ ├── _cli.py │ ├── _config.py │ ├── _hash_utils.py │ ├── _parser │ ├── __init__.py │ ├── _http_utils.py │ ├── _lazy.py │ ├── _url_utils.py │ ├── feedparser.py │ ├── file.py │ ├── http.py │ ├── jsonfeed.py │ └── requests │ │ ├── __init__.py │ │ └── _lazy.py │ ├── _plugins │ ├── __init__.py │ ├── cli_status.py │ ├── enclosure_tags.py │ ├── preview_feed_list.py │ ├── share.py │ ├── sqlite_releases.py │ ├── templates │ │ └── preview_feed_list.html │ └── timer.py │ ├── _storage │ ├── __init__.py │ ├── _base.py │ ├── _changes.py │ ├── _entries.py │ ├── _feeds.py │ ├── _html_utils.py │ ├── _schema.py │ ├── _search.py │ ├── _sql_utils.py │ ├── _sqlite_utils.py │ └── _tags.py │ ├── _types.py │ ├── _update.py │ ├── _utils.py │ ├── _vendor │ ├── __init__.py │ └── feedparser │ │ ├── __init__.py │ │ ├── api.py │ │ ├── datetimes │ │ ├── __init__.py │ │ ├── asctime.py │ │ ├── greek.py │ │ ├── hungarian.py │ │ ├── iso8601.py │ │ ├── korean.py │ │ ├── perforce.py │ │ ├── rfc822.py │ │ └── w3dtf.py │ │ ├── encodings.py │ │ ├── exceptions.py │ │ ├── html.py │ │ ├── http.py │ │ ├── mixin.py │ │ ├── namespaces │ │ ├── __init__.py │ │ ├── _base.py │ │ ├── admin.py │ │ ├── cc.py │ │ ├── dc.py │ │ ├── georss.py │ │ ├── itunes.py │ │ ├── mediarss.py │ │ └── psc.py │ │ ├── parsers │ │ ├── __init__.py │ │ ├── json.py │ │ ├── loose.py │ │ └── strict.py │ │ ├── py.typed │ │ ├── sanitizer.py │ │ ├── sgml.py │ │ ├── urls.py │ │ └── util.py │ ├── core.py │ ├── exceptions.py │ ├── plugins │ ├── __init__.py │ ├── enclosure_dedupe.py │ ├── entry_dedupe.py │ ├── mark_as_read.py │ ├── readtime.py │ └── ua_fallback.py │ ├── py.typed │ ├── types.py │ └── utils.py ├── tests ├── conftest.py ├── data │ ├── 10.json │ ├── 10.json.py │ ├── custom │ ├── empty.atom │ ├── empty.atom.py │ ├── empty.json │ ├── empty.json.py │ ├── empty.rss │ ├── empty.rss.py │ ├── full.atom │ ├── full.atom.py │ ├── full.json │ ├── full.json.py │ ├── 
full.rss │ ├── full.rss.py │ ├── invalid.json │ ├── invalid.json.py │ ├── relative.atom │ ├── relative.atom.py │ ├── relative.rss │ ├── relative.rss.py │ ├── sqlite_releases.html │ ├── unknown.json │ └── unknown.json.py ├── fakeparser.py ├── reader_methods.py ├── reader_test_plugins │ ├── __init__.py │ ├── good.py │ ├── init_error.py │ ├── missing_dependency.py │ └── missing_entry_point.py ├── test__types.py ├── test__utils.py ├── test_app.py ├── test_app_wsgi.py ├── test_bench.py ├── test_changes.py ├── test_cli.py ├── test_config.py ├── test_exceptions.py ├── test_hash_utils.py ├── test_html_utils.py ├── test_lazy_imports.py ├── test_parser.py ├── test_plugins_cli_status.py ├── test_plugins_enclosure_dedupe.py ├── test_plugins_entry_dedupe.py ├── test_plugins_mark_as_read.py ├── test_plugins_preview_feed_list.py ├── test_plugins_readtime.py ├── test_plugins_sqlite_releases.py ├── test_plugins_ua_fallback.py ├── test_reader.py ├── test_reader_context.py ├── test_reader_counts.py ├── test_reader_deprecations.py ├── test_reader_filter.py ├── test_reader_hooks.py ├── test_reader_integration.py ├── test_reader_plugins.py ├── test_reader_private.py ├── test_reader_search.py ├── test_reader_sort.py ├── test_reader_update.py ├── test_reader_utils.py ├── test_search.py ├── test_sql_utils.py ├── test_sqlite_utils.py ├── test_storage.py ├── test_tags.py ├── test_test_utils.py ├── test_types.py └── utils.py └── tox.ini /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | # B = bugbear 3 | # E = pycodestyle errors 4 | # F = flake8 pyflakes 5 | # W = pycodestyle warnings 6 | # B9 = bugbear opinions 7 | select = B, E, F, W, B9 8 | ignore = 9 | # slice notation whitespace, invalid 10 | E203 11 | # line length, handled by bugbear B950 12 | E501 13 | # bugbear line length; too sensitive, triggered for comments 14 | # and docstrings (and adding "noqa" in comments is making things worse); 15 | # black taking care of line length for code should be good enough; 16 | # if enabled, we should set max-line-length = 80 (so up to 88 are allowed) 17 | B950 18 | # bare except, handled by bugbear B001 19 | E722 20 | # bin op line break, invalid 21 | W503 W504 22 | # string formatting opinion 23 | B907 24 | # multiple statements on one line, handled by black 25 | E704 26 | per-file-ignores = 27 | # __init__ modules export names 28 | **/__init__.py: F401 29 | exclude = tests/*, docs/*, scripts/*, src/reader/_vendor/* 30 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # https://github.com/actions/checkout/issues/135#issuecomment-613361104 2 | * text eol=lf 3 | *.png -text 4 | *.bat -text 5 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: pip 4 | directory: "/" 5 | schedule: 6 | interval: weekly 7 | open-pull-requests-limit: 10 8 | ignore: 9 | - dependency-name: mypy 10 | versions: 11 | - "0.800" 12 | -------------------------------------------------------------------------------- /.github/workflows/build.yaml: -------------------------------------------------------------------------------- 1 | 2 | name: build 3 | 4 | on: 5 | push: 6 | branches: [master] 7 | pull_request: 8 | branches: [master] 9 | workflow_dispatch: 10 | 11 | defaults: 12 | 
run: 13 | shell: bash 14 | 15 | jobs: 16 | tests: 17 | runs-on: ${{ matrix.os }} 18 | 19 | strategy: 20 | fail-fast: false 21 | 22 | matrix: 23 | python-version: [ 24 | "3.11", "3.12", "3.13", 25 | "pypy-3.11" 26 | ] 27 | os: [ubuntu-latest, macos-latest, windows-latest] 28 | 29 | steps: 30 | 31 | - uses: actions/checkout@v4 32 | 33 | - uses: actions/setup-python@v5 34 | with: 35 | python-version: ${{ matrix.python-version }} 36 | allow-prereleases: true 37 | 38 | - run: ./run.sh ci-install 39 | - run: ./run.sh ci-run 40 | 41 | - uses: codecov/codecov-action@v4 42 | with: 43 | token: ${{ secrets.CODECOV_TOKEN }} 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # dotenv 84 | .env 85 | 86 | # direnv 87 | .envrc 88 | 89 | # virtualenv 90 | .venv* 91 | venv/ 92 | ENV/ 93 | 94 | # Spyder project settings 95 | .spyderproject 96 | .spyproject 97 | 98 | # Rope project settings 99 | .ropeproject 100 | 101 | # mkdocs documentation 102 | /site 103 | 104 | # mypy 105 | .mypy_cache/ 106 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | exclude: ^src/reader/_vendor/.*$ 2 | 3 | repos: 4 | 5 | - repo: https://github.com/pycqa/isort 6 | rev: 6.0.1 7 | hooks: 8 | - id: isort 9 | 10 | - repo: https://github.com/asottile/pyupgrade 11 | rev: v3.19.1 12 | hooks: 13 | - id: pyupgrade 14 | args: ["--py311-plus"] 15 | 16 | - repo: https://github.com/psf/black 17 | rev: 25.1.0 18 | hooks: 19 | - id: black 20 | args: ["-S"] 21 | 22 | - repo: https://github.com/PyCQA/flake8 23 | rev: 7.2.0 24 | hooks: 25 | - id: flake8 26 | additional_dependencies: [flake8-bugbear] 27 | 28 | - repo: https://github.com/pre-commit/pre-commit-hooks 29 | rev: v5.0.0 30 | hooks: 31 | - id: check-byte-order-marker 32 | - id: trailing-whitespace 33 | - id: end-of-file-fixer 34 | 35 | ci: 36 | autoupdate_schedule: quarterly 37 | -------------------------------------------------------------------------------- /.readthedocs.yaml: 
-------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-22.04 5 | tools: 6 | python: "3" # (last stable CPython version) 7 | 8 | python: 9 | install: 10 | - method: pip 11 | path: . 12 | extra_requirements: 13 | - docs 14 | 15 | sphinx: 16 | configuration: docs/conf.py 17 | 18 | formats: all 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018 lemon24 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | 1. Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | 14 | 3. Neither the name of the copyright holder nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 21 | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 24 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 25 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 26 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 27 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include src/reader/_app/templates * 2 | recursive-include src/reader/_app/static * 3 | recursive-include src/reader/_app/v2 * 4 | recursive-include src/reader/_plugins/templates * 5 | recursive-include src/reader *.pyi 6 | recursive-include src/reader py.typed 7 | include run.sh CHANGES.rst LICENSE tox.ini 8 | recursive-include tests * 9 | recursive-include examples * 10 | recursive-include docs * 11 | recursive-include scripts * 12 | global-exclude *.py[cod] 13 | prune docs/_build 14 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. begin-intro 2 | 3 | **reader** is a Python feed reader library. 4 | 5 | It is designed to allow writing feed reader applications 6 | without any business code, 7 | and without depending on a particular framework. 8 | 9 | .. end-intro 10 | 11 | 12 | |build-status-github| |code-coverage| |documentation-status| |pypi-status| |type-checking| |code-style| 13 | 14 | 15 | .. 
|build-status-github| image:: https://github.com/lemon24/reader/workflows/build/badge.svg 16 | :target: https://github.com/lemon24/reader/actions?query=workflow%3Abuild 17 | :alt: build status (GitHub Actions) 18 | 19 | .. |code-coverage| image:: https://codecov.io/gh/lemon24/reader/branch/master/graph/badge.svg?token=lcLZaSFysf 20 | :target: https://codecov.io/gh/lemon24/reader 21 | :alt: code coverage 22 | 23 | .. |documentation-status| image:: https://readthedocs.org/projects/reader/badge/?version=latest&style=flat 24 | :target: https://reader.readthedocs.io/en/latest/?badge=latest 25 | :alt: documentation status 26 | 27 | .. |pypi-status| image:: https://img.shields.io/pypi/v/reader.svg 28 | :target: https://pypi.python.org/pypi/reader 29 | :alt: PyPI status 30 | 31 | .. |type-checking| image:: http://www.mypy-lang.org/static/mypy_badge.svg 32 | :target: http://mypy-lang.org/ 33 | :alt: checked with mypy 34 | 35 | .. |code-style| image:: https://img.shields.io/badge/code%20style-black-000000.svg 36 | :target: https://github.com/psf/black 37 | :alt: code style: black 38 | 39 | 40 | .. begin-features 41 | 42 | *reader* allows you to: 43 | 44 | * retrieve, store, and manage **Atom**, **RSS**, and **JSON** feeds 45 | * mark articles as read or important 46 | * add arbitrary tags/metadata to feeds and articles 47 | * filter feeds and articles 48 | * full-text search articles 49 | * get statistics on feed and user activity 50 | * write plugins to extend its functionality 51 | * skip all the low level stuff and focus on what makes your feed reader different 52 | 53 | ...all these with: 54 | 55 | * a stable, clearly documented API 56 | * excellent test coverage 57 | * fully typed Python 58 | 59 | What *reader* doesn't do: 60 | 61 | * provide an UI 62 | * provide a REST API (yet) 63 | * depend on a web framework 64 | * have an opinion of how/where you use it 65 | 66 | The following exist, but are optional (and frankly, a bit unpolished): 67 | 68 | * a minimal web interface 69 | 70 | * that works even with text-only browsers 71 | * with automatic tag fixing for podcasts (MP3 enclosures) 72 | 73 | * a command-line interface 74 | 75 | .. end-features 76 | 77 | 78 | Documentation: `reader.readthedocs.io`_ 79 | 80 | .. _reader.readthedocs.io: https://reader.readthedocs.io/ 81 | 82 | 83 | Usage: 84 | 85 | .. begin-usage 86 | 87 | .. code-block:: bash 88 | 89 | $ pip install reader 90 | 91 | .. code-block:: python 92 | 93 | >>> from reader import make_reader 94 | >>> 95 | >>> reader = make_reader('db.sqlite') 96 | >>> reader.add_feed('http://www.hellointernet.fm/podcast?format=rss') 97 | >>> reader.update_feeds() 98 | >>> 99 | >>> entries = list(reader.get_entries()) 100 | >>> [e.title for e in entries] 101 | ['H.I. #108: Project Cyclops', 'H.I. #107: One Year of Weird', ...] 102 | >>> 103 | >>> reader.mark_entry_as_read(entries[0]) 104 | >>> 105 | >>> [e.title for e in reader.get_entries(read=False)] 106 | ['H.I. #107: One Year of Weird', 'H.I. #106: Water on Mars', ...] 107 | >>> [e.title for e in reader.get_entries(read=True)] 108 | ['H.I. #108: Project Cyclops'] 109 | >>> 110 | >>> reader.update_search() 111 | >>> 112 | >>> for e in reader.search_entries('year', limit=3): 113 | ... title = e.metadata.get('.title') 114 | ... print(title.value, title.highlights) 115 | ... 116 | H.I. #107: One Year of Weird (slice(15, 19, None),) 117 | H.I. #52: 20,000 Years of Torment (slice(17, 22, None),) 118 | H.I. #83: The Best Kind of Prison () 119 | 120 | .. 
end-usage 121 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = reader 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/changelog.rst: -------------------------------------------------------------------------------- 1 | 2 | .. include:: ../CHANGES.rst 3 | -------------------------------------------------------------------------------- /docs/cli.rst: -------------------------------------------------------------------------------- 1 | 2 | Command-line interface 3 | ====================== 4 | 5 | *reader* comes with a command-line interface 6 | that exposes basic management functionality. 7 | 8 | 9 | .. warning:: 10 | 11 | The CLI is not fully stable, 12 | see the :ref:`roadmap <roadmap>` for details. 13 | 14 | .. note:: 15 | 16 | The command-line interface is optional, use the ``cli`` extra to install 17 | its :ref:`dependencies <optional dependencies>`. 18 | 19 | Most commands need a database to work. The following are equivalent: 20 | 21 | .. code-block:: bash 22 | 23 | python -m reader --db /path/to/db some-command 24 | READER_DB=/path/to/db python -m reader some-command 25 | 26 | If no database path is given, ``~/.config/reader/db.sqlite`` is used 27 | (at least on Linux). 28 | 29 | Add a feed: 30 | 31 | .. code-block:: bash 32 | 33 | python -m reader add http://www.example.com/atom.xml 34 | 35 | Update all feeds: 36 | 37 | .. code-block:: bash 38 | 39 | python -m reader update 40 | 41 | Serve the web application locally (at http://localhost:8080/): 42 | 43 | .. code-block:: bash 44 | 45 | python -m reader serve 46 | 47 | 48 | .. _cli-update: 49 | 50 | Updating feeds 51 | -------------- 52 | 53 | For *reader* to actually be useful as a feed reader, feeds need to get updated 54 | and, if full-text search is enabled, the search index needs to be updated. 55 | 56 | You can run the ``update`` command regularly to update feeds (e.g. every 57 | hour). Note that *reader* uses the ETag and Last-Modified headers, so, if 58 | supported by the server, feeds will only be downloaded if they changed. 59 | 60 | To avoid waiting too long for a new feed to be updated, you can run 61 | ``update --new`` more often (e.g. every minute); this will update 62 | only newly-added feeds. This is also a good time to update the search index.
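The same updates can also be run from Python instead of the CLI; a minimal sketch using the high-level API shown in the quickstart (the ``db.sqlite`` path is only an example):

.. code-block:: python

    from reader import make_reader

    reader = make_reader('db.sqlite')

    # update all feeds; only changed feeds are actually downloaded
    reader.update_feeds()

    # update the full-text search index (requires search to be enabled)
    reader.update_search()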
63 | 64 | You can achieve this using cron:: 65 | 66 | 42 * * * * reader update -v 2>&1 >>"/tmp/$LOGNAME.reader.update.hourly.log" 67 | * * * * * reader update -v --new 2>&1 >>"/tmp/$LOGNAME.reader.update.new.log"; reader search update 2>&1 >>"/tmp/$LOGNAME.reader.search.update.log" 68 | 69 | If you are running *reader* on a personal computer, it might also be convenient 70 | to run ``update`` once immediately after boot:: 71 | 72 | @reboot sleep 60; reader update -v 2>&1 >>"/tmp/$LOGNAME.reader.update.boot.log" 73 | 74 | 75 | Reference 76 | --------- 77 | 78 | .. click:: reader._cli:cli 79 | :prog: reader 80 | :show-nested: 81 | -------------------------------------------------------------------------------- /docs/config.rst: -------------------------------------------------------------------------------- 1 | 2 | Configuration 3 | ============= 4 | 5 | Both the :doc:`CLI <cli>` and the :doc:`web application <app>` can 6 | be configured from a file. 7 | 8 | .. warning:: 9 | 10 | The configuration file format is not stable yet 11 | and might change without any notice. 12 | 13 | .. note:: 14 | 15 | Configuration file loading dependencies get installed automatically when 16 | installing the CLI or the web application 17 | :ref:`extras <optional dependencies>`. 18 | 19 | 20 | The configuration file path can be specified either through the ``--config`` 21 | CLI option or through the ``READER_CONFIG`` environment variable 22 | (also usable with the web application). 23 | 24 | The config file is split into contexts; 25 | this allows having a set of global defaults 26 | and overriding them with CLI- or web-app-specific values. 27 | Use the ``config dump --merge`` command 28 | to see the final configuration for each context. 29 | 30 | The older ``READER_DB``, ``READER_PLUGIN``, and ``READER_APP_PLUGIN`` 31 | environment variables always *replace* the corresponding config values, 32 | so they should be used only for debugging. 33 | 34 | The following example shows the config file structure 35 | and the options currently available: 36 | 37 | .. literalinclude:: ../examples/config.yaml 38 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../CONTRIBUTING.rst 2 | -------------------------------------------------------------------------------- /docs/dev-app.rst: -------------------------------------------------------------------------------- 1 | 2 | Web interface design philosophy 3 | ------------------------------- 4 | 5 | The web interface should be as minimal as possible. 6 | 7 | The web interface should work with text-only browsers, modern browsers, and 8 | everything in-between. Some may be nicer to use, but all functionality should 9 | be available everywhere. 10 | 11 | Fast and ugly is better than slow and pretty. 12 | 13 | It should be possible to build a decent web interface (at least for reader) 14 | using only HTML forms with a few JavaScript enhancements added on top. 15 | 16 | 17 | 2023 update: `Hypermedia Systems`_ and `htmx`_ seem to embody these ideas 18 | in a much better way than I could; 19 | a potential web app re-design will likely use them. 20 | 21 | 22 | .. _Hypermedia Systems: https://hypermedia.systems/ 23 | .. _htmx: https://htmx.org/ 24 | 25 | 26 | User interactions 27 | ~~~~~~~~~~~~~~~~~ 28 | 29 | .. note:: 30 | 31 | This list might lag behind reality; anyway, it all started from here.
32 | 33 | User interactions, by logical groups: 34 | 35 | * entry 36 | 37 | * mark an entry as read 38 | * mark an entry as unread 39 | * go to an entry's link 40 | * go to an entry's feed 41 | * go to an entry's feed link 42 | 43 | * entry list 44 | 45 | * see the latest unread entries 46 | * see the latest read entries 47 | * see the latest entries 48 | 49 | * entry list (feed) 50 | 51 | * mark all the entries as read 52 | * mark all the entries as unread 53 | 54 | * feed 55 | 56 | * add a feed 57 | * delete a feed 58 | * change a feed's title 59 | * go to a feed's entries 60 | * go to a feed's link 61 | 62 | * feed list 63 | 64 | * see a list of all the feeds 65 | 66 | * other 67 | 68 | * be notified of the success/failure of a previous action 69 | 70 | Controls (below), mapped to user interactions: 71 | 72 | * link 73 | 74 | * go to ... 75 | * see ... 76 | 77 | * simple button 78 | 79 | * mark an entry as read 80 | * mark an entry as unread 81 | 82 | * button with input 83 | 84 | * add a feed 85 | * change a feed's title 86 | 87 | * button with checkbox 88 | 89 | * mark all the entries as read 90 | * mark all the entries as unread 91 | * delete a feed 92 | 93 | 94 | Controls 95 | ~~~~~~~~ 96 | 97 | There are three interaction modes, HTML-only, HTML+CSS, and HTML+CSS+JS. 98 | Each mode adds enhancements on top of the previous one. 99 | 100 | In the HTML-only mode, all elements of a control are visible. Clicking the 101 | element that triggers the action (e.g. a button) submits a form and, if 102 | possible, redirects back to the source page, with any error messages shown 103 | after the action element. 104 | 105 | In the HTML+CSS mode, some elements might be hidden so that only the action 106 | element is visible; in its inert state it should look like text. On hover, 107 | the other elements of the control should become visible. 108 | 109 | In the HTML+CSS+JS mode, clicking the action element results in an asynchronous 110 | call, with the status of the action displayed after it. 111 | 112 | Links are just links. 113 | 114 | Simple buttons consist of a single button. 115 | 116 | Buttons with input consist of a text input element followed by a button. 117 | The text input is hidden when not hovered. 118 | 119 | Buttons with checkbox consist of a checkbox, a label for the checkbox, and 120 | a button. The checkbox and label are hidden when not hovered. 121 | 122 | 123 | Page structure 124 | ~~~~~~~~~~~~~~ 125 | 126 | Text TBD. 127 | 128 | .. figure:: images/redesign-01.png 129 | :width: 240px 130 | :alt: page structure, controls 131 | 132 | page structure, controls 133 | 134 | 135 | Pages 136 | ~~~~~ 137 | 138 | Text TBD. 139 | -------------------------------------------------------------------------------- /docs/images/redesign-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lemon24/reader/bcc46c54d304d693a9031666ac68b6fa1fe5f6c1/docs/images/redesign-01.png -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | 2 | reader 3 | ====== 4 | 5 | .. include:: ../README.rst 6 | :start-after: begin-intro 7 | :end-before: end-intro 8 | 9 | 10 | Features 11 | -------- 12 | 13 | .. include:: ../README.rst 14 | :start-after: begin-features 15 | :end-before: end-features 16 | 17 | 18 | Quickstart 19 | ---------- 20 | 21 | What does it look like? Here is an example of *reader* in use: 22 | 23 | .. 
include:: ../README.rst 24 | :start-after: begin-usage 25 | :end-before: end-usage 26 | 27 | 28 | User guide 29 | ---------- 30 | 31 | This part of the documentation guides you through all of the library’s usage patterns. 32 | 33 | .. toctree:: 34 | :maxdepth: 2 35 | 36 | why 37 | install 38 | tutorial 39 | guide 40 | 41 | 42 | API reference 43 | ------------- 44 | 45 | If you are looking for information on a specific function, class, or method, 46 | this part of the documentation is for you. 47 | 48 | .. toctree:: 49 | :maxdepth: 2 50 | 51 | api 52 | internal 53 | 54 | 55 | Unstable features 56 | ----------------- 57 | 58 | The following are optional features that are still being worked on. 59 | They may become their own packages, get merged into the main library, 60 | or be removed in the future. 61 | 62 | .. toctree:: 63 | :maxdepth: 2 64 | 65 | cli 66 | app 67 | config 68 | plugins 69 | 70 | 71 | Project information 72 | ------------------- 73 | 74 | *reader* is released under the :gh:`BSD ` license, 75 | its documentation lives at `Read the Docs`_, 76 | the code on `GitHub`_, 77 | and the latest release on `PyPI`_. 78 | It is rigorously tested on Python |min_python|\+ and PyPy. 79 | 80 | 81 | .. _Read the Docs: https://reader.readthedocs.io/ 82 | .. _GitHub: https://github.com/lemon24/reader 83 | .. _PyPI: https://pypi.org/project/reader/ 84 | 85 | 86 | .. toctree:: 87 | :maxdepth: 2 88 | 89 | contributing 90 | dev 91 | changelog 92 | 93 | 94 | Indices and tables 95 | ================== 96 | 97 | * :ref:`genindex` 98 | * :ref:`search` 99 | -------------------------------------------------------------------------------- /docs/install.rst: -------------------------------------------------------------------------------- 1 | 2 | Installation 3 | ============ 4 | 5 | Python versions 6 | --------------- 7 | 8 | *reader* supports Python |min_python| and newer, and PyPy. 9 | 10 | 11 | Dependencies 12 | ------------ 13 | 14 | These packages will be installed automatically when installing *reader*: 15 | 16 | * `feedparser`_ parses feeds; *reader* is essentially feedparser + state. 17 | * `requests`_ retrieves feeds from the internet; 18 | it replaces feedparser's default use of :mod:`urllib` 19 | to make it easier to write plugins. 20 | * `werkzeug`_ provides HTTP utilities. 21 | * `iso8601`_ parses dates in ISO 8601 / RFC 3339; used for JSON Feed parsing. 22 | * `beautifulsoup4`_ is used to strip HTML tags before adding entries 23 | to the search index. 24 | * `typing-extensions`_ is used for :mod:`typing` backports. 25 | 26 | *reader* also depends on the :mod:`sqlite3` standard library module 27 | (at least SQLite 3.18 with the `JSON1`_ and `FTS5`_ extensions). 28 | 29 | 30 | .. _no-vendored-feedparser: 31 | 32 | .. note:: 33 | 34 | Because `feedparser`_ makes PyPI releases at a lower cadence, 35 | *reader* uses a vendored version of feedparser's `develop`_ branch 36 | by default since :ref:`version 2.9`. 37 | To opt out of this behavior, and make *reader* use 38 | the installed ``feedparser`` package, 39 | set the ``READER_NO_VENDORED_FEEDPARSER`` environment variable to ``1``. 40 | 41 | .. _develop: https://github.com/kurtmckee/feedparser 42 | 43 | 44 | .. _optional dependencies: 45 | 46 | Optional dependencies 47 | ~~~~~~~~~~~~~~~~~~~~~ 48 | 49 | Despite coming with a CLI and web application, *reader* is primarily a library. 50 | As such, most dependencies are optional, and can be installed as `extras`_. 
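For example, to install *reader* together with the CLI and web app extras listed below, you would run something like:

.. code-block:: bash

    pip install 'reader[cli,app]'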
51 | 52 | As of version |version|, *reader* has the following extras: 53 | 54 | * ``cli`` installs the dependencies needed for the 55 | :doc:`command-line interface `. 56 | * ``app`` installs the dependencies needed for the 57 | :doc:`web application `. 58 | * Specific plugins may require additional dependencies; 59 | refer to their documentation for details. 60 | 61 | 62 | .. _beautifulsoup4: https://www.crummy.com/software/BeautifulSoup/ 63 | .. _feedparser: https://feedparser.readthedocs.io/en/latest/ 64 | .. _requests: https://requests.readthedocs.io/ 65 | .. _werkzeug: https://werkzeug.palletsprojects.com/ 66 | .. _iso8601: http://pyiso8601.readthedocs.org/ 67 | .. _typing-extensions: https://pypi.org/project/typing-extensions/ 68 | .. _JSON1: https://www.sqlite.org/json1.html 69 | .. _FTS5: https://www.sqlite.org/fts5.html 70 | 71 | .. _extras: https://www.python.org/dev/peps/pep-0508/#extras 72 | 73 | 74 | Virtual environments 75 | -------------------- 76 | 77 | You should probably install *reader* inside a virtual environment; 78 | see `this `_ for how and why to do it. 79 | 80 | .. _venv: https://flask.palletsprojects.com/en/1.1.x/installation/#virtual-environments 81 | 82 | 83 | Install reader 84 | -------------- 85 | 86 | Use the following command to install *reader*, 87 | along with its required dependencies: 88 | 89 | .. code-block:: bash 90 | 91 | pip install reader 92 | 93 | Use the following command to install *reader* 94 | with `optional dependencies `_: 95 | 96 | .. code-block:: bash 97 | 98 | pip install 'reader[some-extra,...]' 99 | 100 | 101 | Update reader 102 | ~~~~~~~~~~~~~ 103 | 104 | Use the following command to update *reader* 105 | (add any extras as needed): 106 | 107 | .. code-block:: bash 108 | 109 | pip install --upgrade reader 110 | 111 | 112 | Living on the edge 113 | ~~~~~~~~~~~~~~~~~~ 114 | 115 | If you want to use the latest *reader* code before it’s released, 116 | install or update from the master branch: 117 | 118 | .. code-block:: bash 119 | 120 | pip install --upgrade https://github.com/lemon24/reader/archive/master.tar.gz 121 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | set SPHINXPROJ=reader 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 
24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /docs/screenshots/dillo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lemon24/reader/bcc46c54d304d693a9031666ac68b6fa1fe5f6c1/docs/screenshots/dillo.png -------------------------------------------------------------------------------- /docs/screenshots/entries-feed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lemon24/reader/bcc46c54d304d693a9031666ac68b6fa1fe5f6c1/docs/screenshots/entries-feed.png -------------------------------------------------------------------------------- /docs/screenshots/entries-v2-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lemon24/reader/bcc46c54d304d693a9031666ac68b6fa1fe5f6c1/docs/screenshots/entries-v2-dark.png -------------------------------------------------------------------------------- /docs/screenshots/entries-v2-filters-light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lemon24/reader/bcc46c54d304d693a9031666ac68b6fa1fe5f6c1/docs/screenshots/entries-v2-filters-light.png -------------------------------------------------------------------------------- /docs/screenshots/entries.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lemon24/reader/bcc46c54d304d693a9031666ac68b6fa1fe5f6c1/docs/screenshots/entries.png -------------------------------------------------------------------------------- /docs/screenshots/entry-one.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lemon24/reader/bcc46c54d304d693a9031666ac68b6fa1fe5f6c1/docs/screenshots/entry-one.png -------------------------------------------------------------------------------- /docs/screenshots/entry-two.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lemon24/reader/bcc46c54d304d693a9031666ac68b6fa1fe5f6c1/docs/screenshots/entry-two.png -------------------------------------------------------------------------------- /docs/screenshots/feeds.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lemon24/reader/bcc46c54d304d693a9031666ac68b6fa1fe5f6c1/docs/screenshots/feeds.png -------------------------------------------------------------------------------- /docs/screenshots/lynx.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lemon24/reader/bcc46c54d304d693a9031666ac68b6fa1fe5f6c1/docs/screenshots/lynx.png -------------------------------------------------------------------------------- /docs/screenshots/search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lemon24/reader/bcc46c54d304d693a9031666ac68b6fa1fe5f6c1/docs/screenshots/search.png 
-------------------------------------------------------------------------------- /docs/why.rst: -------------------------------------------------------------------------------- 1 | 2 | 3 | Why *reader*? 4 | ============= 5 | 6 | 7 | Why use a feed reader library? 8 | ------------------------------ 9 | 10 | Have you been unhappy with existing feed readers and wanted to make your own, but: 11 | 12 | * never knew where to start? 13 | * it seemed like too much work? 14 | * you don't like writing backend code? 15 | 16 | Are you already working with `feedparser`_, but: 17 | 18 | * want an easier way to store, filter, sort and search feeds and entries? 19 | * want to get back type-annotated objects instead of dicts? 20 | * want to restrict or deny file-system access? 21 | * want to change the way feeds are retrieved by using the more familiar `requests`_ library? 22 | * want to also support `JSON Feed`_? 23 | * want to support custom information sources? 24 | 25 | ... while still supporting all the feed types feedparser does? 26 | 27 | If you answered yes to any of the above, *reader* can help. 28 | 29 | 30 | .. _feedparser: https://feedparser.readthedocs.io/en/latest/ 31 | .. _requests: https://requests.readthedocs.io 32 | .. _JSON Feed: https://jsonfeed.org/ 33 | 34 | 35 | 36 | .. _philosophy: 37 | 38 | The *reader* philosophy 39 | ----------------------- 40 | 41 | * *reader* is a library 42 | * *reader* is for the long term 43 | * *reader* is extensible 44 | * *reader* is stable (within reason) 45 | * *reader* is simple to use; API matters 46 | * *reader* features work well together 47 | * *reader* is tested 48 | * *reader* is documented 49 | * *reader* has minimal dependencies 50 | 51 | 52 | 53 | Why make your own feed reader? 54 | ------------------------------ 55 | 56 | So you can: 57 | 58 | * have full control over your data 59 | * control what features it has or doesn't have 60 | * decide how much you pay for it 61 | * make sure it doesn't get closed while you're still using it 62 | * really, it's `easier than you think`_ 63 | 64 | Obviously, this may not be your cup of tea, but if it is, *reader* can help. 65 | 66 | 67 | .. _easier than you think: https://rachelbythebay.com/w/2011/10/26/fred/ 68 | 69 | 70 | 71 | Why make a feed reader library? 72 | ------------------------------- 73 | 74 | I wanted a feed reader that is: 75 | 76 | * accessible from multiple devices 77 | * fast 78 | * with a simple UI 79 | * self-hosted (for privacy reasons) 80 | * modular / easy to extend (so I can change stuff I don't like) 81 | * written in Python (see above) 82 | 83 | The fact that I couldn't find one extensible enough bugged me so much that I decided to make my own; a few years later, I ended up with what I would've liked to use when I first started. 84 | -------------------------------------------------------------------------------- /examples/config.yaml: -------------------------------------------------------------------------------- 1 | # Contexts are values of the top level map. 2 | # There are 3 known contexts: default, cli, and app. 3 | # 4 | # The default context can also be implicit: top level keys that don't 5 | # correspond to a known context are assumed to belong to the default context. 6 | # 7 | # Thus, the following are equivalent: 8 | # 9 | # default: 10 | # reader: ... 11 | # something else: ... 12 | # 13 | # --- 14 | # 15 | # reader: ... 16 | # something else: ... 17 | # 18 | # However, mixing them is an error: 19 | # 20 | # default: 21 | # reader: ... 22 | # something else: ... 
23 | 24 | 25 | # default context. 26 | # 27 | # Provides default settings for the other contexts. 28 | 29 | default: 30 | # The reader section contains make_reader() keyword arguments: 31 | reader: 32 | url: /path/to/db.sqlite 33 | feed_root: /path/to/feeds 34 | 35 | # Additionally, it's possible to specify reader plugins, as a 36 | # <plugin name>: <plugin options> 37 | # map; options are ignored at the moment. 38 | # Note that unlike other settings, plugins are merged, not replaced. 39 | plugins: 40 | reader._plugins.sqlite_releases:init: 41 | reader.ua_fallback: 42 | 43 | 44 | # CLI context. 45 | 46 | cli: 47 | # When using the CLI, we want to use some additional reader plugins. 48 | reader: 49 | plugins: 50 | reader.mark_as_read: 51 | reader.entry_dedupe: 52 | 53 | # The cli context also allows changing the CLI defaults. 54 | defaults: 55 | # Note that while the --db and --plugin CLI options could appear here, 56 | # doing so isn't very useful, since the CLI values (including defaults) 57 | # always override the corresponding config file values. 58 | 59 | # Options that can be passed multiple times take a list of values: 60 | # --plugin reader._plugins.enclosure_dedupe:enclosure_dedupe 61 | # plugin: [reader._plugins.enclosure_dedupe:enclosure_dedupe] 62 | 63 | # Subcommand defaults can be given as nested maps: 64 | 65 | # add --update 66 | add: 67 | # Flags take a boolean value: 68 | update: yes 69 | 70 | # update --workers 10 -vv 71 | update: 72 | workers: 10 73 | # Flags that can be repeated take an integer: 74 | verbose: 2 75 | 76 | search: 77 | # search update -v 78 | update: 79 | verbose: 1 80 | 81 | # serve --port 8888 82 | serve: 83 | port: 8888 84 | 85 | 86 | # Web application context. 87 | # 88 | # Used for both the serve command (`python -m reader serve`) 89 | # and when using the WSGI application (reader._app.wsgi:app) directly. 90 | 91 | app: 92 | # When using the web app, we want to use an additional reader plugin. 93 | reader: 94 | plugins: 95 | reader.enclosure_dedupe: 96 | 97 | # ... and some app plugins. 98 | plugins: 99 | reader._plugins.enclosure_tags:init: 100 | reader._plugins.preview_feed_list:init: 101 | -------------------------------------------------------------------------------- /examples/custom_headers.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adding custom headers when retrieving feeds 3 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 4 | 5 | Example of adding custom request headers with :attr:`.SessionFactory.request_hooks`: 6 | 7 | .. code-block:: console 8 | 9 | $ python examples/custom_headers.py 10 | updating... 11 | server: Hello, world! 12 | updated!
13 | 14 | """ 15 | 16 | # fmt: off 17 | # flake8: noqa 18 | 19 | import http.server 20 | import threading 21 | from reader import make_reader 22 | 23 | # start a background server that logs the received header 24 | 25 | class Handler(http.server.BaseHTTPRequestHandler): 26 | def log_message(self, *_): pass 27 | def do_GET(self): 28 | print("server:", self.headers.get('my-header')) 29 | self.send_error(304) 30 | 31 | server = http.server.HTTPServer(('localhost', 8080), Handler) 32 | threading.Thread(target=server.handle_request).start() 33 | 34 | # create a reader object 35 | 36 | reader = make_reader(':memory:') 37 | reader.add_feed('http://localhost:8080') 38 | 39 | # set up a hook that adds the header to each request 40 | 41 | def hook(session, request, **kwargs): 42 | request.headers.setdefault('my-header', 'Hello, world!') 43 | 44 | reader._parser.session_factory.request_hooks.append(hook) 45 | 46 | # updating the feed sends the modified request to the server 47 | 48 | print("updating...") 49 | reader.update_feeds() 50 | print("updated!") 51 | -------------------------------------------------------------------------------- /examples/feed_slugs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Feed slugs 3 | ~~~~~~~~~~ 4 | 5 | This is a recipe of what a "get feed by slug" plugin may look like 6 | (e.g. for user-defined short URLs). 7 | 8 | Usage:: 9 | 10 | >>> from reader import make_reader 11 | >>> import feed_slugs 12 | >>> reader = make_reader('db.sqlite', plugins=[feed_slugs.init_reader]) 13 | >>> reader.set_feed_slug('https://death.andgravity.com/_feed/index.xml', 'andgravity') 14 | >>> reader.get_feed_by_slug('andgravity') 15 | Feed(url='https://death.andgravity.com/_feed/index.xml', ...) 16 | >>> reader.get_feed_slug(_.url) 17 | 'andgravity' 18 | 19 | .. 20 | Originally implemented for https://github.com/lemon24/reader/issues/358. 
21 | 22 | """ 23 | 24 | # fmt: off 25 | # flake8: noqa 26 | 27 | def init_reader(reader): 28 | # __get__() allows help(reader.get_feed_by_slug) to work 29 | reader.get_feed_by_slug = get_feed_by_slug.__get__(reader) 30 | reader.get_feed_slug = get_feed_slug.__get__(reader) 31 | reader.set_feed_slug = set_feed_slug.__get__(reader) 32 | 33 | def get_feed_by_slug(reader, slug): 34 | tag = _make_tag(reader, slug) 35 | return next(reader.get_feeds(tags=[tag], limit=1), None) 36 | 37 | def get_feed_slug(reader, feed): 38 | if tag := next(_get_tags(reader, feed), None): 39 | return tag.removeprefix(_make_tag(reader, '')) 40 | return None 41 | 42 | def set_feed_slug(reader, feed, slug: str | None): 43 | feed = reader.get_feed(feed) 44 | tag = _make_tag(reader, slug) 45 | 46 | if not slug: 47 | reader.delete_tag(feed, tag, missing_ok=True) 48 | return 49 | 50 | reader.set_tag(feed, tag) 51 | 52 | # ensure only one feed has the slug; technically a race condition, 53 | # when it happens no feed will have the tag 54 | for other_feed in reader.get_feeds(tags=[tag]): 55 | if feed.url != other_feed.url: 56 | reader.delete_tag(other_feed, tag, missing_ok=True) 57 | 58 | # ensure feed has only one slug; technically a race condition, 59 | # when it happens the feed will have no slug 60 | for other_tag in _get_tags(reader, feed): 61 | if tag != other_tag: 62 | reader.delete_tag(feed, other_tag, missing_ok=True) 63 | 64 | def _make_tag(reader, slug): 65 | return reader.make_plugin_reserved_name('slug', slug) 66 | 67 | def _get_tags(reader, resource): 68 | prefix = _make_tag(reader, '') 69 | # filter tags by prefix would make this faster, 70 | # https://github.com/lemon24/reader/issues/309 71 | return (t for t in reader.get_tag_keys(resource) if t.startswith(prefix)) 72 | 73 | if __name__ == '__main__': 74 | from reader import make_reader 75 | 76 | reader = make_reader('db.sqlite', plugins=[init_reader]) 77 | url = 'https://death.andgravity.com/_feed/index.xml' 78 | 79 | reader.set_feed_slug(url, 'one') 80 | print( 81 | reader.get_feed_slug(url), 82 | getattr(reader.get_feed_by_slug('one'), 'url', None), 83 | ) 84 | 85 | reader.set_feed_slug(url, 'two') 86 | print( 87 | reader.get_feed_slug(url), 88 | getattr(reader.get_feed_by_slug('two'), 'url', None), 89 | ) 90 | 91 | reader.set_feed_slug('https://xkcd.com/atom.xml', 'two') 92 | print( 93 | reader.get_feed_slug(url), 94 | getattr(reader.get_feed_by_slug('two'), 'url', None), 95 | ) 96 | -------------------------------------------------------------------------------- /examples/parser_only.py: -------------------------------------------------------------------------------- 1 | """ 2 | Parsing a feed retrieved with something other than *reader* 3 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 4 | 5 | Example of using the *reader* internal API to parse a feed 6 | retrieved asynchronously with `HTTPX `_: 7 | 8 | .. code-block:: console 9 | 10 | $ python examples/parser_only.py 11 | death and gravity 12 | Has your password been pwned? 
Or, how I almost failed to search a 37 GB text file in under 1 millisecond (in Python) 13 | 14 | """ 15 | 16 | import asyncio 17 | import io 18 | import httpx 19 | from reader._parser import default_parser 20 | from werkzeug.http import parse_options_header 21 | 22 | url = "https://death.andgravity.com/_feed/index.xml" 23 | meta_parser = default_parser() 24 | 25 | 26 | async def main(): 27 | async with httpx.AsyncClient() as client: 28 | response = await client.get(url) 29 | 30 | # to select the parser, we need the MIME type of the response 31 | content_type = response.headers.get('content-type') 32 | if content_type: 33 | mime_type, _ = parse_options_header(content_type) 34 | else: 35 | mime_type = None 36 | 37 | # select the parser (raises ParseError if none found) 38 | parser, _ = meta_parser.get_parser(url, mime_type) 39 | 40 | # wrap the content in a readable binary file 41 | file = io.BytesIO(response.content) 42 | 43 | # parse the feed; not doing parser(url, file, response.headers) directly 44 | # because parsing is CPU-intensive and would block the event loop 45 | feed, entries = await asyncio.to_thread(parser, url, file, response.headers) 46 | 47 | print(feed.title) 48 | print(entries[0].title) 49 | 50 | 51 | if __name__ == '__main__': 52 | asyncio.run(main()) 53 | -------------------------------------------------------------------------------- /examples/podcast.py: -------------------------------------------------------------------------------- 1 | """ 2 | Use *reader* to download all the episodes of a podcast, 3 | and then each new episode as they come up. 4 | 5 | Part of https://reader.readthedocs.io/en/latest/tutorial.html 6 | 7 | """ 8 | 9 | import os 10 | import os.path 11 | import shutil 12 | 13 | import requests 14 | 15 | from reader import make_reader 16 | 17 | 18 | feed_url = "http://www.hellointernet.fm/podcast?format=rss" 19 | podcasts_dir = "podcasts" 20 | 21 | reader = make_reader("db.sqlite") 22 | 23 | 24 | def add_and_update_feed(): 25 | reader.add_feed(feed_url, exist_ok=True) 26 | reader.update_feeds() 27 | 28 | 29 | def download_everything(): 30 | entries = reader.get_entries(feed=feed_url, has_enclosures=True, read=False) 31 | 32 | for entry in entries: 33 | print(entry.feed.title, '-', entry.title) 34 | 35 | for enclosure in entry.enclosures: 36 | filename = enclosure.href.rpartition('/')[2] 37 | print(" *", filename) 38 | download_file(enclosure.href, os.path.join(podcasts_dir, filename)) 39 | 40 | reader.mark_entry_as_read(entry) 41 | 42 | 43 | def download_file(src_url, dst_path): 44 | part_path = dst_path + '.part' 45 | with requests.get(src_url, stream=True) as response: 46 | response.raise_for_status() 47 | try: 48 | with open(part_path, 'wb') as file: 49 | shutil.copyfileobj(response.raw, file) 50 | os.rename(part_path, dst_path) 51 | except BaseException: 52 | try: 53 | os.remove(part_path) 54 | except Exception: 55 | pass 56 | raise 57 | 58 | 59 | add_and_update_feed() 60 | 61 | feed = reader.get_feed(feed_url) 62 | print(f"updated {feed.title} (last changed at {feed.updated})\n") 63 | 64 | os.makedirs(podcasts_dir, exist_ok=True) 65 | download_everything() 66 | -------------------------------------------------------------------------------- /examples/terminal.py: -------------------------------------------------------------------------------- 1 | """ 2 | A simple terminal feed reader that shows a screenful of articles 3 | and updates every 10 minutes. 
4 | 5 | Run with:: 6 | 7 | python examples/terminal.py db.sqlite 8 | 9 | To add feeds, run:: 10 | 11 | python -m reader --db db.sqlite add http://example.com/feed.xml 12 | 13 | 14 | """ 15 | 16 | import logging 17 | import os 18 | import sys 19 | import textwrap 20 | import time 21 | import itertools 22 | 23 | from reader import make_reader 24 | 25 | 26 | def get_lines(reader): 27 | size = os.get_terminal_size() 28 | 29 | # Only take as many entries as we have lines. 30 | entries = reader.get_entries(limit=size.lines - 1) 31 | 32 | lines = ( 33 | line 34 | for entry in entries 35 | for line in textwrap.wrap( 36 | f"{(entry.published or entry.updated or entry.added).date()} - " 37 | f"{entry.feed.title} - {entry.title}", 38 | width=size.columns, 39 | ) 40 | ) 41 | return itertools.islice(lines, size.lines - 1) 42 | 43 | 44 | def print_status_line(message, seconds): 45 | print(message, end="", flush=True) 46 | time.sleep(seconds) 47 | length = len(message) 48 | print("\b" * length, " " * length, "\b" * length, sep="", end="", flush=True) 49 | 50 | 51 | reader = make_reader(sys.argv[1]) 52 | 53 | # Prevent update errors from showing. 54 | logging.basicConfig(level=logging.CRITICAL) 55 | 56 | update_interval = 60 * 10 57 | last_updated = time.monotonic() - update_interval 58 | 59 | while True: 60 | # Clear screen; should be cross-platform. 61 | os.system("cls || clear") 62 | 63 | print(*get_lines(reader), sep="\n") 64 | 65 | # Keep sleeping until we need to update. 66 | while True: 67 | now = time.monotonic() 68 | if now - last_updated > update_interval: 69 | break 70 | to_sleep = update_interval - (now - last_updated) 71 | message = f"updating in {int(to_sleep // 60) + 1} minutes ..." 72 | print_status_line(message, 60) 73 | 74 | print("updating ...", end="", flush=True) 75 | last_updated = time.monotonic() 76 | reader.update_feeds(workers=10) 77 | -------------------------------------------------------------------------------- /scripts/backup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # back up a SQLite database 4 | # 5 | # usage: 6 | # ./backup.sh src dst 7 | # ./backup.sh src 8 | # 9 | # example: 10 | # "./backup.sh /src/db.sqlite" -> ./db.sqlite.2023-01-28.gz 11 | # 12 | 13 | set -o nounset 14 | set -o pipefail 15 | set -o errexit 16 | 17 | if (( $# == 1 )); then 18 | src=$1 19 | dst=$( pwd )/$( basename "$src" ).$( date -u +%Y-%m-%d ) 20 | elif (( $# == 2 )); then 21 | src=$1 22 | dst=$2 23 | else 24 | exit 1 25 | fi 26 | 27 | tmpdir=$( mktemp -d ) 28 | trap 'rm -rf '"$tmpdir" EXIT 29 | 30 | tmp=$tmpdir/$( basename "$src" ) 31 | 32 | du -sh "$src" 33 | sqlite3 "$src" "VACUUM INTO '$tmp'" 34 | du -sh "$tmp" 35 | gzip -c "$tmp" > "$dst.gz" 36 | du -sh "$dst.gz" 37 | -------------------------------------------------------------------------------- /scripts/generate_import_all.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import random 3 | 4 | 5 | context = {} 6 | exec('from reader import *', context) 7 | context.pop('__builtins__') 8 | 9 | print("# importing stuff from reader should type check") 10 | print("# force mypy to check this every time:", random.random()) 11 | 12 | for name, value in context.items(): 13 | if inspect.ismodule(value): 14 | continue 15 | print('from reader import', name) 16 | -------------------------------------------------------------------------------- /scripts/jscontrols.py: 
-------------------------------------------------------------------------------- 1 | import os.path 2 | import sys 3 | 4 | import werkzeug 5 | from flask import flash 6 | from flask import Flask 7 | from flask import jsonify 8 | from flask import redirect 9 | from flask import request 10 | 11 | 12 | root_dir = os.path.dirname(__file__) 13 | sys.path.insert(0, os.path.join(root_dir, '../src')) 14 | 15 | from reader._app.api_thing import APIError 16 | from reader._app.api_thing import APIThing 17 | 18 | 19 | app = Flask( 20 | __name__, 21 | template_folder='../src/reader/_app/templates', 22 | static_folder='../src/reader/_app/static', 23 | ) 24 | app.secret_key = 'secret' 25 | 26 | 27 | @app.route('/') 28 | def root(): 29 | with open(os.path.join(root_dir, 'jscontrols.html')) as f: 30 | template_string = f.read() 31 | return app.jinja_env.from_string(template_string).render() 32 | 33 | 34 | form = APIThing(app, '/form', 'form') 35 | 36 | 37 | @form 38 | def simple(data): 39 | return 'simple' 40 | 41 | 42 | @form 43 | def simple_next(data): 44 | return 'simple-next: %s' % data['next'] 45 | 46 | 47 | @form(really=True) 48 | def confirm(data): 49 | return 'confirm' 50 | 51 | 52 | @form 53 | def text(data): 54 | text = data['text'] 55 | if text.startswith('err'): 56 | raise APIError(text, 'category') 57 | return 'text: %s' % text 58 | 59 | 60 | @form(really=True) 61 | def text_confirm(data): 62 | text = data['text'] 63 | if text.startswith('err'): 64 | raise APIError(text, 'category') 65 | return 'text confirm: %s' % text 66 | -------------------------------------------------------------------------------- /scripts/lines.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Print various Python and wc lines of code. 4 | 5 | function sloc { 6 | coverage report \ 7 | | grep -A9999 ^--- \ 8 | | grep -B9999 ^--- \ 9 | | grep -v ^-- \ 10 | | awk '{ print $1 "\t" $2 }' 11 | } 12 | 13 | function count { 14 | sloc | grep "$@" | cut -f2 | paste -sd+ - | bc 15 | sloc | grep "$@" | cut -f1 | xargs wc -l | tail -n-1 | awk '{ print $1 }' 16 | } 17 | 18 | # cache sloc output 19 | _sloc=$( sloc ) 20 | function sloc { 21 | echo "$_sloc" 22 | } 23 | 24 | { 25 | echo '' stmts lines 26 | echo src $( count ^src/ ) 27 | echo core $( count -e ^src/reader/core/ -e ^src/reader/__init__.py ) 28 | echo cli $( count ^src/reader/cli ) 29 | echo app $( count ^src/reader/app/ ) 30 | echo plugins $( count ^src/reader/plugins/ ) 31 | echo tests $( count ^tests/ ) 32 | echo total $( count '.' ) 33 | } \ 34 | | tr ' ' '\t' 35 | -------------------------------------------------------------------------------- /src/reader/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | reader 3 | ====== 4 | 5 | A minimal feed reader. 6 | 7 | Usage 8 | ----- 9 | 10 | Here is small example of using reader. 
11 | 12 | Create a Reader object:: 13 | 14 | reader = make_reader('db.sqlite') 15 | 16 | Add a feed:: 17 | 18 | reader.add_feed('http://www.hellointernet.fm/podcast?format=rss') 19 | 20 | Update all the feeds:: 21 | 22 | reader.update_feeds() 23 | 24 | Get all the entries, both read and unread:: 25 | 26 | entries = list(reader.get_entries()) 27 | 28 | Mark the first entry as read:: 29 | 30 | reader.mark_entry_as_read(entries[0]) 31 | 32 | Print the titles of the unread entries:: 33 | 34 | for e in reader.get_entries(read=False): 35 | print(e.title) 36 | 37 | 38 | """ 39 | 40 | __version__ = '3.19.dev0' 41 | 42 | # isort: off 43 | 44 | from .core import ( 45 | Reader as Reader, 46 | make_reader as make_reader, 47 | ) 48 | 49 | from .types import ( 50 | Feed as Feed, 51 | ExceptionInfo as ExceptionInfo, 52 | Entry as Entry, 53 | Content as Content, 54 | Enclosure as Enclosure, 55 | EntrySource as EntrySource, 56 | EntrySearchResult as EntrySearchResult, 57 | HighlightedString as HighlightedString, 58 | FeedCounts as FeedCounts, 59 | EntryCounts as EntryCounts, 60 | EntrySearchCounts as EntrySearchCounts, 61 | FeedSort as FeedSort, 62 | EntrySort as EntrySort, 63 | EntrySearchSort as EntrySearchSort, 64 | UpdateResult as UpdateResult, 65 | UpdatedFeed as UpdatedFeed, 66 | EntryUpdateStatus as EntryUpdateStatus, 67 | ) 68 | 69 | from .exceptions import ( 70 | ReaderError as ReaderError, 71 | FeedError as FeedError, 72 | FeedExistsError as FeedExistsError, 73 | FeedNotFoundError as FeedNotFoundError, 74 | InvalidFeedURLError as InvalidFeedURLError, 75 | EntryError as EntryError, 76 | EntryExistsError as EntryExistsError, 77 | EntryNotFoundError as EntryNotFoundError, 78 | UpdateError as UpdateError, 79 | ParseError as ParseError, 80 | UpdateHookError as UpdateHookError, 81 | SingleUpdateHookError as SingleUpdateHookError, 82 | UpdateHookErrorGroup as UpdateHookErrorGroup, 83 | StorageError as StorageError, 84 | SearchError as SearchError, 85 | SearchNotEnabledError as SearchNotEnabledError, 86 | InvalidSearchQueryError as InvalidSearchQueryError, 87 | TagError as TagError, 88 | TagNotFoundError as TagNotFoundError, 89 | ResourceNotFoundError as ResourceNotFoundError, 90 | PluginError as PluginError, 91 | InvalidPluginError as InvalidPluginError, 92 | PluginInitError as PluginInitError, 93 | ReaderWarning as ReaderWarning, 94 | ) 95 | 96 | 97 | # For internal use only. 98 | 99 | _CONFIG_ENVVAR = 'READER_CONFIG' 100 | _DB_ENVVAR = 'READER_DB' 101 | _PLUGIN_ENVVAR = 'READER_PLUGIN' 102 | _APP_PLUGIN_ENVVAR = 'READER_APP_PLUGIN' 103 | _CLI_PLUGIN_ENVVAR = 'READER_CLI_PLUGIN' 104 | 105 | 106 | # Constants. 107 | 108 | USER_AGENT = f'python-reader/{__version__} (+https://github.com/lemon24/reader)' 109 | 110 | 111 | # Prevent any logging output by default. If no handler is set, 112 | # the messages bubble up to the root logger and get printed on stderr. 113 | # https://docs.python.org/3/howto/logging.html#library-config 114 | import logging # noqa: E402 115 | 116 | logging.getLogger('reader').addHandler(logging.NullHandler()) 117 | -------------------------------------------------------------------------------- /src/reader/__main__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | 4 | CANNOT_IMPORT = """\ 5 | Error: cannot import reader._cli 6 | 7 | This might be due to missing dependencies. 
The command-line interface is 8 | optional, use the 'cli' extra to install its dependencies: 9 | 10 | pip install reader[cli] 11 | """ 12 | 13 | try: 14 | from reader._cli import cli 15 | 16 | cli(prog_name='python -m reader') 17 | except ImportError: 18 | print(CANNOT_IMPORT, file=sys.stderr) 19 | raise 20 | -------------------------------------------------------------------------------- /src/reader/_app/cli.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | import reader 4 | from reader._cli import setup_logging 5 | 6 | 7 | def make_add_response_headers_middleware(wsgi_app, headers): 8 | def wsgi_app_wrapper(environ, start_response): 9 | def start_response_wrapper(status, response_headers, exc_info=None): 10 | response_headers.extend(headers) 11 | return start_response(status, response_headers, exc_info) 12 | 13 | return wsgi_app(environ, start_response_wrapper) 14 | 15 | return wsgi_app_wrapper 16 | 17 | 18 | @click.command() 19 | @click.pass_obj 20 | @click.option('-h', '--host', default='localhost', help="The interface to bind to.") 21 | @click.option('-p', '--port', default=8080, type=int, help="The port to bind to.") 22 | @click.option( 23 | '--plugin', 24 | multiple=True, 25 | envvar=reader._APP_PLUGIN_ENVVAR, 26 | help="Import path to a web app plug-in. Can be passed multiple times.", 27 | ) 28 | @click.option('-v', '--verbose', count=True) 29 | def serve(config, host, port, plugin, verbose): 30 | """Start a local HTTP reader server.""" 31 | setup_logging(verbose) 32 | from werkzeug.serving import run_simple 33 | 34 | from . import create_app 35 | 36 | if plugin: 37 | config['app']['plugins'] = dict.fromkeys(plugin) 38 | 39 | app = create_app(config) 40 | app.wsgi_app = make_add_response_headers_middleware( 41 | app.wsgi_app, 42 | [('Referrer-Policy', 'same-origin')], 43 | ) 44 | 45 | run_simple(host, port, app) 46 | -------------------------------------------------------------------------------- /src/reader/_app/templates/add_entry.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% import "macros.html" as macros %} 4 | 5 | 6 | {% block page_title %}Add entry to {{ macros.feed_title(feed) }}{% endblock %} 7 | {% block main_title %}Add entry to {{ macros.feed_title(feed) }}{% endblock %} 8 | 9 | 10 | {% block body %} 11 | 12 |
13 | 14 | 15 |
16 | 17 |

18 |

19 |

20 | 21 | 22 | 23 | {# TODO: maybe redirect to entry page? how? – we don't know the entry url yet #} 24 | 25 | 26 |

27 | 28 | 29 |
    30 | 31 | {% for message in get_flashed_messages_by_prefix( 32 | ('add-entry', feed.url), 33 | ) %} 34 |
  • {{ message }} 35 | {% endfor %} 36 | 37 |
38 | 39 | 40 |
41 | 42 | {% endblock %} 43 | -------------------------------------------------------------------------------- /src/reader/_app/templates/entry.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% import "macros.html" as macros %} 4 | 5 | {% block page_title %}Entry: {{ (entry.title or entry.link or entry.id) | trim | striptags }}{% endblock %} 6 | {% block main_title %}Entry: {{ (entry.title or entry.link or entry.id) | striptags }}{% endblock %} 7 | 8 | 9 | {% block body %} 10 | 11 | {% set feed = entry.feed %} 12 | {% set content = entry.get_content() %} 13 | 14 | 15 |
16 | 17 | 18 |
    19 | 20 |
  • 21 | {% if entry.author %} by {{ entry.author }}{% endif %} 22 | in {{ entry.feed_resolved_title or feed.url }} 23 |
  • 24 | {%- set published = entry.published or entry.updated_not_none -%} 25 | {{ published | humanize_naturaltime }} 26 | 27 | {% set next = url_for('.entry', **request.args) %} 28 | {% set context = {'feed-url': feed.url, 'entry-id': entry.id} %} 29 | 30 | {% if entry.read %} 31 | {{ macros.simple_button('.form_api', 'mark-as-unread', 'unread', leave_disabled=true, next=next, context=context, title=entry.read_modified or "not modified") }} 32 | {% else %} 33 | {{ macros.simple_button('.form_api', 'mark-as-read', 'read', leave_disabled=true, next=next, context=context, title=entry.read_modified or "not modified") }} 34 | {% endif %} 35 | 36 | {% if not entry.important %} 37 | {{ macros.simple_button('.form_api', 'mark-as-important', 'important', leave_disabled=true, next=next, context=context, title=entry.important_modified or "not modified") }} 38 | {% endif %} 39 | {% if entry.important is not none %} 40 | {{ macros.simple_button('.form_api', 'clear-important', "clear " + ("important" if entry.important else "don't care"), leave_disabled=true, next=next, context=context) }} 41 | {% endif %} 42 | {% if entry.important is not false %} 43 | {{ macros.simple_button('.form_api', 'mark-as-unimportant', "don't care", leave_disabled=true, next=next, context=context, title=entry.important_modified or "not modified") }} 44 | {% endif %} 45 | 46 | {% if entry.added_by == 'user' %} 47 | {{ macros.confirm_button('.form_api', 'delete-entry', 'delete', leave_disabled=true, next=url_for('.entries', **request.args), context=context) }} 48 | {% endif %} 49 | 50 |
  • 51 | update metadata 52 | 53 | {{ macros.readtime(tags) }} 54 | 55 | 56 | {% for message in get_flashed_messages_by_prefix( 57 | ('mark-as-read', feed.url, entry.id), 58 | ('mark-as-unread', feed.url, entry.id), 59 | ('mark-as-important', feed.url, entry.id), 60 | ('clear-important', feed.url, entry.id), 61 | ('mark-as-unimportant', feed.url, entry.id), 62 | ('delete-entry', feed.url, entry.id), 63 | ) %} 64 |
  • {{ message }} 65 | {% endfor %} 66 | 67 |
68 | 69 | 70 | {# TODO: Also show summary. #} 71 | {# TODO: This allows iframes to show; is it safe? #} 72 | {# TODO: This should be styled somehow. #} 73 | {# TODO: h1 inside article is as big as the h1 in the header. #} 74 | {# TODO: Tables look wonky if they're too wide; hard to fix without cooperation from the html. #} 75 | 76 | 77 | {% if content %} 78 |
79 | {% if not content.is_html -%} 80 |
{{ content.value }}
81 | {%- else -%} 82 | {{ content.value | safe }} 83 | {%- endif %} 84 | 85 |
86 | {% else %} 87 |

no content

88 | {% endif %} 89 | 90 | {{ macros.entry_enclosures(entry) }} 91 | 92 | 93 |

Links: 94 | {% for title, href in additional_links(entry) %} 95 | {{ title }}  96 | {% endfor %} 97 | 98 | 99 | 100 |

101 | {% endblock %} 102 | -------------------------------------------------------------------------------- /src/reader/_app/templates/layout.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 14 | 15 | {% block page_title %}{% endblock %} 16 | 17 |
18 |
    19 |
  • 20 | entries 21 | feeds 22 | tags 23 | metadata 24 | v2 25 | 26 | {{ macros.text_input_button_get( 27 | 'reader.preview', 'add feed', 'url', 'url', 28 | ) }} 29 | 30 | 31 | {# TODO: this is likely not needed since add-feed became a GET button #} 32 | {% for message in get_flashed_messages_by_prefix('add-feed') %} 33 |
  • {{ message }} 34 | {% endfor %} 35 |
36 | 37 |
38 | 39 | 40 |

{% block main_title %}{% endblock %}

41 | 42 | {% block body %}{% endblock %} 43 | 44 | 45 | 62 | 63 | 64 | {% if config.DEBUG %} 65 |

74 | 75 | 76 | {{ config.READER_CONFIG.merged('app').reader.url }} 77 | 78 | 79 | {% set maxrss = debug_maxrss_mib() %} 80 | 81 | rss={{ maxrss | round(1) }} 82 | 83 | 84 |

85 | {% endif %} 86 | -------------------------------------------------------------------------------- /src/reader/_app/templates/metadata.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% import "macros.html" as macros %} 4 | 5 | 6 | {% macro make_title() %} 7 | {% if feed %} 8 | {% if not entry %} 9 | Metadata for 10 | 12 | {{ macros.feed_title(feed) }} 13 | {% else %} 14 | Metadata for 15 | 16 | {{ entry.title or "untitled" }} 17 | {% endif %} 18 | {% else %} 19 | Global metadata 20 | {% endif %} 21 | {% endmacro %} 22 | 23 | {% block page_title %}{{ make_title() | striptags }}{% endblock %} 24 | {% block main_title %}{{ make_title() }}{% endblock %} 25 | 26 | 27 | {% block body %} 28 | 29 | 30 |
31 |
32 | 33 | 34 | {% if feed %} 35 | 36 | {% if entry %} 37 | 38 | {% endif %} 39 | {% endif %} 40 | 41 |
42 | 43 | 44 |
    45 | 46 | {% set resource_id = entry.resource_id or feed.resource_id or () %} 47 | 48 | {% for message in get_flashed_messages_by_prefix( 49 | ('add-metadata',) + resource_id, 50 | ) %} 51 |
  • {{ message }} 52 | {% endfor %} 53 | 54 |
55 | 56 |
57 | 58 | 59 | {% for key, value in metadata | sort %} 60 | 91 | 92 | {% else %} 93 |

no metadata for this resource

94 | {% endfor %} 95 | 96 | {% endblock %} 97 | -------------------------------------------------------------------------------- /src/reader/_app/templates/tags.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% import "macros.html" as macros %} 4 | 5 | 6 | {% block page_title %}Tags{% endblock %} 7 | {% block main_title %}Tags{% endblock %} 8 | 9 | 10 | {% block body %} 11 | 12 |
13 |
    14 | 15 | {{ macros.toggle_link('counts', [ 16 | ('yes', 'counts'), 17 | ('no', 'no counts'), 18 | ], 'no', '.tags') }} 19 | 20 | {% if error %} 21 |
  • error: {{ error }} 22 | {% endif %} 23 | 24 |
25 |
26 | 27 | 28 |
29 | 30 | {% for tag, feed_counts, entry_counts in tags %} 31 |
32 | {% if tag == none %} all 33 | {% elif tag == true %} any tags 34 | {% elif tag == false %} no tags 35 | {% else %} {{ tag }} 36 | {% endif %} 37 |
38 | 39 |
40 | {% set url_kwargs = {'tags': [tag] | tojson } if tag is not none else {} %} 41 | 42 | {% if feed_counts %} 43 | {{ feed_counts.total }} 44 | {% endif %} 45 | feeds 46 | {%- if feed_counts %}{% endif -%} 47 | , 48 | {% if entry_counts %} 49 | {{ macros.entry_counts(entry_counts, url_for('.entries', **url_kwargs)) }} 50 | {% else %} 51 | entries 52 | {% endif %} 53 | {% if entry_counts %}{% endif %} 54 |
55 | 56 | {% else %} 57 | 58 | {% if not error %} 59 |

no tags

60 | {% endif %} 61 | 62 | {% endfor %} 63 |
64 | 65 | 66 | 67 | {% endblock %} 68 | -------------------------------------------------------------------------------- /src/reader/_app/v2/__init__.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | from functools import partial 3 | 4 | from flask import abort 5 | from flask import Blueprint 6 | from flask import current_app 7 | from flask import redirect 8 | from flask import request 9 | from flask import url_for 10 | from jinja2_fragments.flask import render_block 11 | 12 | from reader import InvalidSearchQueryError 13 | 14 | from .. import EntryProxy 15 | from .. import get_reader 16 | from .. import stream_template 17 | from .forms import EntryFilter 18 | from .forms import SearchEntryFilter 19 | 20 | 21 | blueprint = Blueprint( 22 | 'v2', __name__, template_folder='templates', static_folder='static' 23 | ) 24 | 25 | 26 | @blueprint.route('/') 27 | def entries(): 28 | reader = get_reader() 29 | 30 | # TODO: search improvements 31 | # TODO: paqgination 32 | # TODO: read time 33 | 34 | if request.args.get('q', '').strip(): 35 | form = SearchEntryFilter(request.args) 36 | else: 37 | form = EntryFilter(request.args) 38 | 39 | form_args = form.args 40 | if q := form_args.pop('Q', ''): 41 | form_args['q'] = q 42 | return redirect(url_for('.entries', **form_args)) 43 | if form_args != request.args.to_dict(): 44 | return redirect(url_for('.entries', **form_args)) 45 | 46 | feed = None 47 | if form.feed.data: 48 | feed = reader.get_feed(form.feed.data, None) 49 | if not feed: 50 | abort(404) 51 | 52 | kwargs = dict(form.data) 53 | if query := kwargs.pop('search', None): 54 | 55 | def get_entries(**kwargs): 56 | for sr in reader.search_entries(query, **kwargs): 57 | yield EntryProxy(sr, reader.get_entry(sr)) 58 | 59 | else: 60 | get_entries = reader.get_entries 61 | 62 | entries = [] 63 | if form.validate(): 64 | try: 65 | entries = eager_iterator(get_entries(**kwargs, limit=64)) 66 | except StopIteration: 67 | pass 68 | except InvalidSearchQueryError as e: 69 | form.search.errors.append(f"invalid query: {e}") 70 | 71 | return stream_template( 72 | 'v2/entries.html', 73 | form=form, 74 | entries=entries, 75 | feed=feed, 76 | ) 77 | 78 | 79 | def eager_iterator(it): 80 | it = iter(it) 81 | try: 82 | return itertools.chain([next(it)], it) 83 | except StopIteration: 84 | return it 85 | 86 | 87 | @blueprint.route('/mark-as', methods=['POST']) 88 | def mark_as(): 89 | reader = get_reader() 90 | 91 | entry = request.form['feed-url'], request.form['entry-id'] 92 | 93 | if 'read' in request.form: 94 | match request.form['read']: 95 | case 'true': 96 | reader.set_entry_read(entry, True) 97 | case 'false': 98 | reader.set_entry_read(entry, False) 99 | case _: 100 | abort(422) 101 | 102 | if 'important' in request.form: 103 | match request.form['important']: 104 | case 'true': 105 | reader.set_entry_important(entry, True) 106 | case 'false': 107 | reader.set_entry_important(entry, False) 108 | case 'none': 109 | reader.set_entry_important(entry, None) 110 | case _: 111 | abort(422) 112 | 113 | if request.headers.get('hx-request') == 'true': 114 | return render_block( 115 | 'v2/entries.html', 116 | 'entry_form', 117 | entry=reader.get_entry(entry), 118 | next=request.form['next'], 119 | # equivalent to {% import "v2/macros.html" as macros %} 120 | macros=current_app.jinja_env.get_template('v2/macros.html').module, 121 | ) 122 | 123 | return redirect(request.form['next'], code=303) 124 | 
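A minimal sketch of why ``entries()`` above wraps its results in ``eager_iterator()``: forcing the first ``next()`` call makes errors raised by a lazy generator (such as an invalid search query) surface inside the view, before any of the streamed template has been sent. The ``failing_entries`` generator below is hypothetical; only ``eager_iterator`` mirrors the helper defined above::

    import itertools

    def eager_iterator(it):
        # same idea as the helper above: force the first item eagerly
        it = iter(it)
        try:
            return itertools.chain([next(it)], it)
        except StopIteration:
            return it

    def failing_entries():
        # hypothetical stand-in for reader.search_entries("bad query")
        raise ValueError("invalid query")
        yield  # never reached; only makes this a generator

    try:
        entries = eager_iterator(failing_entries())
    except ValueError:
        entries = []  # fall back here, before streaming starts
    print(list(entries))  # -> []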
-------------------------------------------------------------------------------- /src/reader/_app/v2/static/style.css: -------------------------------------------------------------------------------- 1 | 2 | .navbar { 3 | --bs-navbar-padding-y: 0.25rem; 4 | --bs-navbar-toggler-padding-y: 0.25rem; 5 | --bs-navbar-toggler-padding-x: 0.25rem; 6 | --bs-navbar-toggler-font-size: 1rem; 7 | --bs-navbar-toggler-border-color: rgba(0, 0, 0, 0); 8 | } 9 | 10 | .nav.controls { 11 | --bs-nav-link-padding-x: 0; 12 | --bs-nav-link-padding-y: 0; 13 | gap: 1rem; 14 | } 15 | .nav.controls .nav-link.active { 16 | color: var(--bs-navbar-active-color); 17 | } 18 | 19 | .htmx-indicator { 20 | display: none; 21 | } 22 | .htmx-request .htmx-indicator, .htmx-request.htmx-indicator { 23 | display: inline-block; 24 | } 25 | .htmx-request .label, .htmx-request.label { 26 | display: none; 27 | } 28 | -------------------------------------------------------------------------------- /src/reader/_app/v2/static/theme.js: -------------------------------------------------------------------------------- 1 | /*! 2 | * Color mode toggler for Bootstrap's docs (https://getbootstrap.com/) 3 | * Copyright 2011-2024 The Bootstrap Authors 4 | * Licensed under the Creative Commons Attribution 3.0 Unported License. 5 | */ 6 | 7 | /* 8 | * Modified to use the Bootstrap Icons font, instead of SVG sprites. 9 | */ 10 | 11 | (() => { 12 | 'use strict' 13 | 14 | const getStoredTheme = () => localStorage.getItem('theme') 15 | const setStoredTheme = theme => localStorage.setItem('theme', theme) 16 | 17 | const getPreferredTheme = () => { 18 | const storedTheme = getStoredTheme() 19 | if (storedTheme) { 20 | return storedTheme 21 | } 22 | 23 | return window.matchMedia('(prefers-color-scheme: dark)').matches ? 'dark' : 'light' 24 | } 25 | 26 | const setTheme = theme => { 27 | if (theme === 'auto') { 28 | document.documentElement.setAttribute('data-bs-theme', (window.matchMedia('(prefers-color-scheme: dark)').matches ? 
'dark' : 'light')) 29 | } else { 30 | document.documentElement.setAttribute('data-bs-theme', theme) 31 | } 32 | } 33 | 34 | const getIconCls = btn => { 35 | return btn.querySelector('.bi').classList.values().find(x => x.startsWith('bi-')) 36 | } 37 | 38 | setTheme(getPreferredTheme()) 39 | 40 | const showActiveTheme = (theme, focus = false) => { 41 | const themeSwitcher = document.querySelector('#theme') 42 | 43 | if (!themeSwitcher) { 44 | return 45 | } 46 | 47 | const themeSwitcherText = document.querySelector('#theme-text') 48 | const activeThemeIcon = document.querySelector('.theme-icon-active') 49 | const btnToActive = document.querySelector(`[data-bs-theme-value="${theme}"]`) 50 | const clsOfActiveBtn = btnToActive.querySelector('.bi').classList.values().find(x => x.startsWith('bi-')) 51 | 52 | document.querySelectorAll('[data-bs-theme-value]').forEach(element => { 53 | element.classList.remove('active') 54 | element.setAttribute('aria-pressed', 'false') 55 | }) 56 | 57 | btnToActive.classList.add('active') 58 | btnToActive.setAttribute('aria-pressed', 'true') 59 | activeThemeIcon.classList.remove( 60 | activeThemeIcon.classList.values().find(x => x.startsWith('bi-')) 61 | ) 62 | activeThemeIcon.classList.add(clsOfActiveBtn) 63 | const themeSwitcherLabel = `${themeSwitcherText.textContent} (${btnToActive.dataset.bsThemeValue})` 64 | themeSwitcher.setAttribute('aria-label', themeSwitcherLabel) 65 | 66 | if (focus) { 67 | themeSwitcher.focus() 68 | } 69 | } 70 | 71 | window.matchMedia('(prefers-color-scheme: dark)').addEventListener('change', () => { 72 | const storedTheme = getStoredTheme() 73 | if (storedTheme !== 'light' && storedTheme !== 'dark') { 74 | setTheme(getPreferredTheme()) 75 | } 76 | }) 77 | 78 | window.addEventListener('DOMContentLoaded', () => { 79 | showActiveTheme(getPreferredTheme()) 80 | 81 | document.querySelectorAll('[data-bs-theme-value]') 82 | .forEach(toggle => { 83 | toggle.addEventListener('click', () => { 84 | const theme = toggle.getAttribute('data-bs-theme-value') 85 | setStoredTheme(theme) 86 | setTheme(theme) 87 | showActiveTheme(theme, true) 88 | }) 89 | }) 90 | }) 91 | })() 92 | -------------------------------------------------------------------------------- /src/reader/_app/v2/templates/v2/macros.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | {%- macro input(field, icon=none, class=none) %} 4 |
5 |
6 | {% if icon -%} 7 | 8 | {%- endif %} 9 | {{ field( 10 | class="form-control" + (' is-invalid' if field.errors else ''), 11 | placeholder=field.label.text) }} 12 | {%- if field.errors %} 13 |
14 | {%- for error in field.errors %} 15 | {{ error }} 16 | {%- endfor %} 17 |
18 | {%- endif %} 19 |
20 |
21 | {%- endmacro %} 22 | 23 | 24 | {%- macro radio(field) %} 25 |
26 | {{ field.label.text }} 27 |
28 | {%- for option in field %} 29 |
30 | {{ option(class="form-check-input") }} 31 | {{ option.label(class="form-check-label") }} 32 |
33 | {%- endfor %} 34 |
35 |
36 | {%- endmacro %} 37 | 38 | 39 | {%- macro bs_file_icon(mimetype) -%} 40 | {%- set type = (mimetype or '').partition('/')[0] -%} 41 | {%- if type == 'audio' -%} file-earmark-music 42 | {%- elif type == 'image' -%} file-earmark-image 43 | {%- elif type == 'video' -%} file-earmark-play 44 | {%- elif type == 'text' -%} file-earmark-text 45 | {%- else -%} file-earmark 46 | {%- endif -%} 47 | {%- endmacro -%} 48 | -------------------------------------------------------------------------------- /src/reader/_app/wsgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | To run a local development server: 4 | 5 | FLASK_DEBUG=1 FLASK_TRAP_BAD_REQUEST_ERRORS=1 \ 6 | FLASK_APP=src/reader/_app/wsgi.py \ 7 | READER_CONFIG=examples/config.yaml READER_DB=db.sqlite \ 8 | flask run -h 0.0.0.0 -p 8000 9 | 10 | """ 11 | 12 | import os 13 | 14 | import yaml 15 | 16 | import reader._app 17 | import reader._config 18 | 19 | 20 | # TODO: the other envvars except _CONFIG_ENVVAR are for compatibility only 21 | 22 | if reader._CONFIG_ENVVAR in os.environ: 23 | with open(os.environ[reader._CONFIG_ENVVAR]) as file: 24 | config = reader._config.make_reader_config(yaml.safe_load(file)) 25 | else: 26 | config = reader._config.make_reader_config({}) 27 | 28 | if reader._DB_ENVVAR in os.environ: 29 | config.all['reader']['url'] = os.environ[reader._DB_ENVVAR] 30 | if reader._PLUGIN_ENVVAR in os.environ: 31 | config.all['reader']['plugins'] = dict.fromkeys( 32 | os.environ[reader._PLUGIN_ENVVAR].split() 33 | ) 34 | if reader._APP_PLUGIN_ENVVAR in os.environ: 35 | config.data['app']['plugins'] = dict.fromkeys( 36 | os.environ[reader._APP_PLUGIN_ENVVAR].split() 37 | ) 38 | 39 | app = reader._app.create_app(config) 40 | app.config['TRAP_BAD_REQUEST_ERRORS'] = bool( 41 | os.environ.get('FLASK_TRAP_BAD_REQUEST_ERRORS', '') 42 | ) 43 | -------------------------------------------------------------------------------- /src/reader/_hash_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generate stable hashes for Python data objects. 3 | Contains no business logic. 4 | 5 | The hashes should be stable across interpreter implementations and versions. 6 | 7 | Supports dataclass instances, datetimes, and JSON-serializable objects. 8 | 9 | Empty dataclass fields are ignored, to allow adding new fields without 10 | the hash changing. Empty means one of: None, '', (), [], or {}. 11 | 12 | The dataclass type is ignored: two instances of different types 13 | will have the same hash if they have the same attribute/value pairs. 14 | 15 | Design choices explained in https://death.andgravity.com/stable-hashing 16 | 17 | Implemented for https://github.com/lemon24/reader/issues/179 18 | 19 | """ 20 | 21 | from __future__ import annotations 22 | 23 | import dataclasses 24 | import datetime 25 | import hashlib 26 | import json 27 | from collections.abc import Collection 28 | from typing import Any 29 | 30 | 31 | # The first byte of the hash contains its version, 32 | # to allow upgrading the implementation without changing existing hashes. 33 | # (In practice, it's likely we'll just let the hash change and update 34 | # the affected objects again; nevertheless, it's good to have the option.) 35 | # 36 | # A previous version recommended using a check_hash(thing, hash) -> bool 37 | # function instead of direct equality checking; it was removed because 38 | # it did not allow objects to cache the hash. 
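# Illustrative sketch, not part of this module: because empty fields are
# ignored and the dataclass type does not matter, adding a new field with an
# empty default leaves existing hashes unchanged. The dataclasses below are
# hypothetical:
#
#     >>> from dataclasses import dataclass
#     >>> @dataclass
#     ... class Old:
#     ...     one: object
#     >>> @dataclass
#     ... class New:
#     ...     one: object
#     ...     two: object = None  # newly added, empty by default
#     >>> get_hash(Old(1)) == get_hash(New(1))
#     True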
39 | 40 | _VERSION = 0 41 | _EXCLUDE = '_hash_exclude_' 42 | 43 | 44 | def get_hash(thing: object) -> bytes: 45 | prefix = _VERSION.to_bytes(1, 'big') 46 | digest = hashlib.md5(_json_dumps(thing).encode('utf-8')).digest() 47 | return prefix + digest[:-1] 48 | 49 | 50 | def _json_dumps(thing: object) -> str: 51 | return json.dumps( 52 | thing, 53 | default=_json_default, 54 | # force formatting-related options to known values 55 | ensure_ascii=False, 56 | sort_keys=True, 57 | indent=None, 58 | separators=(',', ':'), 59 | ) 60 | 61 | 62 | def _json_default(thing: object) -> Any: 63 | try: 64 | return _dataclass_dict(thing) 65 | except TypeError: 66 | pass 67 | if isinstance(thing, datetime.datetime): 68 | return thing.isoformat(timespec='microseconds') 69 | raise TypeError(f"Object of type {type(thing).__name__} is not JSON serializable") 70 | 71 | 72 | def _dataclass_dict(thing: object) -> dict[str, Any]: 73 | # we could have used dataclasses.asdict() 74 | # with a dict_factory that drops empty values, 75 | # but asdict() is recursive and we need to intercept and check 76 | # the _hash_exclude_ of nested dataclasses; 77 | # this way, json.dumps() does the recursion instead of asdict() 78 | 79 | # raises TypeError for non-dataclasses 80 | fields = dataclasses.fields(thing) # type: ignore[arg-type] 81 | # ... but doesn't for dataclass *types* 82 | if isinstance(thing, type): 83 | raise TypeError("got type, expected instance") 84 | 85 | exclude = getattr(thing, _EXCLUDE, ()) 86 | 87 | rv = {} 88 | for field in fields: 89 | if field.name in exclude: 90 | continue 91 | 92 | value = getattr(thing, field.name) 93 | if value is None or not value and isinstance(value, Collection): 94 | continue 95 | 96 | rv[field.name] = value 97 | 98 | return rv 99 | -------------------------------------------------------------------------------- /src/reader/_parser/_http_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | HTTP utilities. Contains no business logic. 3 | 4 | """ 5 | 6 | from collections.abc import Iterable 7 | 8 | import werkzeug.http 9 | 10 | 11 | parse_options_header = werkzeug.http.parse_options_header 12 | parse_accept_header = werkzeug.http.parse_accept_header 13 | parse_date = werkzeug.http.parse_date 14 | 15 | 16 | def unparse_accept_header(values: Iterable[tuple[str, float]]) -> str: 17 | return werkzeug.datastructures.MIMEAccept(values).to_header() 18 | -------------------------------------------------------------------------------- /src/reader/_parser/file.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import pathlib 4 | from collections.abc import Iterator 5 | from contextlib import contextmanager 6 | from dataclasses import dataclass 7 | from typing import Any 8 | from typing import IO 9 | 10 | from ..exceptions import ParseError 11 | from . import wrap_exceptions 12 | from ._url_utils import extract_path 13 | from ._url_utils import resolve_root 14 | 15 | 16 | @dataclass(frozen=True) 17 | class FileRetriever: 18 | """Bare path and file:// URI parser. 19 | 20 | Allows restricting file-system access to a single directory; 21 | see :func:`~reader.make_reader` for details. 
22 | 23 | """ 24 | 25 | feed_root: str 26 | 27 | def __post_init__(self) -> None: 28 | # give feed_root checks a chance to fail early 29 | self._normalize_url('known-good-feed-url') 30 | 31 | @contextmanager 32 | def __call__(self, url: str, *args: Any, **kwargs: Any) -> Iterator[IO[bytes]]: 33 | try: 34 | normalized_url = self._normalize_url(url) 35 | except ValueError as e: 36 | raise ParseError(url, message=str(e)) from None 37 | 38 | with wrap_exceptions(url, "while reading feed"): 39 | with open(normalized_url, 'rb') as file: 40 | yield file 41 | 42 | def validate_url(self, url: str) -> None: 43 | self._normalize_url(url) 44 | 45 | def _normalize_url(self, url: str) -> str: 46 | path = extract_path(url) 47 | if self.feed_root: 48 | path = resolve_root(self.feed_root, path) 49 | if pathlib.PurePath(path).is_reserved(): 50 | raise ValueError("path must not be reserved") 51 | return path 52 | -------------------------------------------------------------------------------- /src/reader/_plugins/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Plug-in infrastructure. Not stable. 3 | 4 | Also package containing **unstable** plugins shipped with reader. 5 | 6 | Note that while the plugin entry points (import names) are relatively stable, 7 | the contents of the actual plugins is not. 8 | 9 | """ 10 | 11 | import functools 12 | from contextlib import contextmanager 13 | from pkgutil import resolve_name 14 | 15 | 16 | class LoaderError(Exception): 17 | pass 18 | 19 | 20 | def raise_exception(message, cause): 21 | raise LoaderError(message) from cause 22 | 23 | 24 | class Loader: 25 | """Plugin loader. 26 | 27 | Allows customizing plugin import/initialization failure behavior. 28 | 29 | The load(name, wrap=True) allows any plugin initialization errors 30 | to raise a single exception type, 31 | since make_reader(plugins=...) just lets the exception propagate. 32 | 33 | """ 34 | 35 | def load(self, name, *, wrap=False): 36 | try: 37 | plugin = resolve_name(name) 38 | except (ImportError, AttributeError, ValueError) as e: 39 | self.handle_import_error(f"could not import plugin {name}", e) 40 | return None 41 | 42 | if wrap: 43 | plugin = self._wrap_init(name)(plugin) 44 | 45 | return plugin 46 | 47 | @contextmanager 48 | def _wrap_init(self, name): 49 | try: 50 | yield 51 | except Exception as e: 52 | self.handle_init_error(f"while initializing plugin {name}", e) 53 | 54 | def init(self, target, names): 55 | for name in names: 56 | plugin = self.load(name) 57 | 58 | if not plugin: 59 | continue 60 | 61 | with self._wrap_init(name): 62 | plugin(target) 63 | 64 | handle_import_error = staticmethod(raise_exception) 65 | handle_init_error = staticmethod(raise_exception) 66 | -------------------------------------------------------------------------------- /src/reader/_plugins/share.py: -------------------------------------------------------------------------------- 1 | """ 2 | share 3 | ~~~~~ 4 | 5 | Add social sharing links at the end of the entry page. 
6 | 7 | To load:: 8 | 9 | READER_APP_PLUGIN='reader._plugins.share:init' \\ 10 | python -m reader serve 11 | 12 | """ 13 | 14 | from urllib.parse import quote 15 | from urllib.parse import urlparse 16 | 17 | 18 | TEMPLATES = { 19 | 'Twitter': "https://twitter.com/share?text={title}&url={url}", 20 | 'HN': "https://news.ycombinator.com/submitlink?u={url}&t={title}", 21 | 'Reddit': "https://www.reddit.com/submit?url={url}&title={title}", 22 | } 23 | 24 | 25 | def percent_encode(s, encoding="ascii"): 26 | return ''.join([f'%{b:0>2x}' for b in s.encode(encoding)]) 27 | 28 | 29 | def share(entry): 30 | if not entry.link: 31 | return 32 | link = quote(entry.link) 33 | title = quote(entry.title or '') 34 | 35 | for name, template in TEMPLATES.items(): 36 | url = template.format(url=link, title=title) 37 | 38 | # prevent ad blockers from messing with these 39 | url = urlparse(url) 40 | url = url._replace( 41 | netloc=percent_encode(url.netloc), 42 | path='/'.join( 43 | percent_encode(c) if 'share' in c.lower() else c 44 | for c in url.path.split('/') 45 | ), 46 | ) 47 | url = url.geturl() 48 | 49 | yield name, url 50 | 51 | 52 | def init(app): 53 | app.reader_additional_links.append(share) 54 | -------------------------------------------------------------------------------- /src/reader/_plugins/sqlite_releases.py: -------------------------------------------------------------------------------- 1 | """ 2 | sqlite_releases 3 | ~~~~~~~~~~~~~~~ 4 | 5 | Create a feed out of the SQLite release history pages at: 6 | 7 | * https://www.sqlite.org/changes.html 8 | * https://www.sqlite.org/chronology.html 9 | 10 | Also serves as an example of how to write custom parsers. 11 | 12 | This plugin needs additional dependencies, use the ``unstable-plugins`` extra 13 | to install them: 14 | 15 | .. code-block:: bash 16 | 17 | pip install reader[unstable-plugins] 18 | 19 | To load:: 20 | 21 | READER_PLUGIN='reader._plugins.sqlite_releases:init' \\ 22 | python -m reader ... 
23 | 24 | """ 25 | 26 | import warnings 27 | from datetime import datetime 28 | from datetime import timezone 29 | from urllib.parse import urlparse 30 | from urllib.parse import urlunparse 31 | 32 | import bs4 33 | 34 | from reader._parser import wrap_exceptions 35 | from reader._types import EntryData 36 | from reader._types import FeedData 37 | 38 | 39 | warnings.filterwarnings( 40 | 'ignore', 41 | message='No parser was explicitly specified', 42 | module='reader._plugins.sqlite_releases', 43 | ) 44 | 45 | 46 | FULL_URL = 'https://www.sqlite.org/changes.html' 47 | URLS = [FULL_URL, 'https://www.sqlite.org/chronology.html'] 48 | 49 | 50 | def extract_text(soup): 51 | for h3 in soup.select('body h3'): 52 | a_name = None 53 | for element, _ in zip(h3.previous_siblings, range(3), strict=False): 54 | if element.name == 'h3': 55 | break 56 | if element.name == 'a' and 'name' in element.attrs: 57 | a_name = element 58 | break 59 | 60 | content = [] 61 | last_a_name_index = None 62 | for i, element in enumerate(h3.next_siblings): 63 | if element.name == 'h3': 64 | break 65 | if element.name == 'a' and 'name' in element.attrs: 66 | last_a_name_index = i 67 | content.append(element) 68 | if last_a_name_index and len(content) - last_a_name_index <= 3: 69 | content = content[:last_a_name_index] 70 | 71 | yield h3.text, a_name['name'] if a_name else None, ''.join(map(str, content)) 72 | 73 | 74 | def make_entries(feed_url, url, soup): 75 | for title, fragment, content in extract_text(soup): 76 | try: 77 | updated = datetime.strptime(title.split()[0], '%Y-%m-%d').replace( 78 | tzinfo=timezone.utc 79 | ) 80 | except (ValueError, IndexError): 81 | continue 82 | 83 | link = urlunparse(urlparse(url)._replace(fragment=fragment)) 84 | 85 | yield EntryData( 86 | feed_url=feed_url, 87 | id=title, 88 | updated=updated, 89 | title=title, 90 | link=link, 91 | summary=content, 92 | ) 93 | 94 | 95 | def make_feed(feed_url, url, soup): 96 | return FeedData(url=feed_url, title=soup.title and soup.title.text, link=url) 97 | 98 | 99 | def parse(url, file, headers): 100 | with wrap_exceptions(url, "while reading feed"): 101 | soup = bs4.BeautifulSoup(file) 102 | with wrap_exceptions(url, "while parsing page"): 103 | feed = make_feed(url, FULL_URL, soup) 104 | entries = list(make_entries(url, FULL_URL, soup)) 105 | feed = feed._replace(updated=max(e.updated for e in entries)) 106 | return feed, entries 107 | 108 | 109 | def init(reader): 110 | for url in URLS: 111 | reader._parser.mount_parser_by_url(url, parse) 112 | -------------------------------------------------------------------------------- /src/reader/_plugins/templates/preview_feed_list.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% import "macros.html" as macros %} 4 | 5 | 6 | {% macro make_title() %} 7 | Feeds for {{ url }} 8 | {% endmacro %} 9 | 10 | 11 | {% block page_title %}{{ make_title() | striptags }}{% endblock %} 12 | {% block main_title %}{{ make_title() }}{% endblock %} 13 | 14 | 15 | {% block body %} 16 | 17 | 18 | {% if errors %} 19 |
    20 | {% for message in errors %} 21 |
  • error: {{ message }} 22 | {% endfor %} 23 |
24 | 25 | 26 | {# style similar to macros.entry_enclosures #} 27 | 28 | {% elif alternates %} 29 | 42 | {% else %} 43 |

no feeds for this page 44 | 45 | {% endif %} 46 | 47 | {% endblock %} 48 | -------------------------------------------------------------------------------- /src/reader/_storage/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Any 4 | 5 | from .._types import ChangeTrackerType 6 | from .._types import SearchType 7 | from ._base import StorageBase 8 | from ._changes import Changes 9 | from ._entries import EntriesMixin 10 | from ._feeds import FeedsMixin 11 | from ._tags import TagsMixin 12 | 13 | 14 | # Row value support was added in 3.15. 15 | # pragma_*() tabled-valued functions were added in 3.16. 16 | # last_insert_rowid() support for FTS5 was added in 3.18. 17 | MINIMUM_SQLITE_VERSION = (3, 18) 18 | 19 | # Both storage and search use the JSON1 extension. 20 | REQUIRED_SQLITE_FUNCTIONS = ['json'] 21 | 22 | 23 | class Storage(FeedsMixin, EntriesMixin, TagsMixin, StorageBase): 24 | """Data access object used for all storage (except search). 25 | 26 | This class is split into per-domain mixins, add new methods accordingly. 27 | 28 | Add a test_storage.py::test_errors_locked test for each new public method. 29 | 30 | """ 31 | 32 | def __init__(self, path: str, timeout: float | None = None): 33 | super().__init__(path, timeout) 34 | self.changes: ChangeTrackerType = Changes(self) 35 | 36 | def make_search(self) -> SearchType: 37 | from ._search import Search 38 | 39 | return Search(self) 40 | -------------------------------------------------------------------------------- /src/reader/_storage/_base.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import json 4 | import os 5 | import sqlite3 6 | import sys 7 | from collections.abc import Callable 8 | from collections.abc import Iterable 9 | from functools import partial 10 | from typing import Any 11 | from typing import TypeVar 12 | 13 | from ..exceptions import StorageError 14 | from . import _sqlite_utils 15 | from ._sql_utils import paginated_query 16 | from ._sql_utils import Query 17 | 18 | 19 | APPLICATION_ID = b'read' 20 | 21 | _T = TypeVar('_T') 22 | 23 | 24 | # also used by tests 25 | CONNECTION_CLS = sqlite3.Connection 26 | 27 | debug = os.environ.get('READER_DEBUG_STORAGE', '') 28 | assert set(debug) <= {'m', 't', 'T', 'i'}, f"invalid READER_DEBUG_STORAGE={debug}" 29 | 30 | if debug: # pragma: no cover 31 | 32 | class CONNECTION_CLS(_sqlite_utils.DebugConnection): # type: ignore # noqa: F811 33 | _set_trace = 't' or 'T' in debug 34 | _io_counters = 'i' in debug 35 | _pid = os.getpid() 36 | 37 | def _log_method(self, data): # type: ignore 38 | data['pid'] = self._pid 39 | stmt = None 40 | if 'T' in debug: 41 | stmt = data.pop('stmt', None) 42 | print('STORAGE_DEBUG', json.dumps(data), file=sys.stderr) 43 | if stmt: 44 | print(stmt, file=sys.stderr) 45 | 46 | 47 | wrap_exceptions = partial(_sqlite_utils.wrap_exceptions, StorageError) 48 | 49 | 50 | class StorageBase: 51 | # Private API, used by tests. 
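    # Number of rows fetched per query chunk by paginated_query() below
    # (2**8 == 256).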
52 | chunk_size = 2**8 53 | 54 | @wrap_exceptions(message="while opening database") 55 | def __init__(self, path: str, timeout: float | None = None): 56 | kwargs: dict[str, Any] = {'factory': CONNECTION_CLS} 57 | if timeout is not None: 58 | kwargs['timeout'] = timeout 59 | 60 | # at least the "PRAGMA foreign_keys = ON" part of setup_db 61 | # has to run for every connection (in every thread), 62 | # since it's not persisted across connections 63 | self.factory = _sqlite_utils.LocalConnectionFactory( 64 | path, self.setup_db, **kwargs 65 | ) 66 | 67 | def get_db(self) -> sqlite3.Connection: 68 | return self.factory() 69 | 70 | @staticmethod 71 | def setup_db(db: sqlite3.Connection) -> None: 72 | # Private API, used by tests. 73 | 74 | from . import MINIMUM_SQLITE_VERSION 75 | from . import REQUIRED_SQLITE_FUNCTIONS 76 | from ._schema import MIGRATION 77 | 78 | return _sqlite_utils.setup_db( 79 | db, 80 | migration=MIGRATION, 81 | id=APPLICATION_ID, 82 | minimum_sqlite_version=MINIMUM_SQLITE_VERSION, 83 | required_sqlite_functions=REQUIRED_SQLITE_FUNCTIONS, 84 | ) 85 | 86 | @wrap_exceptions() 87 | def __enter__(self) -> None: 88 | self.factory.__enter__() 89 | 90 | @wrap_exceptions() 91 | def __exit__(self, *_: Any) -> None: 92 | self.factory.__exit__() 93 | 94 | @wrap_exceptions() 95 | def close(self) -> None: 96 | self.factory.close() 97 | 98 | def paginated_query( 99 | self, 100 | make_query: Callable[[], tuple[Query, dict[str, Any]]], 101 | limit: int | None = None, 102 | last: tuple[Any, ...] | None = None, 103 | row_factory: Callable[[tuple[Any, ...]], _T] | None = None, 104 | ) -> Iterable[_T]: 105 | with wrap_exceptions(): 106 | yield from paginated_query( 107 | self.get_db(), 108 | make_query, 109 | self.chunk_size, 110 | limit or 0, 111 | last, 112 | row_factory, 113 | ) 114 | -------------------------------------------------------------------------------- /src/reader/_storage/_html_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | HTML utilities. Contains no business logic. 3 | 4 | """ 5 | 6 | from __future__ import annotations 7 | 8 | import warnings 9 | from typing import TYPE_CHECKING 10 | 11 | 12 | if TYPE_CHECKING: # pragma: no cover 13 | import bs4 14 | 15 | 16 | # BeautifulSoup warns if not giving it a parser explicitly; full text: 17 | # 18 | # No parser was explicitly specified, so I'm using the best available 19 | # HTML parser for this system ("..."). This usually isn't a problem, 20 | # but if you run this code on another system, or in a different virtual 21 | # environment, it may use a different parser and behave differently. 22 | # 23 | # We are ok with any parser, and with how BeautifulSoup picks the best one if 24 | # available. Explicitly using generic features (e.g. `('html', 'fast')`, 25 | # the default) instead of a specific parser still warns. 26 | # 27 | # Currently there's no way to allow users to pick a parser, and we don't want 28 | # to force a specific parser, so there's no point in warning. 29 | # 30 | # When changing this, also change the equivalent pytest.filterwarnings config. 31 | # 32 | # TODO: Expose BeautifulSoup(features=...) when we have a config system. 
33 | # 34 | warnings.filterwarnings( 35 | 'ignore', 36 | message='No parser was explicitly specified', 37 | module='reader._storage._html_utils', 38 | ) 39 | 40 | 41 | def strip_html(html: str, features: str | None = None) -> str: 42 | soup = get_soup(html) 43 | remove_nontext_elements(soup) 44 | return soup.get_text(separator=' ') 45 | 46 | 47 | def get_soup(html: str, features: str | None = None) -> bs4.BeautifulSoup: 48 | # lazy import (https://github.com/lemon24/reader/issues/297) 49 | import bs4 50 | 51 | return bs4.BeautifulSoup(html, features=features) 52 | 53 | 54 | def remove_nontext_elements(soup: bs4.BeautifulSoup) -> None: 55 | # content', type='text/plain', language=None 25 | ), 26 | Content(value='content', type='text/html', language=None), 27 | ), 28 | enclosures=( 29 | # the text/html type comes from feedparser 30 | Enclosure(href=f'{rel_base}enclosure?q=a#fragment', type='text/html'), 31 | ), 32 | ) 33 | ] 34 | -------------------------------------------------------------------------------- /tests/data/relative.rss: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | file.html 5 | 6 | 7 | blog/post/1 8 | 7bd204c6-1655-4c27-aeee-53f933c5395f 9 | 10 | 11 | one <a href="target">two</a> three 12 | 13 | <script>evil</script> content 14 | <script>evil</script> content 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /tests/data/relative.rss.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | from reader import Content 4 | from reader import Enclosure 5 | from reader._types import EntryData 6 | from reader._types import FeedData 7 | 8 | 9 | feed = FeedData( 10 | url=f'{url_base}relative.rss', 11 | link=f'{rel_base}file.html', 12 | version='rss20', 13 | ) 14 | 15 | entries = [ 16 | EntryData( 17 | feed_url=feed.url, 18 | id=f'{rel_base}7bd204c6-1655-4c27-aeee-53f933c5395f', 19 | updated=None, 20 | link=f'{rel_base}blog/post/1', 21 | summary=f'one two three', 22 | content=( 23 | Content( 24 | value=' content', type='text/plain', language=None 25 | ), 26 | Content(value='content', type='text/html', language=None), 27 | ), 28 | enclosures=( 29 | # for RSS feedparser doesn't make relative links absolute 30 | # (it does for Atom) 31 | Enclosure(href='enclosure?q=a#fragment'), 32 | ), 33 | ) 34 | ] 35 | -------------------------------------------------------------------------------- /tests/data/sqlite_releases.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Release History Of SQLite 5 | 6 | 7 | 8 | 9 |

Release History

10 | 11 |

12 | This page provides a high-level summary of changes to SQLite. 13 |

14 | 15 | 16 |

2021-01-20 (3.34.1)

17 | Fix a potential use-after-free bug. 18 | 19 | 20 |

2020-12-01 (3.34.0)

21 | Added the sqlite3_txn_state() interface. 22 |

23 | 24 |

2000-05-30

25 | Added the LIKE operator. 26 |

2000-05-29

27 | Initial Public Release of Alpha code 28 | -------------------------------------------------------------------------------- /tests/data/unknown.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "https://jsonfeed.org/version/1234", 3 | "items": [] 4 | } 5 | -------------------------------------------------------------------------------- /tests/data/unknown.json.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | from reader import Content 4 | from reader import Enclosure 5 | from reader._types import EntryData 6 | from reader._types import FeedData 7 | 8 | 9 | feed = FeedData( 10 | url=f'{url_base}unknown.json', 11 | version='json', 12 | ) 13 | 14 | entries = [] 15 | -------------------------------------------------------------------------------- /tests/reader_test_plugins/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lemon24/reader/bcc46c54d304d693a9031666ac68b6fa1fe5f6c1/tests/reader_test_plugins/__init__.py -------------------------------------------------------------------------------- /tests/reader_test_plugins/good.py: -------------------------------------------------------------------------------- 1 | def init_reader(reader): 2 | pass 3 | -------------------------------------------------------------------------------- /tests/reader_test_plugins/init_error.py: -------------------------------------------------------------------------------- 1 | def init_reader(reader): 2 | raise ValueError('someerror') 3 | -------------------------------------------------------------------------------- /tests/reader_test_plugins/missing_dependency.py: -------------------------------------------------------------------------------- 1 | import some_module_that_doesnt_exist_random_number_to_avoid_collisions_4 2 | -------------------------------------------------------------------------------- /tests/reader_test_plugins/missing_entry_point.py: -------------------------------------------------------------------------------- 1 | # no init_reader() here 2 | -------------------------------------------------------------------------------- /tests/test__utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from reader._utils import deprecated 4 | from reader._utils import deprecated_wrapper 5 | 6 | 7 | # Normally, the stuff in _utils is tested by tests for higher level code, 8 | # but some of the things aren't always used. 9 | 10 | 11 | def test_deprecated_wrapper(): 12 | def new(arg): 13 | raise ValueError(arg) 14 | 15 | old = deprecated_wrapper('old', new, '1.0', '2.0') 16 | 17 | _check_deprecated(old) 18 | 19 | 20 | def test_deprecated(): 21 | @deprecated('new', '1.0', '2.0') 22 | def old(arg): 23 | "docstring" 24 | raise ValueError(arg) 25 | 26 | assert '\n\ndocstring\n\n' in old.__doc__ 27 | 28 | _check_deprecated(old) 29 | 30 | 31 | def test_deprecated_property(): 32 | class Class: 33 | @property 34 | @deprecated('new', '1.0', '2.0', property=True) 35 | def old(self): 36 | "docstring" 37 | raise ValueError() 38 | 39 | with pytest.raises(ValueError), pytest.deprecated_call() as warnings: 40 | Class().old 41 | 42 | assert Class.old.fget.__name__ == 'old' 43 | assert Class.old.fget.__doc__ == ( 44 | 'Deprecated variant of :attr:`new`.\n\n' 45 | 'docstring\n' 46 | '\n' 47 | '.. 
deprecated:: 1.0\n' 48 | ' This property will be removed in *reader* 2.0.\n' 49 | ' Use :attr:`new` instead.\n\n' 50 | ) 51 | 52 | warning = warnings.pop() 53 | 54 | assert ( 55 | str(warning.message) 56 | == 'old is deprecated and will be removed in reader 2.0. Use new instead.' 57 | ) 58 | 59 | 60 | def _check_deprecated(old): 61 | with pytest.raises(ValueError) as excinfo, pytest.deprecated_call() as warnings: 62 | old('whatever') 63 | 64 | assert excinfo.value.args[0] == 'whatever' 65 | 66 | assert old.__name__ == 'old' 67 | assert old.__doc__.startswith('Deprecated alias for :meth:`new`.\n\n') 68 | assert old.__doc__.endswith( 69 | '\n' 70 | '.. deprecated:: 1.0\n' 71 | ' This method will be removed in *reader* 2.0.\n' 72 | ' Use :meth:`new` instead.\n\n' 73 | ) 74 | 75 | assert len(warnings.list) == 1 76 | warning = warnings.pop() 77 | 78 | assert warning.category is DeprecationWarning 79 | assert ( 80 | str(warning.message) 81 | == 'old() is deprecated and will be removed in reader 2.0. Use new() instead.' 82 | ) 83 | 84 | 85 | def test_better_str_partial(): 86 | from reader._utils import BetterStrPartial as partial 87 | 88 | def fn(): 89 | pass 90 | 91 | assert str(partial(fn, 1, two=2)) == "fn(1, two=2)" 92 | 93 | fn.__name__ = '' 94 | assert str(partial(fn, 1)) == "(1)" 95 | 96 | class Cls: 97 | def meth(self): 98 | pass 99 | 100 | assert str(partial(Cls.meth, two=2)) == 'meth(two=2)' 101 | assert str(partial(Cls().meth, two=2)) == 'meth(two=2)' 102 | -------------------------------------------------------------------------------- /tests/test_app_wsgi.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def dummy_plugin(reader): 5 | reader._dummy_was_here = True 6 | 7 | 8 | def test_app_wsgi(monkeypatch, db_path): 9 | # This assumes no-one else imports reader._app.wsgi.app. 10 | # Also, further imports will yield the same app from this test. 
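    # wsgi.py reads these environment variables at import time,
    # so they must be set before reader._app.wsgi is imported below.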
11 | monkeypatch.setitem(os.environ, 'READER_DB', db_path) 12 | monkeypatch.setitem(os.environ, 'READER_PLUGIN', 'test_app_wsgi:dummy_plugin') 13 | monkeypatch.setitem(os.environ, 'READER_APP_PLUGIN', 'test_app_wsgi:dummy_plugin') 14 | 15 | from reader._app import get_reader 16 | from reader._app.wsgi import app 17 | 18 | with app.app_context(): 19 | assert get_reader()._dummy_was_here 20 | 21 | assert app._dummy_was_here 22 | -------------------------------------------------------------------------------- /tests/test_bench.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import sys 3 | 4 | import pytest 5 | from click.testing import CliRunner 6 | 7 | from reader import make_reader 8 | from test_cli import patch_app_dir 9 | from test_reader_filter import setup_reader_for_tags 10 | 11 | 12 | root_dir = os.path.dirname(__file__) 13 | sys.path.insert(0, os.path.join(root_dir, '../scripts')) 14 | import bench 15 | from bench import cli 16 | 17 | 18 | pytestmark = pytest.mark.slow 19 | pytest.importorskip("numpy") 20 | 21 | 22 | @pytest.fixture(scope='module') 23 | def db_path(tmp_path_factory): 24 | dir = tmp_path_factory.mktemp("data") 25 | db_path = str(dir.joinpath('db.sqlite')) 26 | with make_reader(db_path) as reader: 27 | setup_reader_for_tags(reader) 28 | return db_path 29 | 30 | 31 | @pytest.mark.parametrize('command', [['time', '-n1'], ['profile']]) 32 | def test_commands_work(command, db_path): 33 | runner = CliRunner() 34 | result = runner.invoke( 35 | cli, command + ['--db', db_path] + ['get_entries_all', 'show'] 36 | ) 37 | assert result.exit_code == 0, result.exception 38 | 39 | 40 | def test_list(): 41 | runner = CliRunner() 42 | result = runner.invoke(cli, ['list']) 43 | assert 'get_entries_all' in result.output.splitlines() 44 | assert 'show' in result.output.split() 45 | -------------------------------------------------------------------------------- /tests/test_config.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from reader._config import Config 4 | 5 | 6 | CONFIG_INIT_DATA = [ 7 | (Config({}), {'default': {}}), 8 | (Config({}, sections={'cli', 'app'}), {'default': {}, 'cli': {}, 'app': {}}), 9 | ( 10 | Config({'reader': {'k': 'v'}}, sections={'cli', 'app'}), 11 | {'default': {'reader': {'k': 'v'}}, 'cli': {}, 'app': {}}, 12 | ), 13 | ( 14 | Config({'default': {'reader': {'k': 'v'}}}, sections={'cli', 'app'}), 15 | {'default': {'reader': {'k': 'v'}}, 'cli': {}, 'app': {}}, 16 | ), 17 | ] 18 | 19 | 20 | @pytest.mark.parametrize('config, data', CONFIG_INIT_DATA) 21 | def test_config_init(config, data): 22 | assert config.data == data 23 | 24 | 25 | def test_config_init_error(): 26 | with pytest.raises(ValueError): 27 | Config({'default': {'reader': {}}, 'reader': {}}) 28 | 29 | 30 | def test_config_merged(): 31 | config = Config( 32 | { 33 | 'url': 'default-url', 34 | 'plugins': {'default-plugin': None, 'another-plugin': 1}, 35 | 'cli': {'url': 'cli-url'}, 36 | 'app': {'plugins': {'app-plugin': None, 'another-plugin': 2}}, 37 | }, 38 | sections={'cli', 'app'}, 39 | merge_keys={ 40 | 'plugins', 41 | }, 42 | ) 43 | 44 | assert config.merged('cli') == { 45 | 'url': 'cli-url', 46 | 'plugins': {'default-plugin': None, 'another-plugin': 1}, 47 | } 48 | 49 | assert config.merged('app') == { 50 | 'url': 'default-url', 51 | 'plugins': {'default-plugin': None, 'another-plugin': 2, 'app-plugin': None}, 52 | } 53 | 54 | 55 | def test_config_merged_recursive(): 56 | 
config = Config( 57 | { 58 | 'reader': {'plugins': {'default-reader-plugin': None}}, 59 | 'plugins': {'default-plugin': None}, 60 | 'app': { 61 | 'reader': {'plugins': {'app-reader-plugin': None}}, 62 | 'plugins': {'app-plugin': None}, 63 | }, 64 | }, 65 | sections={ 66 | 'app', 67 | }, 68 | merge_keys={'reader', 'plugins'}, 69 | ) 70 | assert config.merged('app') == { 71 | 'reader': { 72 | 'plugins': {'default-reader-plugin': None, 'app-reader-plugin': None} 73 | }, 74 | 'plugins': {'default-plugin': None, 'app-plugin': None}, 75 | } 76 | 77 | 78 | def test_config_all(): 79 | config = Config( 80 | { 81 | 'url': 'default-url', 82 | 'nested': {'default-key': 'default-nested'}, 83 | 'cli': { 84 | 'url': 'cli-url', 85 | 'nested': {'cli-key': 'cli-nested'}, 86 | }, 87 | }, 88 | sections={'cli', 'app'}, 89 | merge_keys={ 90 | 'nested', 91 | }, 92 | ) 93 | 94 | config.all['url'] = 'new-url' 95 | assert config.data == { 96 | 'default': { 97 | 'url': 'new-url', 98 | 'nested': {'default-key': 'default-nested'}, 99 | }, 100 | 'cli': { 101 | 'url': 'new-url', 102 | 'nested': {'cli-key': 'cli-nested'}, 103 | }, 104 | 'app': { 105 | 'url': 'new-url', 106 | }, 107 | } 108 | 109 | config.all['nested'] = {'new-key': 'new-value'} 110 | assert config.data == dict.fromkeys( 111 | ('default', 'cli', 'app'), 112 | { 113 | 'url': 'new-url', 114 | 'nested': {'new-key': 'new-value'}, 115 | }, 116 | ) 117 | -------------------------------------------------------------------------------- /tests/test_exceptions.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import pytest 4 | 5 | from reader import EntryError 6 | from reader import FeedError 7 | from reader import SingleUpdateHookError 8 | from reader import TagError 9 | from reader import UpdateHookErrorGroup 10 | from reader.exceptions import _FancyExceptionBase 11 | 12 | 13 | def test_fancy_exception_base(): 14 | exc = _FancyExceptionBase('message') 15 | assert str(exc) == 'message' 16 | 17 | exc = _FancyExceptionBase(message='message') 18 | assert str(exc) == 'message' 19 | 20 | cause = Exception('cause') 21 | 22 | exc = _FancyExceptionBase('message') 23 | exc.__cause__ = cause 24 | pickled_exc = pickle.dumps(exc) 25 | assert str(exc) == 'message: builtins.Exception: cause' 26 | assert str(exc) == str(pickle.loads(pickled_exc)) 27 | 28 | class WithURL(_FancyExceptionBase): 29 | _default_message = 'default message' 30 | 31 | def __init__(self, url, **kwargs): 32 | super().__init__(**kwargs) 33 | self.url = url 34 | 35 | @property 36 | def _str(self): 37 | return self.url.upper() 38 | 39 | exc = WithURL('url') 40 | assert str(exc) == 'default message: URL' 41 | 42 | exc = WithURL('url', message='another message') 43 | exc.__cause__ = cause 44 | assert str(exc) == 'another message: URL: builtins.Exception: cause' 45 | 46 | 47 | def _all_classes(cls): 48 | yield cls 49 | for subclass in cls.__subclasses__(): 50 | yield from _all_classes(subclass) 51 | 52 | 53 | def all_classes(*args, **kwargs): 54 | return list(_all_classes(*args, **kwargs)) 55 | 56 | 57 | @pytest.mark.parametrize('exc_type', all_classes(FeedError)) 58 | def test_feed_error_str(exc_type): 59 | exc = exc_type('url') 60 | assert repr('url') in str(exc) 61 | 62 | 63 | @pytest.mark.parametrize('exc_type', all_classes(EntryError)) 64 | def test_entry_error_str(exc_type): 65 | exc = exc_type('url', 'id') 66 | assert repr(('url', 'id')) in str(exc) 67 | 68 | 69 | @pytest.mark.parametrize('exc_type', all_classes(TagError)) 70 | def 
test_tag_error_str(exc_type): 71 | exc = exc_type(('object',), 'key') 72 | assert "'object': 'key'" in str(exc) 73 | 74 | 75 | @pytest.mark.parametrize( 76 | 'args, expected', 77 | [ 78 | ( 79 | ('before_feeds_update', 'myhook'), 80 | "unexpected hook error: before_feeds_update: 'myhook'", 81 | ), 82 | ( 83 | ('before_feeds_update', 'myhook', ()), 84 | "unexpected hook error: before_feeds_update: 'myhook': ()", 85 | ), 86 | ( 87 | ('before_feed_update', 'myhook', ('feed',)), 88 | "unexpected hook error: before_feed_update: 'myhook': 'feed'", 89 | ), 90 | ( 91 | ('after_entry_update', 'myhook', ('feed', 'entry')), 92 | "unexpected hook error: after_entry_update: 'myhook': ('feed', 'entry')", 93 | ), 94 | ], 95 | ) 96 | def test_single_update_hook_error_str(args, expected): 97 | exc = SingleUpdateHookError(*args) 98 | assert str(exc) == expected 99 | exc = SingleUpdateHookError(*args) 100 | exc.__cause__ = Exception('cause') 101 | assert str(exc) == expected + ": builtins.Exception: cause" 102 | 103 | 104 | def test_update_hook_error_group(): 105 | one = SingleUpdateHookError('before_feeds_update', 'one') 106 | two = SingleUpdateHookError('before_feeds_update', 'two') 107 | 108 | group = UpdateHookErrorGroup('message', [one]) 109 | assert group.message == 'message' 110 | assert group.exceptions == (one,) 111 | 112 | derived = group.derive([two]) 113 | assert derived.message == 'message' 114 | assert derived.exceptions == (two,) 115 | 116 | with pytest.raises(TypeError): 117 | UpdateHookErrorGroup('message', [Exception()]) 118 | with pytest.raises(TypeError): 119 | group.derive([Exception()]) 120 | -------------------------------------------------------------------------------- /tests/test_hash_utils.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from datetime import datetime 3 | 4 | import pytest 5 | 6 | from reader._hash_utils import get_hash 7 | 8 | 9 | @dataclass 10 | class DataOne: 11 | one: object 12 | two: object = None 13 | 14 | 15 | @dataclass 16 | class DataTwo: 17 | one: object 18 | two: object = None 19 | three: object = None 20 | 21 | 22 | def two_factory(one, value): 23 | return DataTwo(one, three=value) 24 | 25 | 26 | @dataclass 27 | class DataThree: 28 | one: object 29 | two: object = None 30 | _hash_exclude_ = frozenset( 31 | { 32 | 'one', 33 | } 34 | ) 35 | 36 | 37 | @pytest.mark.parametrize('value', ['', [], (), {}, None]) 38 | @pytest.mark.parametrize('factory', [DataOne, DataTwo, two_factory]) 39 | def test_empty(value, factory): 40 | assert get_hash(DataOne(1)) == get_hash(factory(1, value)) 41 | assert get_hash(DataOne(1, factory(2, value))) == get_hash( 42 | DataOne(1, factory(2, value)) 43 | ) 44 | 45 | 46 | @pytest.mark.parametrize( 47 | 'thing, hash', 48 | [ 49 | (None, b'\x007\xa6%\x9c\xc0\xc1\xda\xe2\x99\xa7\x86d\x89\xdf\xf0'), 50 | (True, b'\x00\xb3&\xb5\x06+/\x0ei\x04h\x10qu4\xcb'), 51 | (1, b'\x00\xc4\xcaB8\xa0\xb9#\x82\r\xccP\x9aou\x84'), 52 | ('str', b'\x00v~-y\x12\xeb\xef\xdf\xe1\x84\x95\xedSc_'), 53 | (['list'], b'\x00\xe1y\x01T;\x817\x06\x03\xeb\x03\x07\xf4\xed\xc5'), 54 | (('tuple',), b'\x00\x95\xab\xbex\xc6\xff@\xdd\x02\xd5N\\\\\xbbY'), 55 | ({'key': 'value'}, b"\x00\xa75?|\xdd\xce\x80\x8d\xe0\x03'G\xa0\xb7\xbe"), 56 | (DataOne(1, 2), b'\x00\xbd]\x03\xe5\x0c\xca\xc3\xae\x17\xf1\x84\x01R@c'), 57 | (DataTwo(1, 2), b'\x00\xbd]\x03\xe5\x0c\xca\xc3\xae\x17\xf1\x84\x01R@c'), 58 | (DataOne(1, DataTwo(2)), b'\x00\xc4[\xfcY0\xffJ--\xb6\xd1M\xd7(\x8f'), 59 | ( 60 | DataOne(1, 
[DataTwo(2), 3, datetime(2021, 1, 2)]), 61 | b'\x00uU\xb7\xf7\x18\xfa\x06\x98h\x82\xeb\xfd\xdc\xbd.', 62 | ), 63 | ( 64 | DataOne(1, {'key': DataTwo(datetime(2021, 1, 2))}), 65 | b'\x00\xc82CV\xed\xff.\x8d\x9e5&\xbc\xd4e/', 66 | ), 67 | ], 68 | ) 69 | def test_hash(thing, hash): 70 | assert get_hash(thing) == hash 71 | 72 | 73 | @pytest.mark.parametrize('thing', [object(), str, {1, 2}, b'ab']) 74 | def test_hash_error(thing): 75 | with pytest.raises(TypeError): 76 | get_hash(DataOne(thing)) 77 | with pytest.raises(TypeError): 78 | get_hash(DataOne) 79 | 80 | 81 | def test_exclude(): 82 | assert get_hash(DataTwo(None, 2)) == get_hash(DataThree(1, 2)) 83 | assert get_hash(DataTwo(1, 2)) != get_hash(DataThree(1, 2)) 84 | assert get_hash(DataOne(DataTwo(None, 2), 'one')) == get_hash( 85 | DataOne(DataThree(1, 2), 'one') 86 | ) 87 | -------------------------------------------------------------------------------- /tests/test_html_utils.py: -------------------------------------------------------------------------------- 1 | import bs4 2 | import pytest 3 | 4 | from reader._storage._html_utils import strip_html 5 | 6 | 7 | STRIP_HTML_DATA = [ 8 | ('', ''), 9 | ('
', ''), 10 | ('aabb', 'aabb'), 11 | ('aa
bb', 'aa\nbb'), 12 | ('aa

bb', 'aa\nbb'), 13 | ('bb', 'bb'), 14 | ('bb', 'bb'), 15 | ('bb', 'bb'), 16 | ('ssbb', 'bb'), 17 | ('aabb', 'aa\nbb'), 18 | ('aabb', 'aa\nbb'), 19 | ('aabb', 'aa\nbb'), 20 | ('aattbb', 'aa\nbb'), 21 | ('bb', 'bb'), 22 | ('bb', 'bb'), 23 | ('bb', 'bb'), 24 | ('ttbb', 'bb'), 25 | ('aabb', 'aa\nbb'), 26 | ('aabb', 'aa\nbb'), 27 | ('aabb', 'aa\nbb'), 28 | ('aattbb', 'aa\nbb'), 29 | ( 30 | """ 31 | 32 | aa 33 | tt 34 |

bb 35 | 36 | cc 37 | 38 | 39 | dd 40 | 41 | ee 42 | """, 43 | 'aa\nbb\ncc\ndd\nee', 44 | ), 45 | ] 46 | 47 | 48 | # We test all bs4 parsers, since we don't know/care what the user has installed. 49 | @pytest.mark.parametrize( 50 | 'features', 51 | [ 52 | None, 53 | pytest.param('lxml', marks=pytest.mark.requires_lxml), 54 | 'html.parser', 55 | 'html5lib', 56 | ], 57 | ) 58 | @pytest.mark.parametrize('input, expected_output', STRIP_HTML_DATA) 59 | def test_strip_html(input, expected_output, features): 60 | output = strip_html(input, features) 61 | if isinstance(output, str): 62 | output = '\n'.join(output.split()) 63 | 64 | # Special-case different

\nAdded the sqlite3_txn_state() interface.\n

', 59 | ), 60 | ( 61 | '2000-05-30', 62 | datetime(2000, 5, 30, 0, 0), 63 | '2000-05-30', 64 | 'https://www.sqlite.org/changes.html', 65 | 'Added the LIKE operator.', 66 | ), 67 | ( 68 | '2000-05-29', 69 | datetime(2000, 5, 29, 0, 0), 70 | '2000-05-29', 71 | 'https://www.sqlite.org/changes.html', 72 | 'Initial Public Release of Alpha code', 73 | ), 74 | ] 75 | -------------------------------------------------------------------------------- /tests/test_plugins_ua_fallback.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from reader import ParseError 4 | 5 | 6 | def test_fallback(requests_mock, make_reader): 7 | url = 'http://www.example.com/' 8 | 9 | reader = make_reader(':memory:', plugins=('reader.ua_fallback',)) 10 | reader.add_feed(url) 11 | 12 | matcher = requests_mock.get(url, status_code=403) 13 | 14 | with pytest.raises(ParseError) as exc_info: 15 | reader.update_feed(url) 16 | 17 | assert '403' in str(exc_info.value) 18 | 19 | assert len(matcher.request_history) == 2 20 | first_ua, second_ua = (r.headers['User-Agent'] for r in matcher.request_history) 21 | 22 | assert first_ua.startswith('python-reader/') 23 | assert second_ua.startswith('feedparser/') 24 | assert second_ua.endswith(first_ua) 25 | 26 | 27 | def test_noop(requests_mock, make_reader): 28 | url = 'http://www.example.com/' 29 | 30 | reader = make_reader(':memory:', plugins=('reader.ua_fallback',)) 31 | reader.add_feed(url) 32 | 33 | matcher = requests_mock.get(url, status_code=404) 34 | 35 | with pytest.raises(ParseError) as exc_info: 36 | reader.update_feed(url) 37 | 38 | assert '404' in str(exc_info.value) 39 | assert len(matcher.request_history) == 1 40 | -------------------------------------------------------------------------------- /tests/test_reader_deprecations.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | import pytest 4 | 5 | from fakeparser import Parser 6 | 7 | 8 | # Nothing here (yet). 
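# A minimal sketch of the shape such a test usually takes once something is
# actually deprecated (the method name below is hypothetical, shown only for
# illustration; pytest.warns() and the make_reader fixture are the only real
# pieces assumed here):
#
#     def test_deprecated_thing_warns(make_reader):
#         reader = make_reader(':memory:')
#         with pytest.warns(DeprecationWarning):
#             reader.deprecated_thing()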
9 | -------------------------------------------------------------------------------- /tests/test_reader_plugins.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from reader import InvalidPluginError 4 | from reader import PluginInitError 5 | 6 | 7 | @pytest.fixture(autouse=True) 8 | def set_module_prefix(monkeypatch): 9 | monkeypatch.setattr('reader.plugins._MODULE_PREFIX', 'reader_test_plugins.') 10 | 11 | 12 | def test_good(monkeypatch, make_reader): 13 | def one(reader): 14 | one.reader = reader 15 | 16 | def two(reader): 17 | two.reader = reader 18 | 19 | monkeypatch.setattr('reader_test_plugins.good.init_reader', one) 20 | 21 | reader = make_reader(':memory:', plugins=['reader.good', two]) 22 | 23 | assert one.reader is reader 24 | assert two.reader is reader 25 | 26 | 27 | @pytest.mark.parametrize( 28 | 'plugin_name', 29 | ['reader_test_plugins.good:init_reader', 'reader_test_plugins.good.init_reader'], 30 | ) 31 | def test_good_full_path(monkeypatch, make_reader, plugin_name): 32 | monkeypatch.setattr('reader.plugins._PLUGIN_PREFIX', 'reader_test_plugins.') 33 | 34 | def one(reader): 35 | one.reader = reader 36 | 37 | monkeypatch.setattr('reader_test_plugins.good.init_reader', one) 38 | 39 | reader = make_reader(':memory:', plugins=[plugin_name]) 40 | 41 | assert one.reader is reader 42 | 43 | 44 | def test_init_error_built_in(make_reader): 45 | with pytest.raises(PluginInitError) as exc_info: 46 | reader = make_reader(':memory:', plugins=['reader.init_error']) 47 | 48 | message = str(exc_info.value) 49 | assert 'reader_test_plugins.init_error:init_reader' in message 50 | assert 'someerror' in message 51 | 52 | 53 | def test_init_error_callable(make_reader): 54 | from reader_test_plugins.init_error import init_reader as plugin 55 | 56 | with pytest.raises(PluginInitError) as exc_info: 57 | reader = make_reader(':memory:', plugins=[plugin]) 58 | 59 | message = str(exc_info.value) 60 | assert 'reader_test_plugins.init_error:init_reader' in message 61 | assert 'someerror' in message 62 | 63 | 64 | def test_non_built_in(monkeypatch, make_reader): 65 | with pytest.raises(InvalidPluginError) as exc_info: 66 | make_reader(':memory:', plugins=['reader_test_plugins.good:init_reader']) 67 | 68 | assert "no such built-in plugin: 'reader_test_plugins.good:init_reader'" in str( 69 | exc_info.value 70 | ) 71 | 72 | 73 | def test_missing_plugin(make_reader): 74 | with pytest.raises(InvalidPluginError) as exc_info: 75 | make_reader(':memory:', plugins=['reader.unknown']) 76 | 77 | assert "no such built-in plugin: 'reader.unknown'" in str(exc_info.value) 78 | 79 | 80 | def test_missing_entry_point(make_reader): 81 | with pytest.raises(AttributeError) as exc_info: 82 | make_reader(':memory:', plugins=['reader.missing_entry_point']) 83 | 84 | 85 | def test_missing_dependency(make_reader): 86 | with pytest.raises(ImportError) as exc_info: 87 | make_reader(':memory:', plugins=['reader.missing_dependency']) 88 | -------------------------------------------------------------------------------- /tests/test_reader_utils.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import Mock 2 | 3 | import pytest 4 | 5 | from reader import EntryNotFoundError 6 | from reader.utils import archive_entries 7 | 8 | 9 | def test_archive_entries(reader, parser): 10 | reader.copy_entry = Mock(wraps=reader.copy_entry) 11 | 12 | feed = parser.feed(1) 13 | one = parser.entry(1, 'one', title='one') 14 | two = 
parser.entry(1, '&?:/', title='not URL safe') 15 | reader.add_feed(feed) 16 | reader.update_feeds() 17 | 18 | # archive an entry, archived does not exist 19 | 20 | reader.copy_entry.reset_mock() 21 | archive_entries(reader, [one]) 22 | 23 | assert len(reader.copy_entry.call_args_list) == 1 24 | assert {e.resource_id + (e.title,) for e in reader.get_entries()} == { 25 | ('1', 'one', 'one'), 26 | ('1', '&?:/', 'not URL safe'), 27 | ('reader:archived', 'reader:archived?feed=1&entry=one', 'one'), 28 | } 29 | archived = reader.get_feed('reader:archived') 30 | assert archived.updates_enabled is False 31 | assert archived.user_title == 'Archived' 32 | 33 | # archive two entries (one already archived), archived exists 34 | 35 | one = parser.entry(1, 'one', title='new one') 36 | reader.update_feeds() 37 | 38 | reader.copy_entry.reset_mock() 39 | archive_entries(reader, [one, two]) 40 | 41 | # 3 because one is copied (exists error), deleted, and then copied again 42 | assert len(reader.copy_entry.call_args_list) == 3 43 | assert {e.resource_id + (e.title,) for e in reader.get_entries()} == { 44 | ('1', 'one', 'new one'), 45 | ('1', '&?:/', 'not URL safe'), 46 | ('reader:archived', 'reader:archived?feed=1&entry=one', 'new one'), 47 | ( 48 | 'reader:archived', 49 | 'reader:archived?feed=1&entry=%26%3F%3A%2F', 50 | 'not URL safe', 51 | ), 52 | } 53 | 54 | # archive inexistent entry 55 | 56 | with pytest.raises(EntryNotFoundError): 57 | archive_entries(reader, [('1', 'inexistent')]) 58 | -------------------------------------------------------------------------------- /tests/test_test_utils.py: -------------------------------------------------------------------------------- 1 | from utils import reload_module 2 | 3 | 4 | def test_reload_module(monkeypatch, reload_module): 5 | import ntpath 6 | import os 7 | import os.path 8 | import posixpath 9 | import urllib.request 10 | 11 | os_path_by_name = {'nt': ntpath, 'posix': posixpath} 12 | 13 | # on Windows, url2pathname is imported from nturl2path; 14 | # on POSIX, url2pathname is defined in urllib.request; 15 | # this is decided at urllib.request's import time, based on os.name 16 | url2pathname_module_by_name = {'nt': 'nturl2path', 'posix': 'urllib.request'} 17 | 18 | the_other_os_name = {'nt': 'posix', 'posix': 'nt'}[os.name] 19 | 20 | before = os.name, os.path.__name__, urllib.request.url2pathname.__module__ 21 | 22 | monkeypatch.setattr('os.name', the_other_os_name) 23 | monkeypatch.setattr('os.path', os_path_by_name[the_other_os_name]) 24 | reload_module(urllib.request) 25 | 26 | # sanity check 27 | assert os.name == the_other_os_name 28 | assert os.path.__name__ == os_path_by_name[os.name].__name__ 29 | 30 | assert ( 31 | urllib.request.url2pathname.__module__ == url2pathname_module_by_name[os.name] 32 | ) 33 | 34 | reload_module.undo() 35 | 36 | assert before == (os.name, os.path.__name__, urllib.request.url2pathname.__module__) 37 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = coverage-clean,py{313,312,311,py311},coverage-report,typing,docs 3 | skip_missing_interpreters = true 4 | 5 | [testenv] 6 | extras = 7 | cli 8 | app 9 | tests 10 | unstable-plugins 11 | 12 | allowlist_externals = 13 | ./run.sh 14 | 15 | commands = {posargs:./run.sh coverage-run --cov-append -v} 16 | 17 | depends = 18 | py{313,312,311,py311}: coverage-clean 19 | coverage-report: py{313,312,311,py311} 20 | 21 | 
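# Example invocations (a sketch; assumes a recent tox is installed and that
# arguments after "--" replace the default command via {posargs} as configured above):
#   tox                      # run the whole envlist
#   tox -e py312             # a single interpreter
#   tox -e typing,docs       # type checking and the docs build only
#   tox -e py312 -- ./run.sh coverage-run -v   # override the default command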
[testenv:coverage-clean] 22 | deps = coverage 23 | skip_install = true 24 | commands = coverage erase 25 | 26 | [testenv:coverage-report] 27 | deps = coverage 28 | skip_install = true 29 | commands = ./run.sh coverage-report 30 | 31 | [testenv:typing] 32 | extras = 33 | search 34 | tests 35 | commands = ./run.sh typing 36 | 37 | [testenv:docs] 38 | extras = 39 | docs 40 | commands = sphinx-build -W -b html -d {envtmpdir}/doctrees docs {envtmpdir}/html 41 | --------------------------------------------------------------------------------