"
12 |
13 | import os
14 | import sys
15 |
16 | # custom include to share code between recipes
17 | sys.path.append(os.environ["recipes_includes"])
18 | from recipes_shared import BasicNewsrackRecipe, format_title
19 |
20 | from calibre.web.feeds.news import BasicNewsRecipe
21 |
22 | _name = "Asian Review of Books"
23 |
24 |
class AsianReviewOfBooks(BasicNewsrackRecipe, BasicNewsRecipe):
    """Fetch the Asian Review of Books RSS feed and normalise article markup."""

    title = _name
    __author__ = "Darko Miletic"
    description = "In addition to reviewing books about or of relevance to Asia, the Asian Review of Books also features long-format essays by leading Asian writers and thinkers, to providing an unparalleled forum for discussion of key contemporary issues by Asians for Asia and a vehicle of intellectual depth and breadth where leading thinkers can write on the books, arts and ideas of the day. Widely quoted and referenced, with an archive of more than one thousand book reviews, it is the only web resource dedicated to Asian books. And now, with the addition of the new premium content, the Asian Review of Books, is a must-read publication. https://asianreviewofbooks.com/"  # noqa
    publisher = "The Asian Review of Books"
    category = "literature, books, reviews, Asia"
    language = "en"
    publication_type = "magazine"
    masthead_url = "https://i2.wp.com/asianreviewofbooks.com/content/wp-content/uploads/2016/09/ARBwidelogo.png"

    oldest_article = 30
    max_articles_per_feed = 30

    conversion_options = {
        "comment": description,
        "tags": category,
        "publisher": publisher,
        "language": language,
    }

    remove_attributes = ["width", "height"]
    keep_only_tags = [
        dict(name="main"),
    ]
    remove_tags = [
        dict(class_=["entry-meta", "sharedaddy", "jp-relatedposts", "entry-footer"])
    ]

    extra_css = """
    blockquote { font-size: 1.2rem; margin-left: 0; font-style: italic; }
    .wp-caption-text, .entry-featured__caption { display: block; font-size: 0.8rem; margin-top: 0.2rem; }
    """

    feeds = [("Articles", "http://asianreviewofbooks.com/content/feed/")]

    def populate_article_metadata(self, article, soup, _):
        # Track the newest article timestamp seen so far and mirror it
        # into the ebook title.
        if not self.pub_date or self.pub_date < article.utctime:
            self.pub_date = article.utctime
            self.title = format_title(_name, self.pub_date)

    def preprocess_html(self, soup):
        # Remove paragraphs with no visible text.
        for para in soup.find_all("p"):
            if not para.text.strip():
                para.decompose()
        # The site marks pull quotes up as <h5> and author bios as <h6>;
        # remap them to the tags styled by extra_css.
        for old_name, new_name in (("h5", "blockquote"), ("h6", "div")):
            for ele in soup.find_all(old_name):
                ele.name = new_name
        return soup
81 |
--------------------------------------------------------------------------------
/recipes/bookforum-magazine.recipe.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | from urllib.parse import urljoin
4 |
5 | # custom include to share code between recipes
6 | sys.path.append(os.environ["recipes_includes"])
7 | from recipes_shared import BasicNewsrackRecipe
8 |
9 | from mechanize import Request
10 | from calibre.web.feeds.news import BasicNewsRecipe
11 |
12 | _name = "Bookforum"
13 | _issue_url = ""
14 |
15 |
class BookforumMagazine(BasicNewsrackRecipe, BasicNewsRecipe):
    """Build an issue of Bookforum from the print table-of-contents page.

    parse_index() scrapes https://www.bookforum.com/print (or a pinned
    issue via the module-level ``_issue_url``) instead of using a feed.
    """

    title = _name
    description = (
        "Bookforum is an American book review magazine devoted to books and "
        "the discussion of literature. https://www.bookforum.com/print"
    )
    language = "en"
    __author__ = "ping"
    publication_type = "magazine"
    compress_news_images_auto_size = 8

    keep_only_tags = [dict(class_="blog-article")]
    remove_tags = [dict(name=["af-share-toggle", "af-related-articles"])]

    extra_css = """
    .blog-article__header { font-size: 1.8rem; margin-bottom: 0.4rem; }
    .blog-article__subtitle { font-size: 1.2rem; font-style: italic; margin-bottom: 1rem; }
    .blog-article__writer { font-size: 1rem; font-weight: bold; color: #444; }
    .blog-article__book-info { margin: 1rem 0; }
    .article-image-container img, .blog-article__publication-media img {
        display: block; max-width: 100%; height: auto;
    }
    .blog-article__caption { font-size: 0.8rem; display: block; margin-top: 0.2rem; }
    """

    def preprocess_html(self, soup):
        # strip away links that's not needed
        for ele in soup.select(".blog-article__header a"):
            ele.unwrap()
        return soup

    def parse_index(self):
        """Scrape the issue TOC page into {section: [article dicts]}."""
        soup = self.index_to_soup(
            _issue_url if _issue_url else "https://www.bookforum.com/print"
        )
        # Use the issue's og:title (e.g. the issue month) in the ebook title.
        meta_ele = soup.find("meta", property="og:title")
        if meta_ele:
            self.title = f'{_name}: {meta_ele["content"]}'

        cover_ele = soup.find("img", class_="toc-issue__cover")
        if cover_ele:
            self.cover_url = urljoin(
                "https://www.bookforum.com",
                soup.find("img", class_="toc-issue__cover")["src"],
            )
            # use cover image to get a published date
            # (HEAD request only; the Last-Modified header stands in for
            # the issue's publication date since the TOC page has none)
            br = self.get_browser()
            cover_res = br.open_novisit(
                Request(self.cover_url, timeout=self.timeout, method="HEAD")
            )
            # NOTE(review): assumes the response object supports header
            # lookup via .get(name, default=...) — verify against the
            # browser implementation returned by get_browser().
            cover_res_lastupdated = cover_res.get("last-modified", default=None)
            if cover_res_lastupdated:
                self.pub_date = self.parse_date(cover_res_lastupdated)

        articles = {}
        for sect_ele in soup.find_all("div", class_="toc-articles__section"):
            section_name = self.tag_to_string(
                sect_ele.find("a", class_="toc__anchor-links__link")
            )
            for article_ele in sect_ele.find_all("article"):
                title_ele = article_ele.find("h1")
                sub_title_ele = article_ele.find(class_="toc-article__subtitle")
                articles.setdefault(section_name, []).append(
                    {
                        "title": self.tag_to_string(title_ele),
                        "url": article_ele.find("a", class_="toc-article__link")[
                            "href"
                        ],
                        "description": self.tag_to_string(sub_title_ele)
                        if sub_title_ele
                        else "",
                    }
                )
        return articles.items()
90 |
--------------------------------------------------------------------------------
/recipes/channelnewsasia.recipe.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 https://github.com/ping/
2 | #
3 | # This software is released under the GNU General Public License v3.0
4 | # https://opensource.org/licenses/GPL-3.0
5 |
6 | """
7 | channelnewsasia.com
8 | """
9 | import os
10 | import sys
11 |
12 | # custom include to share code between recipes
13 | sys.path.append(os.environ["recipes_includes"])
14 | from recipes_shared import BasicNewsrackRecipe, format_title
15 |
16 | from calibre.web.feeds.news import BasicNewsRecipe
17 |
18 | _name = "ChannelNewsAsia"
19 |
20 |
class ChannelNewsAsia(BasicNewsrackRecipe, BasicNewsRecipe):
    """CNA recipe: standard RSS feeds with CNA-specific tag cleanup."""

    title = _name
    __author__ = "ping"
    description = "CNA: Breaking News, Singapore News, World and Asia https://www.channelnewsasia.com/"
    publisher = "Mediacorp"
    category = "news, Singapore"
    publication_type = "newspaper"
    language = "en"
    masthead_url = "https://www.channelnewsasia.com/sites/default/themes/mc_cna_theme/images/logo.png"

    oldest_article = 1
    max_articles_per_feed = 25

    remove_tags_before = [dict(class_=["h1--page-title"])]
    remove_tags_after = [dict(class_=["content"])]
    remove_attributes = ["style"]
    remove_tags = [
        dict(
            class_=[
                "js-popup-content",
                "referenced-card",
                "block--related-topics",
                "block-ad-entity",
                "block-block-content",
                "from-library",
                "block-field-blocknodearticlefield-author",  # author bio
                "mobile_author_card",  # author bio
                "block-field-blocknodearticlefield-text-to-speech",  # article AI audio
            ]
        ),
        dict(name="div", attrs={"data-ad-entity": True}),
        dict(name="div", attrs={"data-js-options": True}),
        dict(name=["script", "noscript", "style", "svg"]),
    ]

    extra_css = """
    .figure__caption { font-size: 0.8rem; }
    .figure__caption p { margin-top: 0.2rem; margin-bottom: 1rem; }
    """

    feeds = [
        # (
        #     "Latest News",
        #     "https://www.channelnewsasia.com/api/v1/rss-outbound-feed?_format=xml",
        # ),
        (
            "Singapore",
            "https://www.channelnewsasia.com/api/v1/rss-outbound-feed?_format=xml&category=10416",
        ),
        (
            "Asia",
            "https://www.channelnewsasia.com/api/v1/rss-outbound-feed?_format=xml&category=6511",
        ),
        (
            "Business",
            "https://www.channelnewsasia.com/api/v1/rss-outbound-feed?_format=xml&category=6936",
        ),
        # (
        #     "Sport",
        #     "https://www.channelnewsasia.com/api/v1/rss-outbound-feed?_format=xml&category=10296",
        # ),
        # (
        #     "World",
        #     "https://www.channelnewsasia.com/api/v1/rss-outbound-feed?_format=xml&category=6311",
        # ),
    ]

    def populate_article_metadata(self, article, __, _):
        # Keep pub_date pegged to the most recent article and reflect
        # that timestamp in the ebook title.
        published = article.utctime
        if self.pub_date is None or self.pub_date < published:
            self.pub_date = published
            self.title = format_title(_name, published)
92 |
--------------------------------------------------------------------------------
/recipes/fivebooks.recipe.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 https://github.com/ping/
2 | #
3 | # This software is released under the GNU General Public License v3.0
4 | # https://opensource.org/licenses/GPL-3.0
5 |
6 | """
7 | fivebooks.com
8 | """
9 | import os
10 | import re
11 | import sys
12 | from datetime import datetime
13 |
14 | # custom include to share code between recipes
15 | sys.path.append(os.environ["recipes_includes"])
16 | from recipes_shared import BasicNewsrackRecipe, format_title
17 |
18 | from calibre.web.feeds.news import BasicNewsRecipe
19 |
20 | _name = "Five Books"
21 |
22 |
class FiveBooks(BasicNewsrackRecipe, BasicNewsRecipe):
    """Scrape expert book-recommendation interviews from fivebooks.com.

    There is no RSS feed: parse_index() scrapes the interview listing
    pages directly. preprocess_raw_html() copies the page's schema.org
    ld+json metadata onto the main content node so that
    populate_article_metadata() can later recover the modified date and
    description from the cleaned HTML.
    """

    title = _name
    __author__ = "ping"
    description = "Expert book recommendations https://fivebooks.com/"
    language = "en"
    category = "books"
    publication_type = "blog"
    max_articles_per_feed = 15
    masthead_url = "https://fivebooks.com/app/themes/five-books/assets/images/logo.png"
    scale_news_images = (400, 400)

    remove_attributes = ["style", "font"]
    remove_tags = [
        dict(id=["interview-related", "buyfive"]),
        dict(
            class_=[
                "listen-button",
                "buy-button",
                "book-ad",
                "-newsletter",
                "read-later-and-social",
                "further-reading",
                "show-for-medium-up",
                "hide-for-small",
                "book-list-mobile",
                "-donate",
                "update",
                "social-buttons",
                "ebook-button",
                "book-links",
                "bio-component",
            ]
        ),
        dict(name=["script", "noscript", "style"]),
    ]
    remove_tags_before = [dict(class_=["main-content"])]
    remove_tags_after = [dict(class_=["main-content"])]

    extra_css = """
    p.book-number { font-weight: bold; font-size: 1.2rem; }
    ul.book-covers { list-style: none; list-style-type: none; padding-left: 0; }
    ul.book-covers li { display: block; margin-bottom: 1rem; }
    ul.book-covers li .cover-wrap { display: inline-block; vertical-align: top; }
    ul.book-covers li p.book-number { display: none; }
    ul.book-covers li h2 { display: inline-block; font-size: 0.8rem; margin-left: 1rem; }
    p.pullquote { margin-left: 3pt; font-size: 0.85rem; color: #333333; font-style: italic; }
    """
    feeds = [
        ("Newest", "https://fivebooks.com/interviews/?order=newest"),
        ("Popular", "https://fivebooks.com/interviews/?order=popular"),
    ]

    def populate_article_metadata(self, article, soup, first):
        # Prefer the visible .date element; otherwise fall back to the
        # data-post-modified-date attribute stamped on by preprocess_raw_html().
        post_date = None
        dt = soup.find(class_="date")
        if not dt:
            dated_tag = soup.find(attrs={"data-post-modified-date": True})
            if dated_tag:
                post_date = datetime.fromisoformat(dated_tag["data-post-modified-date"])
        else:
            # "%B %d, %Y"
            post_date = self.parse_date(dt.text)
        if post_date:
            if not self.pub_date or post_date > self.pub_date:
                self.pub_date = post_date
                self.title = format_title(_name, post_date)
            article.utctime = post_date

        description_tag = soup.find(attrs={"data-post-description": True})
        if description_tag:
            article.text_summary = description_tag["data-post-description"]

    def preprocess_raw_html(self, raw_html, url):
        """Stamp ld+json dateModified/description onto the main content node."""
        soup = self.soup(raw_html)
        content = soup.find(class_="main-content")
        # NOTE(review): the predicate returns d's "@graph" list (truthy
        # test) and the matching dict is then re-queried below; verify this
        # matches get_ld_json()'s contract in recipes_shared.
        data = self.get_ld_json(soup, lambda d: d.get("@graph", []))
        if not data:
            return raw_html
        graph = data.get("@graph", [])
        if not graph:
            return raw_html
        for g in graph:
            if g.get("@type") != "WebPage":
                continue
            # NOTE(review): assumes a "main-content" element exists whenever
            # the ld+json does — `content` would be None otherwise; confirm.
            content["data-post-modified-date"] = (
                g.get("dateModified") or g["datePublished"]
            )
            content["data-post-description"] = g.get("description", "")
            break
        return str(soup)

    def parse_index(self):
        """Scrape each listing page into {feed_name: [article dicts]}."""
        br = self.get_browser()
        articles = {}
        for feed_name, feed_url in self.feeds:
            articles[feed_name] = []
            raw_html = (
                br.open_novisit(feed_url, timeout=self.timeout).read().decode("utf-8")
            )
            soup = self.soup(raw_html)
            interviews = soup.find_all(class_="library-page")
            if self.max_articles_per_feed < len(interviews):
                interviews = interviews[: self.max_articles_per_feed]
            for interview in interviews:
                heading = interview.find("h2")
                # collapse runs of whitespace inside the heading text
                title = re.sub(r"\s{2,}", " ", heading.text)
                link = heading.find("a")
                articles[feed_name].append(
                    {
                        "title": title,
                        "url": link["href"],
                        "date": "",
                        "description": "",
                    }
                )
        return articles.items()
139 |
--------------------------------------------------------------------------------
/recipes/fivethirtyeight.recipe.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 https://github.com/ping/
2 | #
3 | # This software is released under the GNU General Public License v3.0
4 | # https://opensource.org/licenses/GPL-3.0
5 |
6 | """
7 | fivethirtyeight.com is no more
8 | """
9 | import json
10 | import os
11 | import sys
12 | from datetime import timezone
13 | from html import unescape
14 |
15 | # custom include to share code between recipes
16 | sys.path.append(os.environ["recipes_includes"])
17 | from recipes_shared import WordPressNewsrackRecipe, format_title, get_date_format
18 |
19 | from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
20 | from calibre.web.feeds.news import BasicNewsRecipe
21 |
22 | _name = "FiveThirtyEight"
23 |
24 |
class FiveThirtyEight(WordPressNewsrackRecipe, BasicNewsRecipe):
    """FiveThirtyEight via the WordPress REST API.

    parse_index() pulls posts from the WP API, saves each post's JSON to a
    temp file, and points each article "url" at that file; the JSON is then
    rendered to HTML by preprocess_raw_html().
    """

    title = _name
    description = "FiveThirtyEight uses statistical analysis — hard numbers — to tell compelling stories about politics, sports, science, economics and culture. https://fivethirtyeight.com/"
    language = "en"
    __author__ = "ping"

    oldest_article = 14
    max_articles_per_feed = 10
    masthead_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/1/13/FiveThirtyEight_Logo.svg/1024px-FiveThirtyEight_Logo.svg.png"

    reverse_article_order = False
    remove_attributes = ["style", "width", "height"]
    remove_tags = [dict(class_=["video-title", "videoplayer", "video-footer"])]

    extra_css = """
    h1.article-title { font-size: 1.8rem; margin-bottom: 0.4rem; }
    h2.article-subtitle { font-size: 1.2rem; font-style: italic; margin-bottom: 1rem; font-weight: normal; }
    .single-header-metadata-wrap { margin-bottom: 1rem; }
    .single-header-metadata-wrap .vcard {
        font-weight: bold; color: #444; margin-right: 0.5rem;
        margin-top: 0; margin-bottom: 0;
    }
    .single-topic { margin-top: 0; margin-bottom: 0; }
    .single-featured-image img, p img, .wp-block-image img { margin-bottom: 0.8rem; max-width: 100%; }
    .single-featured-image .caption { display: block; font-size: 0.8rem; margin-top: 0.2rem; }
    """

    feeds = [
        (_name, "https://fivethirtyeight.com/"),
    ]

    def preprocess_raw_html(self, raw_html, url):
        # formulate the api response into html
        #
        # NOTE(review): the HTML tags of this template appear to have been
        # stripped (likely by the tool that produced this copy) — only the
        # interpolations survived. Restore the <html>/<head>/<body> wrapper
        # from the upstream recipe before relying on this output.
        post = json.loads(raw_html)

        return f"""
{post["title"]["rendered"]}

{post["content"]["rendered"]}
"""

    def parse_index(self):
        """Fetch recent posts from the WP API and stage them as local JSON files."""
        br = self.get_browser()
        articles = {}
        self.temp_dir = PersistentTemporaryDirectory()

        for feed_name, feed_url in self.feeds:
            custom_params = {
                "rest_route": "/wp/v2/fte_features",
                "espn_verticals_exclude": 67,  # Sports
                "tags_exclude": 329557888,  # Podcasts
            }
            posts = self.get_posts(feed_url, self.oldest_article, custom_params, br)

            latest_post_date = None
            for p in posts:
                # pub_date (ebook metadata) tracks the newest modified time
                post_update_dt = self.parse_date(
                    p["modified_gmt"], tz_info=timezone.utc
                )
                if not self.pub_date or post_update_dt > self.pub_date:
                    self.pub_date = post_update_dt
                # the ebook title tracks the newest publish date (local)
                post_date = self.parse_date(p["date"], tz_info=None, as_utc=False)
                if not latest_post_date or post_date > latest_post_date:
                    latest_post_date = post_date
                    self.title = format_title(_name, post_date)

                # group articles into one section per publish date
                section_name = f"{post_date:{get_date_format()}}"
                if len(self.get_feeds()) > 1:
                    section_name = f"{feed_name}: {post_date:{get_date_format()}}"
                if section_name not in articles:
                    articles[section_name] = []

                # stage the post JSON on disk; preprocess_raw_html renders it
                with PersistentTemporaryFile(suffix=".json", dir=self.temp_dir) as f:
                    f.write(json.dumps(p).encode("utf-8"))

                # resolve vertical (topic) names from the embedded taxonomy terms
                verticals = []
                if p.get("espn_verticals"):
                    try:
                        for terms in p.get("_embedded", {}).get("wp:term", []):
                            verticals.extend(
                                [
                                    t["name"]
                                    for t in terms
                                    if t["taxonomy"] == "espn_verticals"
                                ]
                            )

                    except (KeyError, TypeError):
                        pass

                articles[section_name].append(
                    {
                        "title": unescape(p["title"]["rendered"]) or "Untitled",
                        "url": "file://" + f.name,
                        "date": f"{post_date:{get_date_format()}}",
                        "description": unescape(" / ".join(verticals)),
                    }
                )
        return articles.items()
126 |
--------------------------------------------------------------------------------
/recipes/forbes-editors-picks.recipe.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import sys
4 | from datetime import datetime, timezone, timedelta
5 | from urllib.parse import urlencode
6 |
7 | # custom include to share code between recipes
8 | sys.path.append(os.environ["recipes_includes"])
9 | from recipes_shared import BasicNewsrackRecipe, format_title
10 |
11 | from calibre.web.feeds.news import BasicNewsRecipe
12 |
13 | _name = "Forbes - Editor's Picks"
14 |
15 |
class ForbesEditorsPicks(BasicNewsrackRecipe, BasicNewsRecipe):
    """Fetch Forbes Editors' Picks via the chansec stream JSON endpoint.

    parse_index() pages through the JSON stream API (cursor = last item's
    ``date``/``id``) until it passes ``oldest_article`` or collects
    ``max_articles_per_feed`` items.
    """

    title = _name
    __author__ = "ping"
    description = "Forbe's Editors' Picks https://www.forbes.com/editors-picks/"
    language = "en"

    oldest_article = 7
    max_articles_per_feed = 10

    scale_news_images = (800, 1200)
    timeout = 10
    simultaneous_downloads = 1

    keep_only_tags = [dict(name="article")]
    remove_attributes = ["style", "height", "width"]

    remove_tags = [
        dict(
            class_=[
                "story-package__nav-wrapper",
                "container__subnav--outer",
                "edit-story-container",
                "article-sharing",
                "vert-pipe",
                "short-bio",
                "bottom-contrib-block",
                "article-footer",
                "sigfile",
                "hidden",
                "link-embed",
                "subhead3-embed",
                "recirc-module",
                "seo",
                "top-ad-container",
                "speakr-wrapper",
            ]
        ),
        dict(name=["fbs-cordial", "fbs-ad", "svg"]),
    ]

    extra_css = """
    .top-label-wrapper a { margin-right: 0.5rem; color: #444; }
    .issue { font-weight: bold; margin-bottom: 0.2rem; }
    h1 { font-size: 1.8rem; margin-bottom: 0.4rem; }
    h2.subhead-embed { font-size: 1.2rem; font-style: italic; font-weight: normal; margin-bottom: 0.5rem; }
    h2.subhead-embed strong { font-weight: normal; }
    .top-contrib-block { margin-top: 0.5rem; font-weight: bold; color: #444; }
    .content-data { margin-bottom: 1rem; font-weight: normal; color: unset; }
    .image-embed p { font-size: 0.8rem; margin-top: 0.2rem; margin-bottom: 0.5rem; }
    .image-embed img {
        display: block; margin-bottom: 0.3rem; max-width: 100%; height: auto;
        box-sizing: border-box;
    }
    blockquote { font-size: 1.25rem; margin-left: 0; text-align: center; }
    blockquote .text-align { font-size: 1rem; }
    """

    def preprocess_raw_html(self, raw_html, url):
        """Stamp the article's modified date onto the markup and fix images.

        The date is read from the page's NewsArticle ld+json and stored in a
        data attribute so populate_article_metadata() can recover it after
        cleanup. <progressive-image> custom elements are renamed to plain
        <img> so calibre downloads them.
        """
        soup = self.soup(raw_html)
        article = soup.find("article")
        meta = self.get_ld_json(soup, lambda d: d.get("@type", "") == "NewsArticle")
        modified_date = meta.get("dateModified") or meta.get("datePublished")
        article["data-og-modified-date"] = modified_date
        for img in soup.find_all("progressive-image"):
            img.name = "img"
        return str(soup)

    def populate_article_metadata(self, article, soup, first):
        """Set article/recipe timestamps from the stamped modified date."""
        article_date = soup.find(attrs={"data-og-modified-date": True})
        if article_date:
            modified_date = datetime.fromisoformat(
                article_date["data-og-modified-date"]
            ).replace(tzinfo=timezone.utc)
            if (not self.pub_date) or modified_date > self.pub_date:
                self.pub_date = modified_date
                self.title = format_title(_name, self.pub_date)
            article.utctime = modified_date
            article.localtime = modified_date

    def parse_index(self):
        """Page through the stream API, newest first, within the cutoff window."""
        br = self.get_browser()
        # datetime.now(timezone.utc) is the supported equivalent of the
        # deprecated datetime.utcnow().replace(tzinfo=timezone.utc)
        cutoff_date = datetime.now(timezone.utc) - timedelta(
            days=self.oldest_article
        )
        articles = []

        date_param = 0
        content_ids = None
        end_feed = False
        while not end_feed:
            query = {
                "limit": 25,
                "sourceValue": "editors-pick",
                "streamSourceType": "badge",
            }
            # cursor params from the last item of the previous page
            if content_ids:
                query["ids"] = content_ids
            if date_param:
                query["date"] = date_param

            endpoint = (
                f"https://www.forbes.com/simple-data/chansec/stream/?{urlencode(query)}"
            )

            res = br.open_novisit(endpoint, timeout=self.timeout)
            res_obj = json.loads(res.read().decode("utf-8"))
            items = res_obj.get("blocks", {}).get("items", [])
            if not items:
                break

            for item in items:
                # item["date"] is an epoch timestamp in milliseconds;
                # fromtimestamp(..., tz=...) replaces deprecated utcfromtimestamp()
                item_date = datetime.fromtimestamp(
                    item["date"] / 1000.0, tz=timezone.utc
                )
                if item_date < cutoff_date:
                    end_feed = True
                    break

                if (not self.pub_date) or item_date > self.pub_date:
                    self.pub_date = item_date
                    self.title = format_title(_name, self.pub_date)

                articles.append(
                    {
                        "title": item["title"],
                        "url": item["url"],
                        "description": item["description"],
                        "date": item_date,
                    }
                )
                date_param = item["date"]
                content_ids = item["id"]
                if len(articles) >= self.max_articles_per_feed:
                    end_feed = True
                    break

        return [(_name, articles)]
153 |
--------------------------------------------------------------------------------
/recipes/foreign-policy.recipe.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 https://github.com/ping/
2 | #
3 | # This software is released under the GNU General Public License v3.0
4 | # https://opensource.org/licenses/GPL-3.0
5 | import json
6 | import os
7 | import sys
8 |
9 | # custom include to share code between recipes
10 | sys.path.append(os.environ["recipes_includes"])
11 | from recipes_shared import WordPressNewsrackRecipe, get_datetime_format
12 |
13 | from calibre.web.feeds.news import BasicNewsRecipe
14 |
15 | _name = "Foreign Policy"
16 | _issue_url = ""
17 |
18 |
class ForeignPolicy(WordPressNewsrackRecipe, BasicNewsRecipe):
    """Foreign Policy via the WordPress REST API.

    parse_index() stages each post's JSON (via the shared get_articles
    helper); preprocess_raw_html() renders that JSON into an article page,
    prepending the first wp:attachment as the lede image.
    """

    title = _name
    __author__ = "ping"
    description = (
        "Foreign Policy is an American news publication, founded in 1970 and "
        "focused on global affairs, current events, and domestic and international "
        "policy. It produces content daily on its website and app, and in four "
        "print issues annually. https://foreignpolicy.com/"
    )
    language = "en"
    publication_type = "blog"
    oldest_article = 7  # days
    masthead_url = "https://foreignpolicy.com/wp-content/themes/foreign-policy-2017/assets/src/images/logos/favicon-256.png"
    reverse_article_order = False
    compress_news_images_auto_size = 12

    remove_tags = [
        dict(
            class_=[
                "Apple-converted-space",
                "graphic-chatter",
                "fp_choose_placement_related_posts",
                "sidebar-box_right",
                "newsletter-unit-signup",
                "newsletter-unit-signup--shortcode-fallback",
            ]
        ),
        dict(style="height:0;opacity:0;"),
        dict(name=["noscript"]),
    ]

    extra_css = """
    .headline { font-size: 1.8rem; margin-bottom: 0.4rem; }
    .article-meta { margin-top: 1rem; margin-bottom: 1rem; }
    .article-meta .author { font-weight: bold; color: #444; margin-right: 0.5rem; }
    .article-section { display: block; font-weight: bold; color: #444; }
    .article-img img, img.attachment-full { display: block; max-width: 100%; height: auto; }
    .article-img p, .wp-caption-text {
        font-size: 0.8rem; display: block; margin-top: 0.2rem;
    }
    .pull-quote-sidebar {
        display: block; text-align: center;
        margin-left: 0; margin-bottom: 0.4rem; font-size: 1.25rem;
    }
    """

    feeds = [
        (_name, "https://www.foreignpolicy.com/"),
    ]

    def preprocess_raw_html(self, raw_html, url):
        # formulate the api response into html
        post = json.loads(raw_html)
        if not post:
            self.abort_article()
        date_published_loc = self.parse_date(post["date"], tz_info=None, as_utc=False)
        post_authors = self.extract_authors(post)
        categories = self.extract_categories(post)

        # NOTE(review): the HTML tags of this template appear to have been
        # stripped (likely by the tool that produced this copy) — only the
        # interpolations survived. The code below relies on the template
        # providing <body><article> (see soup.body.article.append calls);
        # restore the markup from the upstream recipe.
        soup = self.soup(
            f"""
            {post["title"]["rendered"]}

            {f'{" / ".join(categories)}' if categories else ''}
            {post["title"]["rendered"]}
            {f'{", ".join(post_authors)}' if post_authors else ''}
            {date_published_loc:{get_datetime_format()}}
            """
        )

        content = self.soup(post["content"]["rendered"])
        # FP doesn't use featuremedia, the first attachment is the lede image
        attachment_endpoint = (
            post.get("_links", {}).get("wp:attachment", [{}])[0].get("href")
        )
        if attachment_endpoint:
            attachment = next(
                iter(json.loads(self.index_to_soup(attachment_endpoint, raw=True))), {}
            )
            if attachment:
                lede = soup.new_tag("div", attrs={"class": "image-attachment"})
                img = soup.new_tag("img", attrs={"src": attachment["source_url"]})
                lede.append(img)
                if attachment.get("caption", {}).get("rendered"):
                    caption = soup.new_tag("div", attrs={"class": "wp-caption-text"})
                    caption.append(self.soup(attachment["caption"]["rendered"]))
                    lede.append(caption)
                soup.body.article.append(lede)

        soup.body.article.append(content)

        # swap lazy-loaded image sources into src
        for img in soup.find_all("img", attrs={"data-lazy-src": True}):
            img["src"] = img["data-lazy-src"]
            # also cleanup a little
            for attribute in (
                "data-lazy-src",
                "data-lazy-srcset",
                "data-lazy-sizes",
                "data-src",
                "loading",
            ):
                if img.get(attribute):
                    del img[attribute]

        return str(soup)

    def parse_index(self):
        """Delegate to the shared WordPress article fetcher for each feed."""
        articles = {}
        br = self.get_browser()
        for feed_name, feed_url in self.feeds:
            articles = self.get_articles(
                articles, feed_name, feed_url, self.oldest_article, {}, br
            )
        return articles.items()
139 |
--------------------------------------------------------------------------------
/recipes/harvard-intl-review.recipe.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 https://github.com/ping/
2 | #
3 | # This software is released under the GNU General Public License v3.0
4 | # https://opensource.org/licenses/GPL-3.0
5 |
6 | """
7 | hir.harvard.edu
8 | """
9 | import os
10 | import sys
11 | from datetime import timezone
12 |
13 | # custom include to share code between recipes
14 | sys.path.append(os.environ["recipes_includes"])
15 | from recipes_shared import (
16 | BasicNewsrackRecipe,
17 | format_title,
18 | get_date_format,
19 | get_datetime_format,
20 | )
21 |
22 | from calibre.web.feeds import Feed
23 | from calibre.web.feeds.news import BasicNewsRecipe
24 |
25 | _name = "Harvard International Review"
26 |
27 |
class HarvardInternationalReview(BasicNewsrackRecipe, BasicNewsRecipe):
    """Harvard International Review via its RSS feed.

    The single feed is re-sectioned by publication date in parse_feeds(),
    with an author/date meta block injected into each article's content.
    """

    title = _name
    description = "The Harvard International Review is a quarterly magazine offering insight on international affairs from the perspectives of scholars, leaders, and policymakers. https://hir.harvard.edu/"
    language = "en"
    __author__ = "ping"
    publication_type = "magazine"
    oldest_article = 30  # days
    max_articles_per_feed = 30
    use_embedded_content = True
    masthead_url = (
        "https://hir.harvard.edu/content/images/2020/12/HIRlogo_crimson-4.png"
    )
    compress_news_images_auto_size = 7
    auto_cleanup = True
    timeout = 60

    extra_css = """
    .article-meta { margin-bottom: 1rem; }
    .article-meta .author { font-weight: bold; color: #444; }
    .article-meta .published-dt { margin-left: 0.5rem; }
    """

    feeds = [
        (_name, "https://hir.harvard.edu/rss/"),
    ]

    def populate_article_metadata(self, article, __, _):
        # Track the newest article time and mirror it into the ebook title.
        if (not self.pub_date) or article.utctime > self.pub_date:
            self.pub_date = article.utctime
            self.title = format_title(_name, article.utctime)

    @staticmethod
    def _new_section_feed(parsed_feed, section_title):
        """Create an empty Feed titled *section_title*, copying display
        metadata from *parsed_feed* (previously duplicated inline 3x)."""
        feed = Feed(log=parsed_feed.logger)
        feed.title = section_title
        feed.description = parsed_feed.description
        feed.image_url = parsed_feed.image_url
        feed.image_height = parsed_feed.image_height
        feed.image_alt = parsed_feed.image_alt
        feed.oldest_article = parsed_feed.oldest_article
        feed.articles = []
        return feed

    def parse_feeds(self):
        # convert single parsed feed into date-sectioned feed
        # use this only if there is just 1 feed
        parsed_feeds = super().parse_feeds()
        if len(parsed_feeds or []) != 1:
            return parsed_feeds

        articles = []
        for feed in parsed_feeds:
            articles.extend(feed.articles)
        articles = sorted(articles, key=lambda a: a.utctime, reverse=True)

        new_feeds = []
        curr_feed = None
        parsed_feed = parsed_feeds[0]
        for a in articles:
            date_published = a.utctime.replace(tzinfo=timezone.utc)
            article_index = f"{date_published:{get_date_format()}}"

            # add author and pub date to the article content
            soup = self.soup(a.content)
            header = None
            if soup.body.contents[0].name in ["h1", "h2", "h3"]:
                header = soup.body.contents[0]
            meta = soup.new_tag("div", attrs={"class": "article-meta"})
            if a.author:
                author_ele = soup.new_tag("span", attrs={"class": "author"})
                author_ele.append(a.author)
                meta.append(author_ele)
            pub_ele = soup.new_tag("span", attrs={"class": "published-dt"})
            pub_ele.append(f"{date_published:{get_datetime_format()}}")
            meta.append(pub_ele)
            if header:
                header.insert_after(meta)
            else:
                soup.body.insert(0, meta)
            a.content = soup.body.decode_contents()

            # articles are sorted by date desc, so a new date string
            # starts a new section feed
            if curr_feed is None or curr_feed.title != article_index:
                if curr_feed is not None:
                    new_feeds.append(curr_feed)
                curr_feed = self._new_section_feed(parsed_feed, article_index)
            curr_feed.articles.append(a)
        if curr_feed is not None:
            # flush the last section
            new_feeds.append(curr_feed)

        return new_feeds
123 |
--------------------------------------------------------------------------------
/recipes/japan-times.recipe.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | # Original at https://github.com/kovidgoyal/calibre/blob/4a01a799f19c4d0711d826ec7c79821b4ea690b6/recipes/japan_times.recipe
5 | #
6 | # [!] Ad-blocked, requires login
7 | #
8 | """
9 | japantimes.co.jp
10 | """
11 |
12 | __license__ = "GPL v3"
13 | __copyright__ = (
14 | "2008-2013, Darko Miletic . "
15 | "2022, Albert Aparicio Isarn "
16 | )
17 |
18 | import os
19 | import sys
20 | from datetime import datetime
21 |
22 | # custom include to share code between recipes
23 | sys.path.append(os.environ["recipes_includes"])
24 | from recipes_shared import BasicNewsrackRecipe, format_title, get_datetime_format
25 |
26 | from calibre.web.feeds.news import BasicNewsRecipe
27 |
28 | _name = "Japan Times"
29 |
30 |
class JapanTimes(BasicNewsrackRecipe, BasicNewsRecipe):
    """Japan Times recipe: multiple RSS feeds, ad-blocked, login required."""

    title = _name
    __author__ = "Albert Aparicio Isarn (original recipe by Darko Miletic)"
    description = "The latest news from Japan Times, Japan's leading English-language daily newspaper"
    language = "en_JP"
    category = "news, politics, japan"
    publisher = "The Japan Times"
    oldest_article = 1
    max_articles_per_feed = 60
    publication_type = "newspaper"
    masthead_url = "https://cdn-japantimes.com/wp-content/themes/jt_theme/library/img/japantimes-logo-tagline.png"

    auto_cleanup = False

    conversion_options = {
        "comment": description,
        "tags": category,
        "publisher": publisher,
        "language": language,
    }

    remove_attributes = ["style"]
    remove_tags_before = [dict(name="main")]
    remove_tags_after = [dict(name="main")]

    remove_tags = [
        dict(name=["script", "style"]),
        dict(
            id=[
                "tpModal",
                "site_header",
                "nav_anchor_container",
                "nav",
                "no_js_blocker",
                "menu",
                "taboola-below-article-thumbnails",
                "disqus_thread",
                "piano-recommend",
            ]
        ),
        dict(
            class_=[
                "clearfix",
                "nav_search",
                "sub_menu_container",
                "sidebar",
                "ad",
                "site_footer",
                "post-attachments",
                "post-keywords",
                "newsletter-signup",
                "DisplayAd",
                "jt-subscribe-box",
                "single-sns-area",
                "single-upper-meta",
                "article_footer_ad",
                "note-to-commenters",
                "note-to-non-commenters",
                "pagetop-wrap",
                "jt-related-stories",
            ]
        ),
    ]

    extra_css = """
    .article-meta { margin-top: 1rem; margin-bottom: 1rem; }
    .article-meta .author { font-weight: bold; color: #444; margin-right: 0.5rem; }
    ul.slides { list-style: none; }
    .slide_image img { max-width: 100%; height: auto; }
    .slide_image div, .inline_image div { font-size: 0.8rem; margin-top: 0.2rem; }
    """

    feeds = [
        ("Top Stories", "https://www.japantimes.co.jp/feed/topstories/"),
        ("News", "https://www.japantimes.co.jp/news/feed/"),
        ("Opinion", "https://www.japantimes.co.jp/opinion/feed/"),
        ("Life", "https://www.japantimes.co.jp/life/feed/"),
        ("Community", "https://www.japantimes.co.jp/community/feed/"),
        ("Culture", "https://www.japantimes.co.jp/culture/feed/"),
        # ("Sports", "https://www.japantimes.co.jp/sports/feed/"),
    ]

    def preprocess_html(self, soup):
        """Clean up article markup and inject an author/date meta block."""
        # "unbullet" the images
        slides = soup.find(name="ul", attrs={"class": "slides"})
        if slides:
            for img_div in slides.find_all(attrs={"class": "slide_image"}):
                slides.insert_after(img_div.extract())
            slides.decompose()

        # embed the lazy loaded images
        lazy_loaded_images = soup.find_all(name="img", attrs={"data-src": True})
        for img in lazy_loaded_images:
            img["src"] = img["data-src"]

        # reformat the article meta
        meta = soup.new_tag("div", attrs={"class": "article-meta"})
        credit = soup.find(name="meta", attrs={"name": "cXenseParse:jat-credit"})
        if credit:
            # author names are packed into a single content attribute,
            # split by a page-provided separator (default ",")
            sep = credit.get("data-separator", ",")
            authors = credit["content"].split(sep)
            author_ele = soup.new_tag("span", attrs={"class": "author"})
            author_ele.append(",".join(authors))
            meta.append(author_ele)
        pub_date = soup.find(name="meta", attrs={"property": "article:published_time"})
        if pub_date:
            pub_date = datetime.fromisoformat(pub_date["content"])
            pub_date_ele = soup.new_tag("span", attrs={"class": "published-date"})
            pub_date_ele.append(f"{pub_date:{get_datetime_format()}}")
            meta.append(pub_date_ele)
            # track the newest article date for the ebook title/pub date
            if (not self.pub_date) or pub_date > self.pub_date:
                self.pub_date = pub_date
                self.title = format_title(_name, pub_date)
        # fix: guard against pages without an <h1> — the original
        # soup.body.h1.insert_after(meta) raised AttributeError there
        h1 = soup.body.find("h1") if soup.body else None
        if h1:
            h1.insert_after(meta)
        elif soup.body:
            soup.body.insert(0, meta)
        return soup

    def parse_feeds(self):
        """Sort each feed's articles newest-first (feeds are not date-sorted)."""
        parsed_feeds = super().parse_feeds()
        for feed in parsed_feeds:
            feed.articles = sorted(
                feed.articles, key=lambda a: a.utctime, reverse=True
            )
        return parsed_feeds
155 |
--------------------------------------------------------------------------------
/recipes/joongangdaily.recipe.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 https://github.com/ping/
2 | #
3 | # This software is released under the GNU General Public License v3.0
4 | # https://opensource.org/licenses/GPL-3.0
5 |
6 | """
7 | koreajoongangdaily.joins.com
8 | """
9 | import os
10 | import sys
11 |
12 | # custom include to share code between recipes
13 | sys.path.append(os.environ["recipes_includes"])
14 | from recipes_shared import BasicNewsrackRecipe, format_title
15 |
16 | from calibre.web.feeds.news import BasicNewsRecipe
17 |
18 | _name = "JoongAng Daily"
19 |
20 |
class KoreaJoongAngDaily(BasicNewsrackRecipe, BasicNewsRecipe):
    """Korea JoongAng Daily recipe: one embedded-content RSS feed, auto-cleaned."""

    title = _name
    description = "The Korea JoongAng Daily is an English-language daily published by the JoongAng Group, Korea’s leading media group, in association with The New York Times. https://koreajoongangdaily.joins.com/"
    language = "en"
    __author__ = "ping"
    publication_type = "newspaper"
    masthead_url = "https://koreajoongangdaily.joins.com/resources/images/common/logo.png"
    use_embedded_content = True
    auto_cleanup = True
    compress_news_images_auto_size = 10

    oldest_article = 1  # days
    max_articles_per_feed = 60

    extra_css = """
    .caption { font-size: 0.8rem; margin: 0.5rem 0; }
    """

    feeds = [
        ("Korea JoongAng Daily", "https://koreajoongangdaily.joins.com/xmls/joins"),
    ]

    def populate_article_metadata(self, article, __, _):
        # Keep the recipe's pub date/title pinned to the newest article seen.
        seen = article.utctime
        if self.pub_date and seen <= self.pub_date:
            return
        self.pub_date = seen
        self.title = format_title(_name, seen)

    def parse_feeds(self):
        # Section the single feed by publication date in Seoul local time.
        return self.group_feeds_by_date(timezone_offset_hours=9)
52 |
--------------------------------------------------------------------------------
/recipes/kirkus.recipe.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | from urllib.parse import urljoin
4 |
5 | # custom include to share code between recipes
6 | sys.path.append(os.environ["recipes_includes"])
7 | from recipes_shared import BasicNewsrackRecipe
8 |
9 | from calibre.web.feeds.news import BasicNewsRecipe
10 |
11 | _name = "Kirkus"
12 |
13 |
class Kirkus(BasicNewsrackRecipe, BasicNewsRecipe):
    """Kirkus Reviews recipe: scrapes the current magazine issue page."""

    title = _name
    description = "Kirkus Reviews is an American book review magazine founded in 1933 by Virginia Kirkus. The magazine is headquartered in New York City. https://www.kirkusreviews.com/magazine/current/"
    language = "en"
    __author__ = "ping"
    publication_type = "magazine"
    masthead_url = (
        "https://d1fd687oe6a92y.cloudfront.net/img/kir_images/logo/kirkus-nav-logo.svg"
    )
    max_articles_per_feed = 99
    compress_news_images_auto_size = 6
    keep_only_tags = [
        dict(
            class_=[
                "article-author",
                "article-author-img-start",
                "article-author-description-start",
                "single-review",
            ]
        )
    ]
    remove_tags = [
        dict(
            class_=[
                "sidebar-content",
                "article-social-share-desktop-first",
                "article-social-share-desktop-pagination",
                "article-social-share-mobile",
                "share-review-text",
                "like-dislike-article",
                "rate-this-book-text",
                "input-group",
                "user-comments",
                "show-all-response-text",
                "button-row",
                "hide-on-mobile",
                "related-article",
                "breadcrumb-row",
                "shop-now-dropdown",
            ]
        )
    ]
    remove_tags_after = [dict(class_="single-review")]

    extra_css = """
    .image-container img { max-width: 100%; height: auto; margin-bottom: 0.2rem; }
    .photo-caption { font-size: 0.8rem; margin-bottom: 0.5rem; display: block; }
    .book-review-img .image-container { text-align: center; }
    .book-rating-module .description-title { font-size: 1.25rem; margin-left: 0; text-align: center; }
    """

    def preprocess_html(self, soup):
        """Flatten the book-cover list markup and move the title above it."""
        h1 = soup.find(class_="article-title")
        book_cover = soup.find("ul", class_="book-review-img")
        if book_cover:
            # convert the <ul>/<li> cover markup into plain <div>s
            for li in book_cover.find_all("li"):
                li.name = "div"
            book_cover.name = "div"
            if h1:
                book_cover.insert_before(h1.extract())

        return soup

    def parse_index(self):
        """Build the {section: [articles]} index from the current-issue page."""
        issue_url = "https://www.kirkusreviews.com/magazine/current/"
        soup = self.index_to_soup(issue_url)
        issue = soup.find(name="article", class_="issue-container")
        cover_img = issue.select(".issue-header .cover-image img")
        if cover_img:
            self.cover_url = cover_img[0]["src"]

        h1 = issue.find("h1")
        if h1:
            edition = self.tag_to_string(h1)
            self.title = f"{_name}: {edition}"
            # Example: April 1, 2023 "%B %d, %Y"
            self.pub_date = self.parse_date(edition)

        articles = {}
        # featured books
        for book_ele in soup.find_all(name="div", class_="issue-featured-book"):
            link = book_ele.find("a")
            if not link:
                continue
            section = self.tag_to_string(book_ele.find("h3")).upper()
            articles.setdefault(section, []).append(
                {"url": urljoin(issue_url, link["href"]), "title": link["title"]}
            )

        # additional issue posts
        for post_ele in issue.select("div.issue-more-posts ul li div.lead-text"):
            link = post_ele.find("a")
            if not link:
                continue
            section = self.tag_to_string(post_ele.find(class_="lead-text-type")).upper()
            articles.setdefault(section, []).append(
                {
                    "url": urljoin(issue_url, link["href"]),
                    "title": self.tag_to_string(link),
                }
            )

        # starred reviews, grouped by review section
        for section_ele in issue.select("section.reviews-section"):
            section_articles = []
            for review in section_ele.select("ul li.starred"):
                link = review.select("h4 a")
                if not link:
                    continue
                description = review.find("p")
                section_articles.append(
                    {
                        "url": urljoin(issue_url, link[0]["href"]),
                        "title": self.tag_to_string(link[0]),
                        "description": ""
                        if not description
                        else self.tag_to_string(description),
                    }
                )
            if not section_articles:
                continue
            section = self.tag_to_string(section_ele.find("h3")).upper()
            # fix: dropped the redundant `if section not in articles` init —
            # setdefault already handles the missing-key case
            articles.setdefault(section, []).extend(section_articles)

        return articles.items()
138 |
--------------------------------------------------------------------------------
/recipes/knowable-magazine.recipe.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 https://github.com/ping/
2 | #
3 | # This software is released under the GNU General Public License v3.0
4 | # https://opensource.org/licenses/GPL-3.0
5 |
6 | """
7 | knowablemagazine.org
8 | """
9 | import os
10 | import sys
11 |
12 | # custom include to share code between recipes
13 | sys.path.append(os.environ["recipes_includes"])
14 | from recipes_shared import BasicNewsrackRecipe, format_title
15 |
16 | from calibre.web.feeds.news import BasicNewsRecipe
17 |
18 | _name = "Knowable Magazine"
19 |
20 |
class KnowableMagazine(BasicNewsrackRecipe, BasicNewsRecipe):
    """Knowable Magazine recipe: one RSS feed, grouped into date sections."""

    title = _name
    __author__ = "ping"
    description = (
        "Knowable Magazine explores the real-world significance of scholarly work "
        "through a journalistic lens. We report on the current state of play across "
        "a wide variety of fields — from agriculture to high-energy physics; "
        "biochemistry to water security; the origins of the universe to psychology. "
        "https://knowablemagazine.org/"
    )
    masthead_url = "https://knowablemagazine.org/pb-assets/knowable-assets/images/logo-1586554394067.svg"
    language = "en"
    publication_type = "magazine"
    timeout = 60

    oldest_article = 45  # days
    max_articles_per_feed = 15
    scale_news_images = (800, 1200)

    keep_only_tags = [
        dict(class_=["article-container"]),
    ]
    remove_attributes = ["style"]
    remove_tags = [
        dict(name=["script", "style", "svg"]),
        dict(attrs={"data-widget-def": True}),
        dict(id=["newsletter-promo-item"]),
        dict(
            class_=[
                "promo",
                "ember-view",
                "promo-article-dark",
                "share-icons-box",
                "article-tags",
                "article-republish",
            ]
        ),
    ]

    extra_css = """
    h1 { font-size: 1.8rem; margin-bottom: 0.4rem; }
    .article-subhead { font-size: 1.2rem; font-style: italic; font-weight: normal; margin-bottom: 0.5rem; margin-top: 0; }
    .article-byline { margin-top: 0.5rem; margin-bottom: 1rem; }
    .article-byline .author-byline { font-weight: bold; color: #444; display: inline-block; }
    .article-byline .pub-date { display: inline-block; margin-left: 0.5rem; }
    .article-image img {
    display: block; margin-bottom: 0.3rem; max-width: 100%; height: auto;
    box-sizing: border-box;
    }
    .article-image .caption { font-size: 0.8rem; }
    .pull-quote { font-size: 1.25rem; margin-left: 0; text-align: center; }
    """

    feeds = [
        (_name, "https://knowablemagazine.org/rss"),
    ]

    def populate_article_metadata(self, article, __, _):
        # Keep the recipe's pub date/title pinned to the newest article seen.
        seen = article.utctime
        if self.pub_date and seen <= self.pub_date:
            return
        self.pub_date = seen
        self.title = format_title(_name, seen)

    def parse_feeds(self):
        # Section the single feed by publication date in US Pacific time.
        return self.group_feeds_by_date(timezone_offset_hours=-7)
85 |
--------------------------------------------------------------------------------
/recipes/korea-herald.recipe.py:
--------------------------------------------------------------------------------
1 | """
2 | koreaherald.com
3 | """
4 | __license__ = "GPL v3"
5 | __copyright__ = "2011, Seongkyoun Yoo "
6 |
7 | import os
8 | import re
9 | import sys
10 |
11 | # custom include to share code between recipes
12 | sys.path.append(os.environ["recipes_includes"])
13 | from recipes_shared import BasicNewsrackRecipe, format_title
14 |
15 | from calibre.web.feeds.news import BasicNewsRecipe
16 |
17 | _name = "Korea Herald"
18 |
19 |
20 | class KoreaHerald(BasicNewsrackRecipe, BasicNewsRecipe):
21 | title = _name
22 | language = "en"
23 | description = "Korea Herald News articles https://koreaherald.com/"
24 | __author__ = "Seongkyoun Yoo"
25 | publication_type = "newspaper"
26 | masthead_url = "https://res.heraldm.com/new_201209/images/common/logo.gif"
27 |
28 | oldest_article = 1
29 | max_articles_per_feed = 25
30 |
31 | keep_only_tags = [dict(class_="news_content")]
32 | remove_attributes = ["style", "align"]
33 | remove_tags = [
34 | dict(name=["script", "style"]),
35 | dict(class_=["news_btn_wrap", "news_journalist_area"]),
36 | ]
37 |
38 | extra_css = """
39 | h1.news_title { font-size: 1.8rem; margin-bottom: 0.4rem; }
40 | h2.news_title { font-size: 1.2rem; font-style: italic; font-weight: normal; margin-bottom: 0.8rem; }
41 | p.news_date { margin-top: 0.2rem; }
42 | .img_caption { font-size: 0.8rem; margin-top: 0.2rem; display: block; }
43 | """
44 |
45 | feeds = [
46 | ("National", "http://www.koreaherald.com/common/rss_xml.php?ct=102"),
47 | ("Business", "http://www.koreaherald.com/common/rss_xml.php?ct=103"),
48 | ("Finance", "http://www.koreaherald.com/common/rss_xml.php?ct=305"),
49 | ("Life & Style", "http://www.koreaherald.com/common/rss_xml.php?ct=104"),
50 | ("Entertainment", "http://www.koreaherald.com/common/rss_xml.php?ct=105"),
51 | # ("Sports", "http://www.koreaherald.com/common/rss_xml.php?ct=106"),
52 | ("World", "http://www.koreaherald.com/common/rss_xml.php?ct=107"),
53 | ("Opinion", "http://www.koreaherald.com/common/rss_xml.php?ct=108"),
54 | ]
55 |
56 | def populate_article_metadata(self, article, __, _):
57 | if (not self.pub_date) or article.utctime > self.pub_date:
58 | self.pub_date = article.utctime
59 | self.title = format_title(_name, article.utctime)
60 |
61 | def preprocess_html(self, soup):
62 | byline_date = soup.find(attrs={"class": "view_tit_byline_r"})
63 | if byline_date:
64 | # format the published/updated date properly
65 | date_elements = []
66 | # Published : Apr 18, 2022 - 16:41 Updated : Apr 18, 2022 - 16:41
67 | date_re = r"(Published|Updated).+?\:.+?(?P[a-z]{3}\s\d+),.+?(?P