├── .editorconfig ├── .flake8 ├── .github └── workflows │ └── python-package-poetry.yml ├── .gitignore ├── Dockerfile ├── LICENSE.txt ├── README.markdown ├── ebook ├── __init__.py ├── cover.py ├── epub.py └── image.py ├── examples ├── cultivationchatgroup.json ├── dungeonkeeperami.json ├── fifthdefiance.json ├── heretical-edge-2.json ├── heretical-edge.json ├── pact.json ├── paeantosmac.json ├── pale-lights.json ├── pale-withextras.json ├── pale.json ├── phoenixdestiny.json ├── practical1.json ├── practical2.json ├── practical3.json ├── practical4.json ├── practical5.json ├── practical6.json ├── practical7.json ├── practicalall.json ├── practicalextra.json ├── sagaofsoul.json ├── shouldthesun.json ├── thegodsarebastards.json ├── twig.json ├── unsong.json ├── vacantthrone.json ├── wanderinginn.json ├── ward.json └── worm.json ├── leech.py ├── poetry.lock ├── pyproject.toml └── sites ├── __init__.py ├── ao3.py ├── arbitrary.py ├── deviantart.py ├── fanfictionnet.py ├── fictionlive.py ├── royalroad.py ├── stash.py ├── wattpad.py ├── xenforo.py └── xenforo2.py /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | end_of_line = lf 5 | insert_final_newline = true 6 | trim_trailing_whitespace = true 7 | 8 | [*.py] 9 | indent_style = space 10 | indent_size = 4 11 | charset = utf-8 12 | 13 | [{package.json,.travis.yml}] 14 | indent_style = space 15 | indent_size = 2 16 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | extend_ignore = 3 | # E128 continuation line under-indented for visual indent 4 | # E128, 5 | # E501 line too long 6 | E501 7 | exclude = .git,__pycache__,venv 8 | -------------------------------------------------------------------------------- /.github/workflows/python-package-poetry.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python package 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | matrix: 18 | python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] 19 | 20 | steps: 21 | - uses: actions/checkout@v2 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v4 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install tooling 27 | run: | 28 | python -m ensurepip 29 | python -m pip install --upgrade pip 30 | python -m pip install flake8 poetry 31 | - name: Install dependencies 32 | run: | 33 | poetry install 34 | - name: Lint with flake8 35 | run: | 36 | flake8 . 
37 | - name: Make sure help runs 38 | run: | 39 | poetry run leech --help 40 | - name: Build a cover 41 | run: | 42 | poetry run python -m 'ebook.cover' && file -E output.png && rm output.png 43 | - name: Verify poetry build 44 | run: | 45 | poetry build && ls -og dist/* 46 | - name: eclint 47 | uses: snow-actions/eclint@v1.0.1 48 | with: 49 | args: 'check *.py sites/*.py' 50 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.epub 2 | *.mobi 3 | ./*.json 4 | leech.db 5 | leech.sqlite 6 | leech.cookies 7 | leech.json 8 | venv/ 9 | .venv 10 | 11 | # Byte-compiled / optimized / DLL files 12 | __pycache__/ 13 | *.py[cod] 14 | 15 | # C extensions 16 | *.so 17 | 18 | # Distribution / packaging 19 | bin/ 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | .tox/ 39 | .coverage 40 | .cache 41 | nosetests.xml 42 | coverage.xml 43 | 44 | # Translations 45 | *.mo 46 | 47 | # Mr Developer 48 | .mr.developer.cfg 49 | .project 50 | .pydevproject 51 | 52 | # Rope 53 | .ropeproject 54 | 55 | # Django stuff: 56 | *.log 57 | *.pot 58 | 59 | # Sphinx documentation 60 | docs/_build/ 61 | 62 | # Pycharm 63 | .idea/ 64 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine:latest 2 | 3 | # Package list taken from Pillow documentation: 4 | # https://pillow.readthedocs.io/en/stable/installation.html#building-on-linux 5 | RUN apk add tiff-dev jpeg-dev openjpeg-dev zlib-dev freetype-dev lcms2-dev \ 6 | libwebp-dev tcl-dev tk-dev harfbuzz-dev fribidi-dev libimagequant-dev \ 7 | libxcb-dev libpng-dev gcc musl-dev python3 python3-dev py3-pip py3-cryptography 8 | RUN pip3 config set global.break-system-packages true && pip3 install poetry 9 | 10 | COPY . /leech 11 | 12 | RUN cd /leech \ 13 | && poetry config virtualenvs.create false \ 14 | && poetry install --without dev 15 | 16 | WORKDIR /work 17 | 18 | ENTRYPOINT ["/leech/leech.py"] 19 | 20 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2013-2017 David Lynch 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.markdown: -------------------------------------------------------------------------------- 1 | Leech 2 | === 3 | 4 | Let's say you want to read some sort of fiction. You're a fan of it, perhaps. But mobile websites are kind of non-ideal, so you'd like a proper ebook made from whatever you're reading. 5 | 6 | Setup 7 | --- 8 | 9 | You need Python 3.9+ and poetry. 10 | 11 | My recommended setup process is: 12 | 13 | $ pip install poetry 14 | $ poetry install 15 | $ poetry shell 16 | 17 | ...adjust as needed. Just make sure the dependencies from `pyproject.toml` get installed somehow. 18 | 19 | Usage 20 | --- 21 | 22 | Basic 23 | 24 | $ python3 leech.py [[URL]] 25 | 26 | A new file will appear named `Title of the Story.epub`. 27 | 28 | This is equivalent to the slightly longer 29 | 30 | $ python3 leech.py download [[URL]] 31 | 32 | Flushing the cache 33 | 34 | $ python3 leech.py flush 35 | 36 | Learn about other options 37 | 38 | $ python3 leech.py --help 39 | 40 | If you want to put an ePub on a Kindle you'll have to either use Amazon's send-to-kindle tools or convert it. For the latter I'd recommend [Calibre](http://calibre-ebook.com/), though you could also try using [kindlegen](http://www.amazon.com/gp/feature.html?docId=1000765211) directly. 41 | 42 | Supports 43 | --- 44 | 45 | * Fanfiction.net 46 | * FictionPress 47 | * ArchiveOfOurOwn 48 | * Yes, it has its own built-in EPUB export, but the formatting is horrible 49 | * Various XenForo-based sites: SpaceBattles and SufficientVelocity, most notably 50 | * RoyalRoad 51 | * Fiction.live (Anonkun) 52 | * DeviantArt galleries/collections 53 | * Sta.sh 54 | * Completely arbitrary sites, with a bit more work (see below) 55 | 56 | Configuration 57 | --- 58 | 59 | A very small amount of configuration is possible by creating a file called `leech.json` in the project directory. Currently you can define login information for sites that support it, and some options for book covers. 60 | 61 | Example: 62 | 63 | ``` 64 | { 65 | "logins": { 66 | "QuestionableQuesting": ["username", "password"] 67 | }, 68 | "images": { 69 | "image_fetch": true, 70 | "image_format": "png", 71 | "compress_images": true, 72 | "max_image_size": 100000, 73 | "always_convert_images": true 74 | }, 75 | "cover": { 76 | "fontname": "Comic Sans MS", 77 | "fontsize": 30, 78 | "bgcolor": [20, 120, 20], 79 | "textcolor": [180, 20, 180], 80 | "cover_url": "https://website.com/image.png" 81 | }, 82 | "output_dir": "/tmp/ebooks", 83 | "site_options": { 84 | "RoyalRoad": { 85 | "output_dir": "/tmp/litrpg_isekai_trash", 86 | "image_fetch": false 87 | } 88 | } 89 | } 90 | ``` 91 | > Note: The `image_fetch` key is a boolean and can only be `true` or `false`. Booleans in JSON are written in lowercase. 92 | > If it is `false`, Leech will not download any images. 93 | > Leech will also ignore the `image_format` key if `images` is `false`. 94 | 95 | > Note: If the `image_format` key does not exist, Leech will default to `jpeg`. 96 | > The three image formats are `jpeg`, `png`, and `gif`. The `image_format` key is case-insensitive. 97 | 98 | > Note: The `compress_images` key tells Leech to compress images. 
This is only supported for `jpeg` and `png` images. 99 | > This also goes hand-in-hand with the `max_image_size` key. If the `compress_images` key is `true` but there's no `max_image_size` key, 100 | > Leech will compress the image to a size less than 1MB (1000000 bytes). If the `max_image_size` key is present, Leech will compress the image 101 | > to a size less than the value of the `max_image_size` key. The `max_image_size` key is in bytes. 102 | > If `compress_images` is `false`, Leech will ignore the `max_image_size` key. 103 | 104 | > Warning: Compressing images might make Leech take a lot longer to download images. 105 | 106 | > Warning: Compressing images might make the image quality worse. 107 | 108 | > Warning: `max_image_size` is not a hard limit. Leech will try to compress the image to the size of the `max_image_size` key, but Leech might 109 | > not be able to compress the image to the exact size of the `max_image_size` key. 110 | 111 | > Warning: `max_image_size` should not be too small. For instance, if you set `max_image_size` to 1000, Leech will probably not be able to 112 | > compress the image to 1000 bytes. If you set `max_image_size` to 1000000, Leech will probably be able to compress the image to 1000000 bytes. 113 | 114 | > Warning: Leech will not compress GIFs, that might damage the animation. 115 | 116 | > Note: if `always_convert_images` is `true`, Leech will convert all non-GIF images to the specified `image_format`. 117 | 118 | Arbitrary Sites 119 | --- 120 | 121 | If you want to just download a one-off story from a site, you can create a definition file to describe it. This requires investigation and understanding of things like CSS selectors, which may take some trial and error. 122 | 123 | Example `practical.json`: 124 | 125 | ``` 126 | { 127 | "url": "https://practicalguidetoevil.wordpress.com/table-of-contents/", 128 | "title": "A Practical Guide To Evil: Book 1", 129 | "author": "erraticerrata", 130 | "chapter_selector": "#main .entry-content > ul:nth-of-type(1) > li > a", 131 | "content_selector": "#main .entry-content", 132 | "filter_selector": ".sharedaddy, .wpcnt, style", 133 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" 134 | } 135 | ``` 136 | 137 | Run as: 138 | 139 | $ ./leech.py practical.json 140 | 141 | This tells leech to load `url`, follow the links described by `chapter_selector`, extract the content from those pages as described by `content_selector`, and remove any content from *that* which matches `filter_selector`. Optionally, `cover_url` will replace the default cover with the image of your choice. 142 | 143 | If `chapter_selector` isn't given, it'll create a single-chapter book by applying `content_selector` to `url`. 144 | 145 | This is a fairly viable way to extract a story from, say, a random Wordpress installation with a convenient table of contents. It's relatively likely to get you at least *most* of the way to the ebook you want, with maybe some manual editing needed. 
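If all you need is that single-chapter behaviour, the definition file can be very small. A minimal sketch (the URL and selector here are hypothetical placeholders, not a real site):

```
{
    "url": "https://example.com/some-short-story/",
    "title": "Some Short Story",
    "author": "Some Author",
    "content_selector": ".entry-content"
}
```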
146 | 147 | A more advanced example with JSON would be: 148 | 149 | ``` 150 | { 151 | "url": "https://practicalguidetoevil.wordpress.com/2015/03/25/prologue/", 152 | "title": "A Practical Guide To Evil: Book 1", 153 | "author": "erraticerrata", 154 | "content_selector": "#main .entry-wrapper", 155 | "content_title_selector": "h1.entry-title", 156 | "content_text_selector": ".entry-content", 157 | "filter_selector": ".sharedaddy, .wpcnt, style", 158 | "next_selector": "a[rel=\"next\"]:not([href*=\"prologue\"])", 159 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" 160 | } 161 | ``` 162 | 163 | Because there's no `chapter_selector` here, leech will keep on looking for a link which it can find with `next_selector` and following that link. We also see more advanced metadata acquisition here, with `content_title_selector` and `content_text_selector` being used to find specific elements from within the content. 164 | 165 | If multiple matches for `content_selector` are found, leech will assume multiple chapters are present on one page, and will handle that. If you find a story that you want on a site which has all the chapters in the right order and next-page links, this is a notably efficient way to download it. See `examples/dungeonkeeperami.json` for this being used. 166 | 167 | If you need more advanced behavior, consider looking at... 168 | 169 | Adding new site handlers 170 | --- 171 | 172 | To add support for a new site, create a file in the `sites` directory that implements the `Site` interface. Take a look at `ao3.py` for a minimal example of what you have to do. 173 | 174 | Images support 175 | --- 176 | 177 | Leech creates EPUB 2.01 files, which means that Leech can only save images in the following 178 | format: 179 | - JPEG (JPG/JFIF) 180 | - PNG 181 | - GIF 182 | 183 | See the [Open Publication Structure (OPS) 2.0.1](https://idpf.org/epub/20/spec/OPS_2.0.1_draft.htm#TOC2.3.4) for more information. 184 | 185 | Leech can not save images in SVG because it is not supported by Pillow. 186 | 187 | Leech uses [Pillow](https://pillow.readthedocs.io/en/stable/index.html) for image manipulation and conversion. If you want to use a different 188 | image format, you can install the required dependencies for Pillow and you will probably have to tinker with Leech. See the [Pillow documentation](https://pillow.readthedocs.io/en/stable/installation.html#external-libraries) for more information. 189 | 190 | To configure image support, you will need to create a file called `leech.json`. See the section below for more information. 191 | 192 | Docker 193 | --- 194 | 195 | You can build the project's Docker container like this: 196 | 197 | ```shell 198 | docker build . -t kemayo/leech:snapshot 199 | ``` 200 | 201 | The container's entrypoint runs `leech` directly and sets the current working directory to `/work`, so you can mount any directory there: 202 | 203 | ```shell 204 | docker run -it --rm -v ${DIR}:/work kemayo/leech:snapshot download [[URL]] 205 | ``` 206 | 207 | Contributing 208 | --- 209 | 210 | If you submit a pull request to add support for another reasonably-general-purpose site, I will nigh-certainly accept it. 211 | 212 | Run [EpubCheck](https://github.com/IDPF/epubcheck) on epubs you generate to make sure they're not breaking. 
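For example, assuming you've downloaded the EpubCheck release jar (the jar path below is just a placeholder for wherever you put it):

```shell
java -jar epubcheck.jar "Title of the Story.epub"
```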
213 | -------------------------------------------------------------------------------- /ebook/__init__.py: -------------------------------------------------------------------------------- 1 | from .epub import make_epub, EpubFile 2 | from .cover import make_cover, make_cover_from_url 3 | from .image import get_image_from_url 4 | 5 | import html 6 | import unicodedata 7 | import datetime 8 | from attrs import define, asdict 9 | 10 | html_template = ''' 11 | 12 | 13 | {title} 14 | 15 | 16 | 17 |

<h1>{title}</h1>
18 | {text}
19 | </body>
20 | </html>
21 | '''
22 | 
23 | cover_template = '''
24 | <html xmlns="http://www.w3.org/1999/xhtml">
25 | <head>
26 | <title>Cover</title>
27 | 
28 | </head>
29 | <body>
30 | <div>
31 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" width="100%" height="100%" viewBox="0 0 600 800" preserveAspectRatio="xMidYMid meet">
33 | <image width="600" height="800" xlink:href="images/cover.png" />
34 | </svg>
35 | </div>
36 | </body>
37 | </html>
38 | '''
39 | 
40 | frontmatter_template = '''
41 | <html xmlns="http://www.w3.org/1999/xhtml">
42 | <head>
43 | <title>Front Matter</title>
44 | 
45 | </head>
46 | <body>
47 | <div class="frontmatter">
48 | <h2>{title}<br />By {author}</h2>
49 | <dl>
50 | <dt>Source</dt>
51 | <dd>{unique_id}</dd>
52 | <dt>Started</dt>
53 | <dd>{started:%Y-%m-%d}</dd>
54 | <dt>Updated</dt>
55 | <dd>{updated:%Y-%m-%d}</dd>
56 | <dt>Downloaded on</dt>
57 | <dd>{now:%Y-%m-%d}</dd>
58 | {extra}
59 | </dl>
60 | </div>
61 | 62 | 63 | ''' 64 | 65 | 66 | @define 67 | class CoverOptions: 68 | fontname: str = None 69 | fontsize: int = None 70 | width: int = None 71 | height: int = None 72 | wrapat: int = None 73 | bgcolor: tuple = None 74 | textcolor: tuple = None 75 | cover_url: str = None 76 | 77 | 78 | @define 79 | class ImageOptions: 80 | image_fetch: bool = False 81 | image_format: str = "JPEG" 82 | always_convert_images: bool = False 83 | compress_images: bool = False 84 | max_image_size: int = 1_000_000 85 | 86 | 87 | def chapter_html( 88 | story, 89 | image_options, 90 | titleprefix=None, 91 | normalize=False, 92 | session=None 93 | ): 94 | images = {} 95 | chapters = [] 96 | for i, chapter in enumerate(story): 97 | title = chapter.title or f'#{i}' 98 | if hasattr(chapter, '__iter__'): 99 | # This is a Section 100 | chapters.extend(chapter_html( 101 | chapter, image_options=image_options, titleprefix=title, normalize=normalize, session=session 102 | )) 103 | else: 104 | contents = chapter.contents 105 | images.update(chapter.images) 106 | 107 | title = titleprefix and f'{titleprefix}: {title}' or title 108 | if normalize: 109 | title = unicodedata.normalize('NFKC', title) 110 | contents = unicodedata.normalize('NFKC', contents) 111 | chapters.append(EpubFile( 112 | title=title, 113 | path=f'{story.id}/chapter{i + 1}.html', 114 | contents=html_template.format( 115 | title=html.escape(title), text=contents) 116 | )) 117 | 118 | if story.footnotes: 119 | chapters.append(EpubFile(title="Footnotes", path=f'{story.id}/footnotes.html', contents=html_template.format( 120 | title="Footnotes", text=story.footnotes.contents))) 121 | images.update(story.footnotes.images) 122 | 123 | for image in images.values(): 124 | img_contents = get_image_from_url( 125 | image.url, 126 | image_format=image_options.get('image_format'), 127 | compress_images=image_options.get('compress_images'), 128 | max_image_size=image_options.get('max_image_size'), 129 | always_convert=image_options.get('always_convert_images'), 130 | session=session 131 | ) 132 | path = f'{story.id}/{image.path()}' 133 | for chapterfile in chapters: 134 | if chapterfile.path == path: 135 | break 136 | else: 137 | chapters.append( 138 | EpubFile(path=path, contents=img_contents[0], filetype=img_contents[2]) 139 | ) 140 | 141 | return chapters 142 | 143 | 144 | def generate_epub(story, cover_options={}, image_options={}, output_filename=None, output_dir=None, normalize=False, allow_spaces=False, session=None, parser='lxml'): 145 | dates = list(story.dates()) 146 | metadata = { 147 | 'title': story.title, 148 | 'author': story.author, 149 | 'unique_id': story.url, 150 | 'started': min(dates), 151 | 'updated': max(dates), 152 | 'extra': '', 153 | } 154 | extra_metadata = {} 155 | 156 | session.headers.update({ 157 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) Gecko/20100101 Firefox/128.0', 158 | }) 159 | if story.url: 160 | session.headers.update({ 161 | 'Referer': story.url, 162 | }) 163 | 164 | if story.summary: 165 | extra_metadata['Summary'] = story.summary 166 | if story.tags: 167 | extra_metadata['Tags'] = ', '.join(story.tags) 168 | 169 | if extra_metadata: 170 | metadata['extra'] = '\n '.join( 171 | f'
<dt>{k}</dt><dd>{v}</dd>
' for k, v in extra_metadata.items()) 172 | 173 | valid_image_options = ('image_fetch', 'image_format', 'compress_images', 174 | 'max_image_size', 'always_convert_images') 175 | image_options = ImageOptions( 176 | **{k: v for k, v in image_options.items() if k in valid_image_options}) 177 | image_options = asdict(image_options, filter=lambda k, v: v is not None) 178 | 179 | valid_cover_options = ('fontname', 'fontsize', 'width', 180 | 'height', 'wrapat', 'bgcolor', 'textcolor', 'cover_url') 181 | cover_options = CoverOptions( 182 | **{k: v for k, v in cover_options.items() if k in valid_cover_options}) 183 | cover_options = asdict(cover_options, filter=lambda k, v: v is not None) 184 | 185 | if cover_options and "cover_url" in cover_options: 186 | image = make_cover_from_url( 187 | cover_options["cover_url"], story.title, story.author) 188 | elif story.cover_url: 189 | image = make_cover_from_url(story.cover_url, story.title, story.author) 190 | else: 191 | image = make_cover(story.title, story.author, **cover_options) 192 | 193 | return make_epub( 194 | output_filename or story.title + '.epub', 195 | [ 196 | # The cover is static, and the only change comes from the image which we generate 197 | EpubFile(title='Cover', path='cover.html', contents=cover_template), 198 | EpubFile(title='Front Matter', path='frontmatter.html', contents=frontmatter_template.format( 199 | now=datetime.datetime.now(), **metadata)), 200 | *chapter_html( 201 | story, 202 | image_options=image_options, 203 | normalize=normalize, 204 | session=session 205 | ), 206 | EpubFile( 207 | path='Styles/base.css', 208 | contents=session.get( 209 | 'https://raw.githubusercontent.com/mattharrison/epub-css-starter-kit/master/css/base.css').text, 210 | filetype='text/css' 211 | ), 212 | EpubFile(path='images/cover.png', 213 | contents=image.read(), filetype='image/png'), 214 | ], 215 | metadata, 216 | output_dir=output_dir, 217 | allow_spaces=allow_spaces 218 | ) 219 | -------------------------------------------------------------------------------- /ebook/cover.py: -------------------------------------------------------------------------------- 1 | 2 | from PIL import Image, ImageDraw 3 | from io import BytesIO 4 | import textwrap 5 | import requests 6 | import logging 7 | from . 
import image 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | def make_cover(title, author, width=600, height=800, fontname="Helvetica", fontsize=40, bgcolor=(120, 20, 20), textcolor=(255, 255, 255), wrapat=30): 13 | img = Image.new("RGBA", (width, height), bgcolor) 14 | draw = ImageDraw.Draw(img) 15 | 16 | title = textwrap.fill(title, wrapat) 17 | author = textwrap.fill(author, wrapat) 18 | 19 | font = image._safe_font(fontname, size=fontsize) 20 | title_size = image.textsize(draw, title, font=font) 21 | image.draw_text_outlined(draw, ((width - title_size[0]) / 2, 100), title, textcolor, font=font) 22 | # draw.text(((width - title_size[0]) / 2, 100), title, textcolor, font=font) 23 | 24 | font = image._safe_font(fontname, size=fontsize - 2) 25 | author_size = image.textsize(draw, author, font=font) 26 | image.draw_text_outlined(draw, ((width - author_size[0]) / 2, 100 + title_size[1] + 70), author, textcolor, font=font) 27 | 28 | output = BytesIO() 29 | img.save(output, "PNG") 30 | output.name = 'cover.png' 31 | # writing left the cursor at the end of the file, so reset it 32 | output.seek(0) 33 | return output 34 | 35 | 36 | def make_cover_from_url(url, title, author): 37 | try: 38 | logger.info("Downloading cover from " + url) 39 | img = requests.Session().get(url) 40 | cover = BytesIO(img.content) 41 | 42 | imgformat = Image.open(cover).format 43 | # The `Image.open` read a few bytes from the stream to work out the 44 | # format, so reset it: 45 | cover.seek(0) 46 | 47 | if imgformat != "PNG": 48 | cover = image._convert_to_new_format(cover, "PNG") 49 | except Exception as e: 50 | logger.info("Encountered an error downloading cover: " + str(e)) 51 | cover = make_cover(title, author) 52 | 53 | return cover 54 | 55 | 56 | if __name__ == '__main__': 57 | f = make_cover('Test of a Title which is quite long and will require multiple lines', 'Some Dude') 58 | with open('output.png', 'wb') as out: 59 | out.write(f.read()) 60 | -------------------------------------------------------------------------------- /ebook/epub.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import os.path 4 | import zipfile 5 | import xml.etree.ElementTree as etree 6 | import uuid 7 | import string 8 | from collections import namedtuple 9 | 10 | """ 11 | So, an epub is approximately a zipfile of HTML files, with 12 | a bit of metadata thrown in for good measure. 13 | 14 | This totally started from http://www.manuel-strehl.de/dev/simple_epub_ebooks_with_python.en.html 15 | """ 16 | 17 | 18 | EpubFile = namedtuple('EbookFile', 'path, contents, title, filetype', defaults=(False, False, "application/xhtml+xml")) 19 | 20 | 21 | def sanitize_filename(s, allow_spaces=False): 22 | """Take a string and return a valid filename constructed from the string. 23 | Uses a whitelist approach: any characters not present in valid_chars are 24 | removed. Also spaces are replaced with underscores. 25 | 26 | Note: this method may produce invalid filenames such as ``, `.` or `..` 27 | When I use this method I prepend a date string like '2009_01_15_19_46_32_' 28 | and append a file extension like '.txt', so I avoid the potential of using 29 | an invalid filename. 30 | 31 | """ 32 | valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits) 33 | filename = ''.join(c for c in s if c in valid_chars) 34 | if not allow_spaces: 35 | filename = filename.replace(' ', '_') # I don't like spaces in filenames. 
36 | return filename 37 | 38 | 39 | def make_epub(filename, files, meta, compress=True, output_dir=False, allow_spaces=False): 40 | unique_id = meta.get('unique_id', False) 41 | if not unique_id: 42 | unique_id = 'leech_book_' + str(uuid.uuid4()) 43 | 44 | filename = sanitize_filename(filename, allow_spaces) 45 | if output_dir: 46 | filename = os.path.join(output_dir, filename) 47 | epub = zipfile.ZipFile(filename, 'w', compression=compress and zipfile.ZIP_DEFLATED or zipfile.ZIP_STORED) 48 | 49 | # The first file must be named "mimetype", and shouldn't be compressed 50 | epub.writestr("mimetype", "application/epub+zip", compress_type=zipfile.ZIP_STORED) 51 | 52 | # We need an index file, that lists all other HTML files 53 | # This index file itself is referenced in the META_INF/container.xml 54 | # file 55 | container = etree.Element('container', version="1.0", xmlns="urn:oasis:names:tc:opendocument:xmlns:container") 56 | rootfiles = etree.SubElement(container, 'rootfiles') 57 | etree.SubElement(rootfiles, 'rootfile', { 58 | 'full-path': "OEBPS/Content.opf", 59 | 'media-type': "application/oebps-package+xml", 60 | }) 61 | epub.writestr("META-INF/container.xml", etree.tostring(container)) 62 | 63 | package = etree.Element('package', { 64 | 'version': "2.0", 65 | 'xmlns': "http://www.idpf.org/2007/opf", 66 | 'unique-identifier': 'book_identifier', # could plausibly be based on the name 67 | }) 68 | 69 | # build the metadata 70 | metadata = etree.SubElement(package, 'metadata', { 71 | 'xmlns:dc': "http://purl.org/dc/elements/1.1/", 72 | 'xmlns:opf': "http://www.idpf.org/2007/opf", 73 | }) 74 | identifier = etree.SubElement(metadata, 'dc:identifier', id='book_identifier') 75 | if unique_id.find('://') != -1: 76 | identifier.set('opf:scheme', "URI") 77 | identifier.text = unique_id 78 | etree.SubElement(metadata, 'dc:title').text = meta.get('title', 'Untitled') 79 | etree.SubElement(metadata, 'dc:language').text = meta.get('language', 'en') 80 | etree.SubElement(metadata, 'dc:creator', {'opf:role': 'aut'}).text = meta.get('author', 'Unknown') 81 | etree.SubElement(metadata, 'meta', {'name': 'generator', 'content': 'leech'}) 82 | 83 | # we'll need a manifest and spine 84 | manifest = etree.SubElement(package, 'manifest') 85 | spine = etree.SubElement(package, 'spine', toc="ncx") 86 | guide = etree.SubElement(package, 'guide') 87 | 88 | # ...and the ncx index 89 | ncx = etree.Element('ncx', { 90 | 'xmlns': "http://www.daisy.org/z3986/2005/ncx/", 91 | 'version': "2005-1", 92 | 'xml:lang': "en-US", 93 | }) 94 | etree.SubElement(etree.SubElement(ncx, 'head'), 'meta', name="dtb:uid", content=unique_id) 95 | etree.SubElement(etree.SubElement(ncx, 'docTitle'), 'text').text = meta.get('title', 'Untitled') 96 | etree.SubElement(etree.SubElement(ncx, 'docAuthor'), 'text').text = meta.get('author', 'Unknown') 97 | navmap = etree.SubElement(ncx, 'navMap') 98 | 99 | # Write each HTML file to the ebook, collect information for the index 100 | for i, file in enumerate(files): 101 | file_id = 'file_%d' % (i + 1) 102 | etree.SubElement(manifest, 'item', { 103 | 'id': file_id, 104 | 'href': file.path, 105 | 'media-type': file.filetype, 106 | }) 107 | if file.filetype == "application/xhtml+xml": 108 | itemref = etree.SubElement(spine, 'itemref', idref=file_id) 109 | point = etree.SubElement(navmap, 'navPoint', { 110 | 'class': "h1", 111 | 'id': file_id, 112 | }) 113 | etree.SubElement(etree.SubElement(point, 'navLabel'), 'text').text = file.title 114 | etree.SubElement(point, 'content', src=file.path) 115 | 116 | 
if 'cover.html' == os.path.basename(file.path): 117 | etree.SubElement(guide, 'reference', { 118 | 'type': 'cover', 119 | 'title': 'Cover', 120 | 'href': file.path, 121 | }) 122 | itemref.set('linear', 'no') 123 | if 'images/cover.png' == file.path: 124 | etree.SubElement(metadata, 'meta', { 125 | 'name': 'cover', 126 | 'content': file_id, 127 | }) 128 | 129 | # and add the actual html to the zip 130 | if file.contents: 131 | epub.writestr('OEBPS/' + file.path, file.contents) 132 | else: 133 | epub.write(file.path, 'OEBPS/' + file.path) 134 | 135 | # ...and add the ncx to the manifest 136 | etree.SubElement(manifest, 'item', { 137 | 'id': 'ncx', 138 | 'href': 'toc.ncx', 139 | 'media-type': "application/x-dtbncx+xml", 140 | }) 141 | epub.writestr('OEBPS/toc.ncx', etree.tostring(ncx)) 142 | 143 | # Finally, write the index 144 | epub.writestr('OEBPS/Content.opf', etree.tostring(package)) 145 | 146 | epub.close() 147 | 148 | return filename 149 | 150 | 151 | if __name__ == '__main__': 152 | make_epub('test.epub', [EpubFile(title='Chapter 1', path='a.html', contents="Test"), EpubFile(title='Chapter 2', path='test/b.html', contents="Still a test")], {}) 153 | -------------------------------------------------------------------------------- /ebook/image.py: -------------------------------------------------------------------------------- 1 | # Basically the same as cover.py with some minor differences 2 | import PIL 3 | from PIL import Image, ImageDraw, ImageFont 4 | from io import BytesIO 5 | from base64 import b64decode 6 | import math 7 | import textwrap 8 | import requests 9 | import logging 10 | 11 | from typing import Tuple 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | def get_size_format(b, factor=1000, suffix="B"): 17 | """ 18 | Scale bytes to its proper byte format 19 | e.g: 20 | 1253656 => '1.20MB' 21 | 1253656678 => '1.17GB' 22 | """ 23 | for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]: 24 | if b < factor: 25 | return f"{b:.2f}{unit}{suffix}" 26 | b /= factor 27 | return f"{b:.2f}Y{suffix}" 28 | 29 | 30 | def compress_image(image: BytesIO, target_size: int, image_format: str) -> PIL.Image.Image: 31 | image_size = get_size_format(len(image.getvalue())) 32 | logger.info(f"Image size: {image_size}") 33 | 34 | big_photo = Image.open(image).convert("RGBA") 35 | 36 | target_pixel_count = 2.8114 * target_size 37 | if len(image.getvalue()) > target_size: 38 | logger.info(f"Image is greater than {get_size_format(target_size)}, compressing") 39 | scale_factor = target_pixel_count / math.prod(big_photo.size) 40 | if scale_factor < 1: 41 | x, y = tuple(int(scale_factor * dim) for dim in big_photo.size) 42 | logger.info(f"Resizing image dimensions from {big_photo.size} to ({x}, {y})") 43 | sml_photo = big_photo.resize((x, y), resample=Image.LANCZOS) 44 | else: 45 | sml_photo = big_photo 46 | compressed_image_size = get_size_format(len(PIL_Image_to_bytes(sml_photo, image_format))) 47 | logger.info(f"Compressed image size: {compressed_image_size}") 48 | return sml_photo 49 | else: 50 | logger.info(f"Image is less than {get_size_format(target_size)}, not compressing") 51 | return big_photo 52 | 53 | 54 | def PIL_Image_to_bytes( 55 | pil_image: PIL.Image.Image, 56 | image_format: str 57 | ) -> bytes: 58 | out_io = BytesIO() 59 | if image_format.lower().startswith("gif"): 60 | frames = [] 61 | current = pil_image.convert('RGBA') 62 | while True: 63 | try: 64 | frames.append(current) 65 | pil_image.seek(pil_image.tell() + 1) 66 | current = Image.alpha_composite(current, 
pil_image.convert('RGBA')) 67 | except EOFError: 68 | break 69 | frames[0].save(out_io, format=image_format, save_all=True, append_images=frames[1:], optimize=True, loop=0) 70 | return out_io.getvalue() 71 | 72 | elif image_format.lower() in ["jpeg", "jpg"]: 73 | # Create a new image with a white background 74 | background_img = Image.new('RGBA', pil_image.size, "white") 75 | 76 | # Paste the image on top of the background 77 | background_img.paste(pil_image.convert("RGBA"), (0, 0), pil_image.convert("RGBA")) 78 | pil_image = background_img.convert('RGB') 79 | 80 | pil_image.save(out_io, format=image_format, optimize=True, quality=95) 81 | return out_io.getvalue() 82 | 83 | 84 | def get_image_from_url( 85 | url: str, 86 | image_format: str = "JPEG", 87 | compress_images: bool = False, 88 | max_image_size: int = 1_000_000, 89 | always_convert: bool = False, 90 | session: requests.Session = None 91 | ) -> Tuple[bytes, str, str]: 92 | """ 93 | Based on make_cover_from_url(), this function takes in the image url usually gotten from the `src` attribute of 94 | an image tag and returns the image data, the image format and the image mime type 95 | 96 | @param url: The url of the image 97 | @param image_format: The format to convert the image to if it's not in the supported formats 98 | @param compress_images: Whether to compress the image or not 99 | @param max_image_size: The maximum size of the image in bytes 100 | @return: A tuple of the image data, the image format and the image mime type 101 | """ 102 | logger.info("Downloading image: %s", url) 103 | session = session or requests.Session() 104 | try: 105 | if url.startswith("https://www.filepicker.io/api/"): 106 | logger.warning("Filepicker.io image detected, converting to Fiction.live image. This might fail.") 107 | url = f"https://cdn3.fiction.live/fp/{url.split('/')[-1]}?&quality=95" 108 | elif url.startswith("https://cdn3.fiction.live/images/") or url.startswith("https://ddx5i92cqts4o.cloudfront.net/images/"): 109 | logger.warning("Converting url to cdn6. 
This might fail.") 110 | url = f"https://cdn6.fiction.live/file/fictionlive/images/{url.split('/images/')[-1]}" 111 | elif url.startswith("data:image") and 'base64' in url: 112 | logger.info("Base64 image detected") 113 | head, base64data = url.split(',') 114 | file_ext = str(head.split(';')[0].split('/')[1]) 115 | imgdata = b64decode(base64data) 116 | if compress_images: 117 | if file_ext.lower() == "gif": 118 | logger.info("GIF images should not be compressed, skipping compression") 119 | else: 120 | compressed_base64_image = compress_image(BytesIO(imgdata), max_image_size, file_ext) 121 | imgdata = PIL_Image_to_bytes(compressed_base64_image, file_ext) 122 | 123 | if file_ext.lower() not in ["jpg", "jpeg", "png", "gif"]: 124 | logger.info(f"Image format {file_ext} not supported by EPUB2.0.1, converting to {image_format}") 125 | return _convert_to_new_format(imgdata, image_format).read(), image_format.lower(), f"image/{image_format.lower()}" 126 | return imgdata, file_ext, f"image/{file_ext}" 127 | 128 | img = session.get(url, timeout=(6.01, 30)) 129 | image = BytesIO(img.content) 130 | image.seek(0) 131 | 132 | PIL_image = Image.open(image) 133 | 134 | current_format = str(PIL_image.format) 135 | 136 | if current_format.lower() == "gif": 137 | PIL_image = Image.open(image) 138 | if PIL_image.info['version'] not in [b"GIF89a", "GIF89a"]: 139 | PIL_image.info['version'] = b"GIF89a" 140 | return PIL_Image_to_bytes(PIL_image, "GIF"), "gif", "image/gif" 141 | 142 | if compress_images: 143 | PIL_image = compress_image(image, max_image_size, current_format) 144 | 145 | if always_convert: 146 | current_format = image_format 147 | 148 | return PIL_Image_to_bytes(PIL_image, current_format), current_format, f"image/{current_format.lower()}" 149 | 150 | except Exception as e: 151 | logger.info("Encountered an error downloading image: " + str(e)) 152 | image = make_fallback_image("There was a problem downloading this image.").read() 153 | return image, "jpeg", "image/jpeg" 154 | 155 | 156 | def make_fallback_image( 157 | message: str, 158 | width=600, 159 | height=300, 160 | fontname="Helvetica", 161 | font_size=40, 162 | bg_color=(0, 0, 0), 163 | textcolor=(255, 255, 255), 164 | wrap_at=30 165 | ): 166 | """ 167 | This function should only be called if get_image_from_url() fails 168 | """ 169 | img = Image.new("RGB", (width, height), bg_color) 170 | draw = ImageDraw.Draw(img) 171 | 172 | message = textwrap.fill(message, wrap_at) 173 | 174 | font = _safe_font(fontname, size=font_size) 175 | message_size = textsize(draw, message, font=font) 176 | draw_text_outlined( 177 | draw, ((width - message_size[0]) / 2, 100), message, textcolor, font=font) 178 | # draw.text(((width - title_size[0]) / 2, 100), title, textcolor, font=font) 179 | 180 | output = BytesIO() 181 | img.save(output, "JPEG") 182 | # writing left the cursor at the end of the file, so reset it 183 | output.seek(0) 184 | return output 185 | 186 | 187 | def _convert_to_new_format(image_bytestream, image_format: str): 188 | new_image = BytesIO() 189 | try: 190 | Image.open(image_bytestream).save(new_image, format=image_format.upper()) 191 | new_image.seek(0) 192 | except Exception as e: 193 | logger.info(f"Encountered an error converting image to {image_format}\nError: {e}") 194 | new_image = make_fallback_image("There was a problem converting this image.") 195 | return new_image 196 | 197 | 198 | def _safe_font(preferred, *args, **kwargs): 199 | for font in (preferred, "Helvetica", "FreeSans", "Arial"): 200 | try: 201 | return 
ImageFont.truetype(*args, font=font, **kwargs) 202 | except IOError: 203 | pass 204 | 205 | # This is pretty terrible, but it'll work regardless of what fonts the 206 | # system has. Worst issue: can't set the size. 207 | return ImageFont.load_default() 208 | 209 | 210 | def textsize(draw, text, **kwargs): 211 | left, top, right, bottom = draw.multiline_textbbox((0, 0), text, **kwargs) 212 | width, height = right - left, bottom - top 213 | return width, height 214 | 215 | 216 | def draw_text_outlined(draw, xy, text, fill=None, font=None, anchor=None): 217 | x, y = xy 218 | 219 | # Outline 220 | draw.text((x - 1, y), text=text, fill=(0, 0, 0), font=font, anchor=anchor) 221 | draw.text((x + 1, y), text=text, fill=(0, 0, 0), font=font, anchor=anchor) 222 | draw.text((x, y - 1), text=text, fill=(0, 0, 0), font=font, anchor=anchor) 223 | draw.text((x, y + 1), text=text, fill=(0, 0, 0), font=font, anchor=anchor) 224 | 225 | # Fill 226 | draw.text(xy, text=text, fill=fill, font=font, anchor=anchor) 227 | 228 | 229 | if __name__ == '__main__': 230 | f = make_fallback_image( 231 | 'Test of a Title which is quite long and will require multiple lines', 232 | 'output.png' 233 | ) 234 | with open('output.png', 'wb') as out: 235 | out.write(f.read()) 236 | -------------------------------------------------------------------------------- /examples/cultivationchatgroup.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://novelfull.com/cultivation-chat-group/chapter-1-mt-yellows-true-monarch-and-nine-provinces-1-group.html", 3 | "title": "Cultivation Chat Group", 4 | "author": "Legend of the Paladin", 5 | "content_selector": "#chapter", 6 | "content_title_selector": "h2 .chapter-text", 7 | "content_text_selector": "#chapter-content", 8 | "filter_selector": "style, script, .adsbygoogle, .ads", 9 | "next_selector": "#next_chap[href]" 10 | } 11 | -------------------------------------------------------------------------------- /examples/dungeonkeeperami.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://forums.sufficientvelocity.com/threads/dungeon-keeper-ami-sailor-moon-dungeon-keeper-story-only-thread.30066/", 3 | "title": "Dungeon Keeper Ami", 4 | "author": "Pusakuronu", 5 | "content_selector": "article.message-body .bbWrapper", 6 | "filter_selector": ".sharedaddy, .wpcnt, style", 7 | "next_selector": "link[rel=next]" 8 | } 9 | -------------------------------------------------------------------------------- /examples/fifthdefiance.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://thefifthdefiance.com/chapters/", 3 | "title": "The Fifth Defiance", 4 | "author": "Walter", 5 | "chapter_selector": ".entry-content > p > a", 6 | "content_selector": ".entry-content", 7 | "content_title_selector": ".entry-title", 8 | "filter_selector": ".sharedaddy, .wpcnt, style" 9 | } -------------------------------------------------------------------------------- /examples/heretical-edge-2.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://ceruleanscrawling.wordpress.com/heretical-edge-2-table-of-contents/", 3 | "title": "Heretical Edge 2", 4 | "author": "Ceruelean", 5 | "chapter_selector": "article .entry-content > p > a:not([href*=patreon])", 6 | "content_selector": "article .entry-content", 7 | "filter_selector": ".sharedaddy, .wpcnt, style" 8 | } 9 | 
-------------------------------------------------------------------------------- /examples/heretical-edge.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://ceruleanscrawling.wordpress.com/table-of-contents/", 3 | "title": "Heretical Edge", 4 | "author": "Ceruelean", 5 | "chapter_selector": "article .entry-content > p > a", 6 | "content_selector": "article .entry-content", 7 | "filter_selector": ".sharedaddy, .wpcnt, style" 8 | } 9 | -------------------------------------------------------------------------------- /examples/pact.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://pactwebserial.wordpress.com/2013/12/17/bonds-1-1/", 3 | "title": "Pact", 4 | "author": "Wildbow", 5 | "content_selector": "#main", 6 | "content_title_selector": "h1.entry-title", 7 | "content_text_selector": ".entry-content", 8 | "filter_selector": ".sharedaddy, style, a[href*='pactwebserial.wordpress.com']", 9 | "next_selector": "a[rel=\"next\"]", 10 | "cover_url": "https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/a456e440-ea22-45c0-8b39-dacf9bbddade/d7dxaz4-64cfabe8-f957-44af-aaea-82346c401b27.jpg?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJ1cm46YXBwOiIsImlzcyI6InVybjphcHA6Iiwib2JqIjpbW3sicGF0aCI6IlwvZlwvYTQ1NmU0NDAtZWEyMi00NWMwLThiMzktZGFjZjliYmRkYWRlXC9kN2R4YXo0LTY0Y2ZhYmU4LWY5NTctNDRhZi1hYWVhLTgyMzQ2YzQwMWIyNy5qcGcifV1dLCJhdWQiOlsidXJuOnNlcnZpY2U6ZmlsZS5kb3dubG9hZCJdfQ.J-Wn8bDrKmoKKZW8mkJdi3uRoDV2FDJQZ_TuTWvQazY" 11 | } 12 | -------------------------------------------------------------------------------- /examples/paeantosmac.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://paeantosmac.wordpress.com/2015/02/17/introduction/", 3 | "title": "Paean to SMAC", 4 | "author": "Nick Stipanovich", 5 | "content_selector": "article.post", 6 | "content_title_selector": "header h1", 7 | "content_text_selector": "div.entry-content", 8 | "filter_selector": ".sharedaddy, .wpcnt, style", 9 | "next_selector": "link[rel=next]" 10 | } 11 | -------------------------------------------------------------------------------- /examples/pale-lights.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://palelights.com/2022/08/17/chapter-1/", 3 | "title": "Pale Lights", 4 | "author": "erraticerrata", 5 | "content_selector": "#main", 6 | "content_title_selector": "h1.entry-title", 7 | "content_text_selector": ".entry-content", 8 | "filter_selector": ".sharedaddy, .wpcnt, style", 9 | "next_selector": "a[rel=\"next\"]", 10 | "cover_url": "https://www.royalroadcdn.com/public/covers-large/pale-lights-aaaay6-1-bi.jpg" 11 | } 12 | -------------------------------------------------------------------------------- /examples/pale-withextras.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://palewebserial.wordpress.com/2020/05/05/blood-run-cold-0-0/", 3 | "title": "Pale", 4 | "author": "Wildbow", 5 | "content_selector": "#main", 6 | "content_title_selector": "h1.entry-title", 7 | "content_text_selector": ".entry-content", 8 | "filter_selector": ".sharedaddy, style, a[href*='palewebserial.wordpress.com']", 9 | "next_selector": "a[rel=\"next\"]" 10 | } 11 | -------------------------------------------------------------------------------- /examples/pale.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "url": "https://palewebserial.wordpress.com/table-of-contents/", 3 | "title": "Pale", 4 | "author": "Wildbow", 5 | "content_selector": "#main", 6 | "content_title_selector": "h1.entry-title", 7 | "content_text_selector": ".entry-content", 8 | "chapter_selector": "article .entry-content > p a", 9 | "filter_selector": ".sharedaddy, style, a[href*='palewebserial.wordpress.com']" 10 | } 11 | -------------------------------------------------------------------------------- /examples/phoenixdestiny.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://gravitytales.com/novel/phoenix-destiny/pd-chapter-1", 3 | "title": "Phoenix Destiny", 4 | "author": "Yun Ji", 5 | "content_selector": "#contentElement", 6 | "content_title_selector": "h4", 7 | "content_text_selector": "#chapterContent", 8 | "filter_selector": ".sharedaddy, .wpcnt, style", 9 | "next_selector": ".chapter-navigation > a:last-child[href*=\"pd-chapter\"]" 10 | } 11 | -------------------------------------------------------------------------------- /examples/practical1.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://practicalguidetoevil.wordpress.com/2015/03/25/prologue/", 3 | "title": "A Practical Guide To Evil: Book 1", 4 | "author": "erraticerrata", 5 | "content_selector": "#main", 6 | "content_title_selector": "h1.entry-title", 7 | "content_text_selector": ".entry-content", 8 | "filter_selector": ".sharedaddy, .wpcnt, style", 9 | "next_selector": "a[rel=\"next\"]:not([href*=\"prologue\"])", 10 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" 11 | } 12 | -------------------------------------------------------------------------------- /examples/practical2.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://practicalguidetoevil.wordpress.com/2015/11/04/prologue-2/", 3 | "title": "A Practical Guide To Evil: Book 2", 4 | "author": "erraticerrata", 5 | "content_selector": "#main", 6 | "content_title_selector": "h1.entry-title", 7 | "content_text_selector": ".entry-content", 8 | "filter_selector": ".sharedaddy, .wpcnt, style", 9 | "next_selector": "a[rel=\"next\"]:not([href*=\"prologue\"])", 10 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" 11 | } -------------------------------------------------------------------------------- /examples/practical3.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://practicalguidetoevil.wordpress.com/2017/02/08/prologue-3/", 3 | "title": "A Practical Guide To Evil: Book 3", 4 | "author": "erraticerrata", 5 | "content_selector": "#main", 6 | "content_title_selector": "h1.entry-title", 7 | "content_text_selector": ".entry-content", 8 | "filter_selector": ".sharedaddy, .wpcnt, style", 9 | "next_selector": "a[rel=\"next\"]:not([href*=\"prologue\"])", 10 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" 11 | } 12 | -------------------------------------------------------------------------------- /examples/practical4.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://practicalguidetoevil.wordpress.com/2018/04/09/prologue-4/", 3 | "title": 
"A Practical Guide To Evil: Book 4", 4 | "author": "erraticerrata", 5 | "content_selector": "#main", 6 | "content_title_selector": "h1.entry-title", 7 | "content_text_selector": ".entry-content", 8 | "filter_selector": ".sharedaddy, .wpcnt, style", 9 | "next_selector": "a[rel=\"next\"]:not([href*=\"prologue\"])", 10 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" 11 | } 12 | -------------------------------------------------------------------------------- /examples/practical5.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://practicalguidetoevil.wordpress.com/2019/01/14/prologue-5/", 3 | "title": "A Practical Guide To Evil: Book 5", 4 | "author": "erraticerrata", 5 | "content_selector": "#main", 6 | "content_title_selector": "h1.entry-title", 7 | "content_text_selector": ".entry-content", 8 | "filter_selector": ".sharedaddy, .wpcnt, style", 9 | "next_selector": "a[rel=\"next\"]:not([href*=\"prologue\"])", 10 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" 11 | } 12 | -------------------------------------------------------------------------------- /examples/practical6.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://practicalguidetoevil.wordpress.com/2020/01/06/prologue-6/", 3 | "title": "A Practical Guide To Evil: Book 6", 4 | "author": "erraticerrata", 5 | "content_selector": "#main", 6 | "content_title_selector": "h1.entry-title", 7 | "content_text_selector": ".entry-content", 8 | "filter_selector": ".sharedaddy, .wpcnt, style", 9 | "next_selector": "a[rel=\"next\"]:not([href*=\"prologue\"])", 10 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" 11 | } 12 | -------------------------------------------------------------------------------- /examples/practical7.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://practicalguidetoevil.wordpress.com/2021/03/02/prologue-7/", 3 | "title": "A Practical Guide To Evil: Book 7", 4 | "author": "erraticerrata", 5 | "content_selector": "#main", 6 | "content_title_selector": "h1.entry-title", 7 | "content_text_selector": ".entry-content", 8 | "filter_selector": ".sharedaddy, .wpcnt, style", 9 | "next_selector": "a[rel=\"next\"]:not([href*=\"prologue\"])", 10 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" 11 | } -------------------------------------------------------------------------------- /examples/practicalall.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://practicalguidetoevil.wordpress.com/2015/03/25/prologue/", 3 | "title": "A Practical Guide To Evil", 4 | "author": "erraticerrata", 5 | "content_selector": "#main", 6 | "content_title_selector": "h1.entry-title", 7 | "content_text_selector": ".entry-content", 8 | "filter_selector": ".sharedaddy, .wpcnt, style", 9 | "next_selector": "a[rel=\"next\"]", 10 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" 11 | } -------------------------------------------------------------------------------- /examples/practicalextra.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": 
"https://practicalguidetoevil.wordpress.com/extra-chapters/", 3 | "title": "A Practical Guide To Evil: Extra Chapters", 4 | "author": "erraticerrata", 5 | "chapter_selector": "#main .entry-content > ul > li > a", 6 | "content_selector": "#main .entry-content", 7 | "filter_selector": ".sharedaddy, .wpcnt, style", 8 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" 9 | } 10 | -------------------------------------------------------------------------------- /examples/sagaofsoul.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://www.sagaofsoul.com/story.html", 3 | "title": "Saga of Soul", 4 | "author": "Ouri Maler", 5 | "chapter_selector": "#mainbody li a", 6 | "content_selector": "#mainbody", 7 | "filter_selector": "script, noscript" 8 | } 9 | -------------------------------------------------------------------------------- /examples/shouldthesun.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://shouldthesun.wordpress.com/", 3 | "title": "Should The Sun Not Rise", 4 | "author": "Omicron", 5 | "chapter_selector": "#text-1 li a", 6 | "content_selector": ".entry-content", 7 | "filter_selector": ".sharedaddy, style, a[href*='shouldthesun.wordpress.com']", 8 | "cover_url": "https://shouldthesun.files.wordpress.com/2017/09/itzpapalotl.jpg" 9 | } 10 | -------------------------------------------------------------------------------- /examples/thegodsarebastards.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://tiraas.wordpress.com/table-of-contents/", 3 | "title": "The Gods Are Bastards", 4 | "author": "D. D. Webb", 5 | "chapter_selector": "article .entry-content a[href*='20']", 6 | "content_selector": "article .entry-content", 7 | "filter_selector": ".sharedaddy, .wpcnt, style, a[href*='tiraas.wordpress.com']", 8 | "cover_url": "https://tiraas.files.wordpress.com/2016/02/classof1182byhoarous.png" 9 | } 10 | -------------------------------------------------------------------------------- /examples/twig.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://twigserial.wordpress.com/2014/12/24/taking-root-1-1/", 3 | "title": "Twig", 4 | "author": "Wildbow", 5 | "content_selector": "#main", 6 | "content_title_selector": "h1.entry-title", 7 | "content_text_selector": ".entry-content", 8 | "filter_selector": ".sharedaddy, style, a[href*='twigserial.wordpress.com']", 9 | "next_selector": "a[rel=\"next\"]", 10 | "cover_url": "https://twigserial.files.wordpress.com/2015/03/cropped-twig-commission-titled1.png" 11 | } 12 | -------------------------------------------------------------------------------- /examples/unsong.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://unsongbook.com/prologue-2/", 3 | "title": "Unsong", 4 | "author": "Scott Alexander", 5 | "content_selector": "#pjgm-content", 6 | "content_title_selector": "h1.pjgm-posttitle", 7 | "content_text_selector": ".pjgm-postcontent", 8 | "filter_selector": ".sharedaddy, style", 9 | "next_selector": "a[rel=\"next\"]", 10 | "cover_url": "https://i.imgur.com/d9LvKMc.png" 11 | } 12 | -------------------------------------------------------------------------------- /examples/vacantthrone.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": 
"https://tcthrone.wordpress.com/", 3 | "title": "Vacant Throne", 4 | "author": "TCurator", 5 | "chapter_selector": "#main .entry-content > p a[href*=\"vacant-throne-\"]", 6 | "content_selector": "#main .entry-content", 7 | "filter_selector": ".sharedaddy, style, p:nth-of-type(1), a[href*='tcthrone.wordpress.com']" 8 | } 9 | -------------------------------------------------------------------------------- /examples/wanderinginn.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://wanderinginn.com/table-of-contents/", 3 | "title": "The Wandering Inn", 4 | "author": "pirate aba", 5 | "cover_url": "https://i0.wp.com/wanderinginn.com/wp-content/uploads/2023/03/Wandering_Inn-Vol1-eCover.jpg?ssl=1", 6 | "chapter_selector": "#table-of-contents .chapter-entry .body-web > a", 7 | "content_selector": ".entry-content", 8 | "filter_selector": "hr:last-of-type, hr:last-of-type ~ *" 9 | } 10 | -------------------------------------------------------------------------------- /examples/ward.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://www.parahumans.net/table-of-contents/", 3 | "title": "Ward", 4 | "author": "Wildbow", 5 | "chapter_selector": "#main .entry-content a", 6 | "content_selector": "#main .entry-content", 7 | "filter_selector": ".sharedaddy, style, a[href*='parahumans.wordpress.com'], p:first-of-type, p:last-of-type" 8 | } -------------------------------------------------------------------------------- /examples/worm.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://parahumans.wordpress.com/2011/06/11/1-1/", 3 | "title": "Worm", 4 | "author": "Wildbow", 5 | "content_selector": "#main", 6 | "content_title_selector": "h1.entry-title", 7 | "content_text_selector": ".entry-content", 8 | "filter_selector": ".sharedaddy, style, a[href*='parahumans.wordpress.com']", 9 | "next_selector": "a[rel=\"next\"]", 10 | "cover_url": "https://pre00.deviantart.net/969a/th/pre/i/2015/051/8/7/worm_cover_by_cactusfantastico-d8ivj4b.png" 11 | } 12 | -------------------------------------------------------------------------------- /leech.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import click 4 | import http.cookiejar 5 | import json 6 | import logging 7 | import os 8 | import requests 9 | import requests_cache 10 | import sqlite3 11 | from click_default_group import DefaultGroup 12 | from functools import reduce 13 | 14 | import sites 15 | import ebook 16 | 17 | __version__ = 2 18 | USER_AGENT = 'Leech/%s +http://davidlynch.org' % __version__ 19 | 20 | logger = logging.getLogger(__name__) 21 | 22 | 23 | def configure_logging(verbose): 24 | if verbose: 25 | logging.basicConfig( 26 | level=logging.DEBUG, 27 | format="[%(name)s @ %(levelname)s] %(message)s" 28 | ) 29 | else: 30 | logging.basicConfig( 31 | level=logging.INFO, 32 | format="[%(name)s] %(message)s" 33 | ) 34 | 35 | 36 | def create_session(cache): 37 | if cache: 38 | session = requests_cache.CachedSession('leech', expire_after=4 * 3600) 39 | else: 40 | session = requests.Session() 41 | 42 | lwp_cookiejar = http.cookiejar.LWPCookieJar() 43 | try: 44 | lwp_cookiejar.load('leech.cookies', ignore_discard=True) 45 | except Exception: 46 | # This file is very much optional, so this log isn't really necessary 47 | # logging.exception("Couldn't load cookies from leech.cookies") 48 | pass 49 | 
session.cookies.update(lwp_cookiejar) 50 | session.headers.update({ 51 | 'User-Agent': USER_AGENT, 52 | 'Accept-Language': 'en-US,en;q=0.5', 53 | 'Accept-Encoding': 'gzip, deflate', 54 | 'Accept': '*/*', # this is essential for imgur 55 | }) 56 | return session 57 | 58 | 59 | def load_on_disk_options(site): 60 | try: 61 | with open('leech.json') as store_file: 62 | store = json.load(store_file) 63 | login = store.get('logins', {}).get(site.site_key(), False) 64 | cover_options = store.get('cover', {}) 65 | image_options = store.get('images', {}) 66 | consolidated_options = { 67 | **{k: v for k, v in store.items() if k not in ('cover', 'images', 'logins')}, 68 | **store.get('site_options', {}).get(site.site_key(), {}) 69 | } 70 | except FileNotFoundError: 71 | logger.info("Unable to locate leech.json. Continuing, assuming it does not exist.") 72 | login = False 73 | image_options = {} 74 | cover_options = {} 75 | consolidated_options = {} 76 | return consolidated_options, login, cover_options, image_options 77 | 78 | 79 | def create_options(site, site_options, unused_flags): 80 | """Compiles options provided from multiple different sources 81 | (e.g. on disk, via flags, via defaults, via JSON provided as a flag value) 82 | into a single options object.""" 83 | default_site_options = site.get_default_options() 84 | 85 | flag_specified_site_options = site.interpret_site_specific_options(**unused_flags) 86 | 87 | configured_site_options, login, cover_options, image_options = load_on_disk_options(site) 88 | 89 | overridden_site_options = json.loads(site_options) 90 | 91 | # The final options dictionary is computed by layering the default, cover, image, configured, 92 | # overridden, and flag-specified options together, in that order; later entries override earlier ones. 93 | options = dict( 94 | list(default_site_options.items()) + 95 | list(cover_options.items()) + 96 | list(image_options.items()) + 97 | list(configured_site_options.items()) + 98 | list(overridden_site_options.items()) + 99 | list(flag_specified_site_options.items()) 100 | ) 101 | return options, login 102 | 103 | 104 | def open_story(site, url, session, login, options): 105 | handler = site( 106 | session, 107 | options=options 108 | ) 109 | 110 | if login: 111 | handler.login(login) 112 | 113 | try: 114 | story = handler.extract(url) 115 | except sites.SiteException as e: 116 | logger.error(e) 117 | return 118 | if not story: 119 | logger.error("Couldn't extract story") 120 | return 121 | return story 122 | 123 | 124 | def site_specific_options(f): 125 | option_list = sites.list_site_specific_options() 126 | return reduce(lambda cmd, decorator: decorator(cmd), [f] + option_list) 127 | 128 | 129 | @click.group(cls=DefaultGroup, default='download', default_if_no_args=True) 130 | def cli(): 131 | """Top level click group. Uses click-default-group to preserve most behavior from leech v1.""" 132 | pass 133 | 134 | 135 | @cli.command() 136 | @click.option('--verbose', '-v', is_flag=True, help="verbose output") 137 | def flush(verbose): 138 | """Flushes the contents of the cache.""" 139 | configure_logging(verbose) 140 | requests_cache.install_cache('leech') 141 | requests_cache.clear() 142 | 143 | conn = sqlite3.connect('leech.sqlite') 144 | conn.execute("VACUUM") 145 | conn.close() 146 | 147 | logger.info("Flushed cache") 148 | 149 | 150 | @cli.command() 151 | @click.argument('urls', nargs=-1, required=True) 152 | @click.option( 153 | '--site-options', 154 | default='{}', 155 | help='JSON object encoding any site-specific options.'
156 | ) 157 | @click.option( 158 | '--output-dir', 159 | default=None, 160 | help='Directory to save generated ebooks' 161 | ) 162 | @click.option('--cache/--no-cache', default=True) 163 | @click.option('--normalize/--no-normalize', default=True, help="Whether to normalize strange unicode text") 164 | @click.option('--verbose', '-v', is_flag=True, help="Verbose debugging output") 165 | @site_specific_options # Includes other click.options specific to sites 166 | def download(urls, site_options, cache, verbose, normalize, output_dir, **other_flags): 167 | """Downloads a story and saves it on disk as an epub ebook.""" 168 | configure_logging(verbose) 169 | session = create_session(cache) 170 | 171 | for url in urls: 172 | site, url = sites.get(url) 173 | options, login = create_options(site, site_options, other_flags) 174 | story = open_story(site, url, session, login, options) 175 | if story: 176 | filename = ebook.generate_epub( 177 | story, options, 178 | image_options={ 179 | 'image_fetch': options.get('image_fetch', True), 180 | 'image_format': options.get('image_format', 'jpeg'), 181 | 'compress_images': options.get('compress_images', False), 182 | 'max_image_size': options.get('max_image_size', 1_000_000), 183 | 'always_convert_images': options.get('always_convert_images', False) 184 | }, 185 | normalize=normalize, 186 | output_dir=output_dir or options.get('output_dir', os.getcwd()), 187 | allow_spaces=options.get('allow_spaces', False), 188 | session=session, 189 | parser=options.get('parser', 'lxml') 190 | ) 191 | logger.info("File created: " + filename) 192 | else: 193 | logger.warning("No ebook created") 194 | 195 | 196 | if __name__ == '__main__': 197 | cli() 198 | -------------------------------------------------------------------------------- /poetry.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. 
2 | 3 | [[package]] 4 | name = "attrs" 5 | version = "25.1.0" 6 | description = "Classes Without Boilerplate" 7 | optional = false 8 | python-versions = ">=3.8" 9 | groups = ["main"] 10 | files = [ 11 | {file = "attrs-25.1.0-py3-none-any.whl", hash = "sha256:c75a69e28a550a7e93789579c22aa26b0f5b83b75dc4e08fe092980051e1090a"}, 12 | {file = "attrs-25.1.0.tar.gz", hash = "sha256:1c97078a80c814273a76b2a298a932eb681c87415c11dee0a6921de7f1b02c3e"}, 13 | ] 14 | 15 | [package.extras] 16 | benchmark = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] 17 | cov = ["cloudpickle ; platform_python_implementation == \"CPython\"", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] 18 | dev = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pre-commit-uv", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] 19 | docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier (<24.7)"] 20 | tests = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] 21 | tests-mypy = ["mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\""] 22 | 23 | [[package]] 24 | name = "beautifulsoup4" 25 | version = "4.13.3" 26 | description = "Screen-scraping library" 27 | optional = false 28 | python-versions = ">=3.7.0" 29 | groups = ["main"] 30 | files = [ 31 | {file = "beautifulsoup4-4.13.3-py3-none-any.whl", hash = "sha256:99045d7d3f08f91f0d656bc9b7efbae189426cd913d830294a15eefa0ea4df16"}, 32 | {file = "beautifulsoup4-4.13.3.tar.gz", hash = "sha256:1bd32405dacc920b42b83ba01644747ed77456a65760e285fbc47633ceddaf8b"}, 33 | ] 34 | 35 | [package.dependencies] 36 | soupsieve = ">1.2" 37 | typing-extensions = ">=4.0.0" 38 | 39 | [package.extras] 40 | cchardet = ["cchardet"] 41 | chardet = ["chardet"] 42 | charset-normalizer = ["charset-normalizer"] 43 | html5lib = ["html5lib"] 44 | lxml = ["lxml"] 45 | 46 | [[package]] 47 | name = "cattrs" 48 | version = "24.1.2" 49 | description = "Composable complex class support for attrs and dataclasses." 
50 | optional = false 51 | python-versions = ">=3.8" 52 | groups = ["main"] 53 | files = [ 54 | {file = "cattrs-24.1.2-py3-none-any.whl", hash = "sha256:67c7495b760168d931a10233f979b28dc04daf853b30752246f4f8471c6d68d0"}, 55 | {file = "cattrs-24.1.2.tar.gz", hash = "sha256:8028cfe1ff5382df59dd36474a86e02d817b06eaf8af84555441bac915d2ef85"}, 56 | ] 57 | 58 | [package.dependencies] 59 | attrs = ">=23.1.0" 60 | exceptiongroup = {version = ">=1.1.1", markers = "python_version < \"3.11\""} 61 | typing-extensions = {version = ">=4.1.0,<4.6.3 || >4.6.3", markers = "python_version < \"3.11\""} 62 | 63 | [package.extras] 64 | bson = ["pymongo (>=4.4.0)"] 65 | cbor2 = ["cbor2 (>=5.4.6)"] 66 | msgpack = ["msgpack (>=1.0.5)"] 67 | msgspec = ["msgspec (>=0.18.5) ; implementation_name == \"cpython\""] 68 | orjson = ["orjson (>=3.9.2) ; implementation_name == \"cpython\""] 69 | pyyaml = ["pyyaml (>=6.0)"] 70 | tomlkit = ["tomlkit (>=0.11.8)"] 71 | ujson = ["ujson (>=5.7.0)"] 72 | 73 | [[package]] 74 | name = "certifi" 75 | version = "2024.8.30" 76 | description = "Python package for providing Mozilla's CA Bundle." 77 | optional = false 78 | python-versions = ">=3.6" 79 | groups = ["main"] 80 | files = [ 81 | {file = "certifi-2024.8.30-py3-none-any.whl", hash = "sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8"}, 82 | {file = "certifi-2024.8.30.tar.gz", hash = "sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9"}, 83 | ] 84 | 85 | [[package]] 86 | name = "charset-normalizer" 87 | version = "3.4.0" 88 | description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 89 | optional = false 90 | python-versions = ">=3.7.0" 91 | groups = ["main"] 92 | files = [ 93 | {file = "charset_normalizer-3.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:4f9fc98dad6c2eaa32fc3af1417d95b5e3d08aff968df0cd320066def971f9a6"}, 94 | {file = "charset_normalizer-3.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0de7b687289d3c1b3e8660d0741874abe7888100efe14bd0f9fd7141bcbda92b"}, 95 | {file = "charset_normalizer-3.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5ed2e36c3e9b4f21dd9422f6893dec0abf2cca553af509b10cd630f878d3eb99"}, 96 | {file = "charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d3ff7fc90b98c637bda91c89d51264a3dcf210cade3a2c6f838c7268d7a4ca"}, 97 | {file = "charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1110e22af8ca26b90bd6364fe4c763329b0ebf1ee213ba32b68c73de5752323d"}, 98 | {file = "charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:86f4e8cca779080f66ff4f191a685ced73d2f72d50216f7112185dc02b90b9b7"}, 99 | {file = "charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f683ddc7eedd742e2889d2bfb96d69573fde1d92fcb811979cdb7165bb9c7d3"}, 100 | {file = "charset_normalizer-3.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:27623ba66c183eca01bf9ff833875b459cad267aeeb044477fedac35e19ba907"}, 101 | {file = "charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f606a1881d2663630ea5b8ce2efe2111740df4b687bd78b34a8131baa007f79b"}, 102 | {file = "charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:0b309d1747110feb25d7ed6b01afdec269c647d382c857ef4663bbe6ad95a912"}, 103 | {file = 
"charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:136815f06a3ae311fae551c3df1f998a1ebd01ddd424aa5603a4336997629e95"}, 104 | {file = "charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:14215b71a762336254351b00ec720a8e85cada43b987da5a042e4ce3e82bd68e"}, 105 | {file = "charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:79983512b108e4a164b9c8d34de3992f76d48cadc9554c9e60b43f308988aabe"}, 106 | {file = "charset_normalizer-3.4.0-cp310-cp310-win32.whl", hash = "sha256:c94057af19bc953643a33581844649a7fdab902624d2eb739738a30e2b3e60fc"}, 107 | {file = "charset_normalizer-3.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:55f56e2ebd4e3bc50442fbc0888c9d8c94e4e06a933804e2af3e89e2f9c1c749"}, 108 | {file = "charset_normalizer-3.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0d99dd8ff461990f12d6e42c7347fd9ab2532fb70e9621ba520f9e8637161d7c"}, 109 | {file = "charset_normalizer-3.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c57516e58fd17d03ebe67e181a4e4e2ccab1168f8c2976c6a334d4f819fe5944"}, 110 | {file = "charset_normalizer-3.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6dba5d19c4dfab08e58d5b36304b3f92f3bd5d42c1a3fa37b5ba5cdf6dfcbcee"}, 111 | {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf4475b82be41b07cc5e5ff94810e6a01f276e37c2d55571e3fe175e467a1a1c"}, 112 | {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ce031db0408e487fd2775d745ce30a7cd2923667cf3b69d48d219f1d8f5ddeb6"}, 113 | {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8ff4e7cdfdb1ab5698e675ca622e72d58a6fa2a8aa58195de0c0061288e6e3ea"}, 114 | {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3710a9751938947e6327ea9f3ea6332a09bf0ba0c09cae9cb1f250bd1f1549bc"}, 115 | {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82357d85de703176b5587dbe6ade8ff67f9f69a41c0733cf2425378b49954de5"}, 116 | {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:47334db71978b23ebcf3c0f9f5ee98b8d65992b65c9c4f2d34c2eaf5bcaf0594"}, 117 | {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8ce7fd6767a1cc5a92a639b391891bf1c268b03ec7e021c7d6d902285259685c"}, 118 | {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:f1a2f519ae173b5b6a2c9d5fa3116ce16e48b3462c8b96dfdded11055e3d6365"}, 119 | {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:63bc5c4ae26e4bc6be6469943b8253c0fd4e4186c43ad46e713ea61a0ba49129"}, 120 | {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bcb4f8ea87d03bc51ad04add8ceaf9b0f085ac045ab4d74e73bbc2dc033f0236"}, 121 | {file = "charset_normalizer-3.4.0-cp311-cp311-win32.whl", hash = "sha256:9ae4ef0b3f6b41bad6366fb0ea4fc1d7ed051528e113a60fa2a65a9abb5b1d99"}, 122 | {file = "charset_normalizer-3.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:cee4373f4d3ad28f1ab6290684d8e2ebdb9e7a1b74fdc39e4c211995f77bec27"}, 123 | {file = "charset_normalizer-3.4.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0713f3adb9d03d49d365b70b84775d0a0d18e4ab08d12bc46baa6132ba78aaf6"}, 124 | {file = 
"charset_normalizer-3.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:de7376c29d95d6719048c194a9cf1a1b0393fbe8488a22008610b0361d834ecf"}, 125 | {file = "charset_normalizer-3.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4a51b48f42d9358460b78725283f04bddaf44a9358197b889657deba38f329db"}, 126 | {file = "charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b295729485b06c1a0683af02a9e42d2caa9db04a373dc38a6a58cdd1e8abddf1"}, 127 | {file = "charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ee803480535c44e7f5ad00788526da7d85525cfefaf8acf8ab9a310000be4b03"}, 128 | {file = "charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3d59d125ffbd6d552765510e3f31ed75ebac2c7470c7274195b9161a32350284"}, 129 | {file = "charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8cda06946eac330cbe6598f77bb54e690b4ca93f593dee1568ad22b04f347c15"}, 130 | {file = "charset_normalizer-3.4.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07afec21bbbbf8a5cc3651aa96b980afe2526e7f048fdfb7f1014d84acc8b6d8"}, 131 | {file = "charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6b40e8d38afe634559e398cc32b1472f376a4099c75fe6299ae607e404c033b2"}, 132 | {file = "charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b8dcd239c743aa2f9c22ce674a145e0a25cb1566c495928440a181ca1ccf6719"}, 133 | {file = "charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:84450ba661fb96e9fd67629b93d2941c871ca86fc38d835d19d4225ff946a631"}, 134 | {file = "charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:44aeb140295a2f0659e113b31cfe92c9061622cadbc9e2a2f7b8ef6b1e29ef4b"}, 135 | {file = "charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1db4e7fefefd0f548d73e2e2e041f9df5c59e178b4c72fbac4cc6f535cfb1565"}, 136 | {file = "charset_normalizer-3.4.0-cp312-cp312-win32.whl", hash = "sha256:5726cf76c982532c1863fb64d8c6dd0e4c90b6ece9feb06c9f202417a31f7dd7"}, 137 | {file = "charset_normalizer-3.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:b197e7094f232959f8f20541ead1d9862ac5ebea1d58e9849c1bf979255dfac9"}, 138 | {file = "charset_normalizer-3.4.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:dd4eda173a9fcccb5f2e2bd2a9f423d180194b1bf17cf59e3269899235b2a114"}, 139 | {file = "charset_normalizer-3.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9e3c4c9e1ed40ea53acf11e2a386383c3304212c965773704e4603d589343ed"}, 140 | {file = "charset_normalizer-3.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:92a7e36b000bf022ef3dbb9c46bfe2d52c047d5e3f3343f43204263c5addc250"}, 141 | {file = "charset_normalizer-3.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:54b6a92d009cbe2fb11054ba694bc9e284dad30a26757b1e372a1fdddaf21920"}, 142 | {file = "charset_normalizer-3.4.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ffd9493de4c922f2a38c2bf62b831dcec90ac673ed1ca182fe11b4d8e9f2a64"}, 143 | {file = "charset_normalizer-3.4.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:35c404d74c2926d0287fbd63ed5d27eb911eb9e4a3bb2c6d294f3cfd4a9e0c23"}, 144 | {file = "charset_normalizer-3.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:4796efc4faf6b53a18e3d46343535caed491776a22af773f366534056c4e1fbc"}, 145 | {file = "charset_normalizer-3.4.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e7fdd52961feb4c96507aa649550ec2a0d527c086d284749b2f582f2d40a2e0d"}, 146 | {file = "charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:92db3c28b5b2a273346bebb24857fda45601aef6ae1c011c0a997106581e8a88"}, 147 | {file = "charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ab973df98fc99ab39080bfb0eb3a925181454d7c3ac8a1e695fddfae696d9e90"}, 148 | {file = "charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4b67fdab07fdd3c10bb21edab3cbfe8cf5696f453afce75d815d9d7223fbe88b"}, 149 | {file = "charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:aa41e526a5d4a9dfcfbab0716c7e8a1b215abd3f3df5a45cf18a12721d31cb5d"}, 150 | {file = "charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ffc519621dce0c767e96b9c53f09c5d215578e10b02c285809f76509a3931482"}, 151 | {file = "charset_normalizer-3.4.0-cp313-cp313-win32.whl", hash = "sha256:f19c1585933c82098c2a520f8ec1227f20e339e33aca8fa6f956f6691b784e67"}, 152 | {file = "charset_normalizer-3.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:707b82d19e65c9bd28b81dde95249b07bf9f5b90ebe1ef17d9b57473f8a64b7b"}, 153 | {file = "charset_normalizer-3.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:dbe03226baf438ac4fda9e2d0715022fd579cb641c4cf639fa40d53b2fe6f3e2"}, 154 | {file = "charset_normalizer-3.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd9a8bd8900e65504a305bf8ae6fa9fbc66de94178c420791d0293702fce2df7"}, 155 | {file = "charset_normalizer-3.4.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b8831399554b92b72af5932cdbbd4ddc55c55f631bb13ff8fe4e6536a06c5c51"}, 156 | {file = "charset_normalizer-3.4.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a14969b8691f7998e74663b77b4c36c0337cb1df552da83d5c9004a93afdb574"}, 157 | {file = "charset_normalizer-3.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dcaf7c1524c0542ee2fc82cc8ec337f7a9f7edee2532421ab200d2b920fc97cf"}, 158 | {file = "charset_normalizer-3.4.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:425c5f215d0eecee9a56cdb703203dda90423247421bf0d67125add85d0c4455"}, 159 | {file = "charset_normalizer-3.4.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:d5b054862739d276e09928de37c79ddeec42a6e1bfc55863be96a36ba22926f6"}, 160 | {file = "charset_normalizer-3.4.0-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:f3e73a4255342d4eb26ef6df01e3962e73aa29baa3124a8e824c5d3364a65748"}, 161 | {file = "charset_normalizer-3.4.0-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:2f6c34da58ea9c1a9515621f4d9ac379871a8f21168ba1b5e09d74250de5ad62"}, 162 | {file = "charset_normalizer-3.4.0-cp37-cp37m-musllinux_1_2_s390x.whl", hash = "sha256:f09cb5a7bbe1ecae6e87901a2eb23e0256bb524a79ccc53eb0b7629fbe7677c4"}, 163 | {file = "charset_normalizer-3.4.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:0099d79bdfcf5c1f0c2c72f91516702ebf8b0b8ddd8905f97a8aecf49712c621"}, 164 | {file = "charset_normalizer-3.4.0-cp37-cp37m-win32.whl", hash = "sha256:9c98230f5042f4945f957d006edccc2af1e03ed5e37ce7c373f00a5a4daa6149"}, 165 | {file = "charset_normalizer-3.4.0-cp37-cp37m-win_amd64.whl", hash = 
"sha256:62f60aebecfc7f4b82e3f639a7d1433a20ec32824db2199a11ad4f5e146ef5ee"}, 166 | {file = "charset_normalizer-3.4.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:af73657b7a68211996527dbfeffbb0864e043d270580c5aef06dc4b659a4b578"}, 167 | {file = "charset_normalizer-3.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cab5d0b79d987c67f3b9e9c53f54a61360422a5a0bc075f43cab5621d530c3b6"}, 168 | {file = "charset_normalizer-3.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9289fd5dddcf57bab41d044f1756550f9e7cf0c8e373b8cdf0ce8773dc4bd417"}, 169 | {file = "charset_normalizer-3.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b493a043635eb376e50eedf7818f2f322eabbaa974e948bd8bdd29eb7ef2a51"}, 170 | {file = "charset_normalizer-3.4.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9fa2566ca27d67c86569e8c85297aaf413ffab85a8960500f12ea34ff98e4c41"}, 171 | {file = "charset_normalizer-3.4.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a8e538f46104c815be19c975572d74afb53f29650ea2025bbfaef359d2de2f7f"}, 172 | {file = "charset_normalizer-3.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6fd30dc99682dc2c603c2b315bded2799019cea829f8bf57dc6b61efde6611c8"}, 173 | {file = "charset_normalizer-3.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2006769bd1640bdf4d5641c69a3d63b71b81445473cac5ded39740a226fa88ab"}, 174 | {file = "charset_normalizer-3.4.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:dc15e99b2d8a656f8e666854404f1ba54765871104e50c8e9813af8a7db07f12"}, 175 | {file = "charset_normalizer-3.4.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:ab2e5bef076f5a235c3774b4f4028a680432cded7cad37bba0fd90d64b187d19"}, 176 | {file = "charset_normalizer-3.4.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:4ec9dd88a5b71abfc74e9df5ebe7921c35cbb3b641181a531ca65cdb5e8e4dea"}, 177 | {file = "charset_normalizer-3.4.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:43193c5cda5d612f247172016c4bb71251c784d7a4d9314677186a838ad34858"}, 178 | {file = "charset_normalizer-3.4.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:aa693779a8b50cd97570e5a0f343538a8dbd3e496fa5dcb87e29406ad0299654"}, 179 | {file = "charset_normalizer-3.4.0-cp38-cp38-win32.whl", hash = "sha256:7706f5850360ac01d80c89bcef1640683cc12ed87f42579dab6c5d3ed6888613"}, 180 | {file = "charset_normalizer-3.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:c3e446d253bd88f6377260d07c895816ebf33ffffd56c1c792b13bff9c3e1ade"}, 181 | {file = "charset_normalizer-3.4.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:980b4f289d1d90ca5efcf07958d3eb38ed9c0b7676bf2831a54d4f66f9c27dfa"}, 182 | {file = "charset_normalizer-3.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f28f891ccd15c514a0981f3b9db9aa23d62fe1a99997512b0491d2ed323d229a"}, 183 | {file = "charset_normalizer-3.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8aacce6e2e1edcb6ac625fb0f8c3a9570ccc7bfba1f63419b3769ccf6a00ed0"}, 184 | {file = "charset_normalizer-3.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd7af3717683bea4c87acd8c0d3d5b44d56120b26fd3f8a692bdd2d5260c620a"}, 185 | {file = "charset_normalizer-3.4.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5ff2ed8194587faf56555927b3aa10e6fb69d931e33953943bc4f837dfee2242"}, 186 | {file = "charset_normalizer-3.4.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:e91f541a85298cf35433bf66f3fab2a4a2cff05c127eeca4af174f6d497f0d4b"}, 187 | {file = "charset_normalizer-3.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:309a7de0a0ff3040acaebb35ec45d18db4b28232f21998851cfa709eeff49d62"}, 188 | {file = "charset_normalizer-3.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:285e96d9d53422efc0d7a17c60e59f37fbf3dfa942073f666db4ac71e8d726d0"}, 189 | {file = "charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:5d447056e2ca60382d460a604b6302d8db69476fd2015c81e7c35417cfabe4cd"}, 190 | {file = "charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:20587d20f557fe189b7947d8e7ec5afa110ccf72a3128d61a2a387c3313f46be"}, 191 | {file = "charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:130272c698667a982a5d0e626851ceff662565379baf0ff2cc58067b81d4f11d"}, 192 | {file = "charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:ab22fbd9765e6954bc0bcff24c25ff71dcbfdb185fcdaca49e81bac68fe724d3"}, 193 | {file = "charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7782afc9b6b42200f7362858f9e73b1f8316afb276d316336c0ec3bd73312742"}, 194 | {file = "charset_normalizer-3.4.0-cp39-cp39-win32.whl", hash = "sha256:2de62e8801ddfff069cd5c504ce3bc9672b23266597d4e4f50eda28846c322f2"}, 195 | {file = "charset_normalizer-3.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:95c3c157765b031331dd4db3c775e58deaee050a3042fcad72cbc4189d7c8dca"}, 196 | {file = "charset_normalizer-3.4.0-py3-none-any.whl", hash = "sha256:fe9f97feb71aa9896b81973a7bbada8c49501dc73e58a10fcef6663af95e5079"}, 197 | {file = "charset_normalizer-3.4.0.tar.gz", hash = "sha256:223217c3d4f82c3ac5e29032b3f1c2eb0fb591b72161f86d93f5719079dae93e"}, 198 | ] 199 | 200 | [[package]] 201 | name = "click" 202 | version = "8.1.8" 203 | description = "Composable command line interface toolkit" 204 | optional = false 205 | python-versions = ">=3.7" 206 | groups = ["main"] 207 | files = [ 208 | {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"}, 209 | {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, 210 | ] 211 | 212 | [package.dependencies] 213 | colorama = {version = "*", markers = "platform_system == \"Windows\""} 214 | 215 | [[package]] 216 | name = "click-default-group" 217 | version = "1.2.4" 218 | description = "click_default_group" 219 | optional = false 220 | python-versions = ">=2.7" 221 | groups = ["main"] 222 | files = [ 223 | {file = "click_default_group-1.2.4-py2.py3-none-any.whl", hash = "sha256:9b60486923720e7fc61731bdb32b617039aba820e22e1c88766b1125592eaa5f"}, 224 | {file = "click_default_group-1.2.4.tar.gz", hash = "sha256:eb3f3c99ec0d456ca6cd2a7f08f7d4e91771bef51b01bdd9580cc6450fe1251e"}, 225 | ] 226 | 227 | [package.dependencies] 228 | click = "*" 229 | 230 | [package.extras] 231 | test = ["pytest"] 232 | 233 | [[package]] 234 | name = "colorama" 235 | version = "0.4.6" 236 | description = "Cross-platform colored terminal text." 
237 | optional = false 238 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" 239 | groups = ["main"] 240 | markers = "platform_system == \"Windows\"" 241 | files = [ 242 | {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, 243 | {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, 244 | ] 245 | 246 | [[package]] 247 | name = "exceptiongroup" 248 | version = "1.2.2" 249 | description = "Backport of PEP 654 (exception groups)" 250 | optional = false 251 | python-versions = ">=3.7" 252 | groups = ["main"] 253 | markers = "python_version < \"3.11\"" 254 | files = [ 255 | {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, 256 | {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, 257 | ] 258 | 259 | [package.extras] 260 | test = ["pytest (>=6)"] 261 | 262 | [[package]] 263 | name = "flake8" 264 | version = "6.1.0" 265 | description = "the modular source code checker: pep8 pyflakes and co" 266 | optional = false 267 | python-versions = ">=3.8.1" 268 | groups = ["dev"] 269 | files = [ 270 | {file = "flake8-6.1.0-py2.py3-none-any.whl", hash = "sha256:ffdfce58ea94c6580c77888a86506937f9a1a227dfcd15f245d694ae20a6b6e5"}, 271 | {file = "flake8-6.1.0.tar.gz", hash = "sha256:d5b3857f07c030bdb5bf41c7f53799571d75c4491748a3adcd47de929e34cd23"}, 272 | ] 273 | 274 | [package.dependencies] 275 | mccabe = ">=0.7.0,<0.8.0" 276 | pycodestyle = ">=2.11.0,<2.12.0" 277 | pyflakes = ">=3.1.0,<3.2.0" 278 | 279 | [[package]] 280 | name = "idna" 281 | version = "3.10" 282 | description = "Internationalized Domain Names in Applications (IDNA)" 283 | optional = false 284 | python-versions = ">=3.6" 285 | groups = ["main"] 286 | files = [ 287 | {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, 288 | {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, 289 | ] 290 | 291 | [package.extras] 292 | all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] 293 | 294 | [[package]] 295 | name = "lxml" 296 | version = "5.3.1" 297 | description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." 
298 | optional = false 299 | python-versions = ">=3.6" 300 | groups = ["main"] 301 | files = [ 302 | {file = "lxml-5.3.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a4058f16cee694577f7e4dd410263cd0ef75644b43802a689c2b3c2a7e69453b"}, 303 | {file = "lxml-5.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:364de8f57d6eda0c16dcfb999af902da31396949efa0e583e12675d09709881b"}, 304 | {file = "lxml-5.3.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:528f3a0498a8edc69af0559bdcf8a9f5a8bf7c00051a6ef3141fdcf27017bbf5"}, 305 | {file = "lxml-5.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db4743e30d6f5f92b6d2b7c86b3ad250e0bad8dee4b7ad8a0c44bfb276af89a3"}, 306 | {file = "lxml-5.3.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:17b5d7f8acf809465086d498d62a981fa6a56d2718135bb0e4aa48c502055f5c"}, 307 | {file = "lxml-5.3.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:928e75a7200a4c09e6efc7482a1337919cc61fe1ba289f297827a5b76d8969c2"}, 308 | {file = "lxml-5.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a997b784a639e05b9d4053ef3b20c7e447ea80814a762f25b8ed5a89d261eac"}, 309 | {file = "lxml-5.3.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:7b82e67c5feb682dbb559c3e6b78355f234943053af61606af126df2183b9ef9"}, 310 | {file = "lxml-5.3.1-cp310-cp310-manylinux_2_28_ppc64le.whl", hash = "sha256:f1de541a9893cf8a1b1db9bf0bf670a2decab42e3e82233d36a74eda7822b4c9"}, 311 | {file = "lxml-5.3.1-cp310-cp310-manylinux_2_28_s390x.whl", hash = "sha256:de1fc314c3ad6bc2f6bd5b5a5b9357b8c6896333d27fdbb7049aea8bd5af2d79"}, 312 | {file = "lxml-5.3.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:7c0536bd9178f754b277a3e53f90f9c9454a3bd108b1531ffff720e082d824f2"}, 313 | {file = "lxml-5.3.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:68018c4c67d7e89951a91fbd371e2e34cd8cfc71f0bb43b5332db38497025d51"}, 314 | {file = "lxml-5.3.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:aa826340a609d0c954ba52fd831f0fba2a4165659ab0ee1a15e4aac21f302406"}, 315 | {file = "lxml-5.3.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:796520afa499732191e39fc95b56a3b07f95256f2d22b1c26e217fb69a9db5b5"}, 316 | {file = "lxml-5.3.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3effe081b3135237da6e4c4530ff2a868d3f80be0bda027e118a5971285d42d0"}, 317 | {file = "lxml-5.3.1-cp310-cp310-win32.whl", hash = "sha256:a22f66270bd6d0804b02cd49dae2b33d4341015545d17f8426f2c4e22f557a23"}, 318 | {file = "lxml-5.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:0bcfadea3cdc68e678d2b20cb16a16716887dd00a881e16f7d806c2138b8ff0c"}, 319 | {file = "lxml-5.3.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e220f7b3e8656ab063d2eb0cd536fafef396829cafe04cb314e734f87649058f"}, 320 | {file = "lxml-5.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0f2cfae0688fd01f7056a17367e3b84f37c545fb447d7282cf2c242b16262607"}, 321 | {file = "lxml-5.3.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:67d2f8ad9dcc3a9e826bdc7802ed541a44e124c29b7d95a679eeb58c1c14ade8"}, 322 | {file = "lxml-5.3.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db0c742aad702fd5d0c6611a73f9602f20aec2007c102630c06d7633d9c8f09a"}, 323 | {file = "lxml-5.3.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:198bb4b4dd888e8390afa4f170d4fa28467a7eaf857f1952589f16cfbb67af27"}, 324 | {file = "lxml-5.3.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d2a3e412ce1849be34b45922bfef03df32d1410a06d1cdeb793a343c2f1fd666"}, 325 | {file = "lxml-5.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2b8969dbc8d09d9cd2ae06362c3bad27d03f433252601ef658a49bd9f2b22d79"}, 326 | {file = "lxml-5.3.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:5be8f5e4044146a69c96077c7e08f0709c13a314aa5315981185c1f00235fe65"}, 327 | {file = "lxml-5.3.1-cp311-cp311-manylinux_2_28_ppc64le.whl", hash = "sha256:133f3493253a00db2c870d3740bc458ebb7d937bd0a6a4f9328373e0db305709"}, 328 | {file = "lxml-5.3.1-cp311-cp311-manylinux_2_28_s390x.whl", hash = "sha256:52d82b0d436edd6a1d22d94a344b9a58abd6c68c357ed44f22d4ba8179b37629"}, 329 | {file = "lxml-5.3.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:1b6f92e35e2658a5ed51c6634ceb5ddae32053182851d8cad2a5bc102a359b33"}, 330 | {file = "lxml-5.3.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:203b1d3eaebd34277be06a3eb880050f18a4e4d60861efba4fb946e31071a295"}, 331 | {file = "lxml-5.3.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:155e1a5693cf4b55af652f5c0f78ef36596c7f680ff3ec6eb4d7d85367259b2c"}, 332 | {file = "lxml-5.3.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:22ec2b3c191f43ed21f9545e9df94c37c6b49a5af0a874008ddc9132d49a2d9c"}, 333 | {file = "lxml-5.3.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:7eda194dd46e40ec745bf76795a7cccb02a6a41f445ad49d3cf66518b0bd9cff"}, 334 | {file = "lxml-5.3.1-cp311-cp311-win32.whl", hash = "sha256:fb7c61d4be18e930f75948705e9718618862e6fc2ed0d7159b2262be73f167a2"}, 335 | {file = "lxml-5.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:c809eef167bf4a57af4b03007004896f5c60bd38dc3852fcd97a26eae3d4c9e6"}, 336 | {file = "lxml-5.3.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:e69add9b6b7b08c60d7ff0152c7c9a6c45b4a71a919be5abde6f98f1ea16421c"}, 337 | {file = "lxml-5.3.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:4e52e1b148867b01c05e21837586ee307a01e793b94072d7c7b91d2c2da02ffe"}, 338 | {file = "lxml-5.3.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a4b382e0e636ed54cd278791d93fe2c4f370772743f02bcbe431a160089025c9"}, 339 | {file = "lxml-5.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c2e49dc23a10a1296b04ca9db200c44d3eb32c8d8ec532e8c1fd24792276522a"}, 340 | {file = "lxml-5.3.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4399b4226c4785575fb20998dc571bc48125dc92c367ce2602d0d70e0c455eb0"}, 341 | {file = "lxml-5.3.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5412500e0dc5481b1ee9cf6b38bb3b473f6e411eb62b83dc9b62699c3b7b79f7"}, 342 | {file = "lxml-5.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c93ed3c998ea8472be98fb55aed65b5198740bfceaec07b2eba551e55b7b9ae"}, 343 | {file = "lxml-5.3.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:63d57fc94eb0bbb4735e45517afc21ef262991d8758a8f2f05dd6e4174944519"}, 344 | {file = "lxml-5.3.1-cp312-cp312-manylinux_2_28_ppc64le.whl", hash = "sha256:b450d7cabcd49aa7ab46a3c6aa3ac7e1593600a1a0605ba536ec0f1b99a04322"}, 345 | {file = "lxml-5.3.1-cp312-cp312-manylinux_2_28_s390x.whl", hash = "sha256:4df0ec814b50275ad6a99bc82a38b59f90e10e47714ac9871e1b223895825468"}, 346 | {file = 
"lxml-5.3.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:d184f85ad2bb1f261eac55cddfcf62a70dee89982c978e92b9a74a1bfef2e367"}, 347 | {file = "lxml-5.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b725e70d15906d24615201e650d5b0388b08a5187a55f119f25874d0103f90dd"}, 348 | {file = "lxml-5.3.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:a31fa7536ec1fb7155a0cd3a4e3d956c835ad0a43e3610ca32384d01f079ea1c"}, 349 | {file = "lxml-5.3.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:3c3c8b55c7fc7b7e8877b9366568cc73d68b82da7fe33d8b98527b73857a225f"}, 350 | {file = "lxml-5.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d61ec60945d694df806a9aec88e8f29a27293c6e424f8ff91c80416e3c617645"}, 351 | {file = "lxml-5.3.1-cp312-cp312-win32.whl", hash = "sha256:f4eac0584cdc3285ef2e74eee1513a6001681fd9753b259e8159421ed28a72e5"}, 352 | {file = "lxml-5.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:29bfc8d3d88e56ea0a27e7c4897b642706840247f59f4377d81be8f32aa0cfbf"}, 353 | {file = "lxml-5.3.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:c093c7088b40d8266f57ed71d93112bd64c6724d31f0794c1e52cc4857c28e0e"}, 354 | {file = "lxml-5.3.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b0884e3f22d87c30694e625b1e62e6f30d39782c806287450d9dc2fdf07692fd"}, 355 | {file = "lxml-5.3.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1637fa31ec682cd5760092adfabe86d9b718a75d43e65e211d5931809bc111e7"}, 356 | {file = "lxml-5.3.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a364e8e944d92dcbf33b6b494d4e0fb3499dcc3bd9485beb701aa4b4201fa414"}, 357 | {file = "lxml-5.3.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:779e851fd0e19795ccc8a9bb4d705d6baa0ef475329fe44a13cf1e962f18ff1e"}, 358 | {file = "lxml-5.3.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c4393600915c308e546dc7003d74371744234e8444a28622d76fe19b98fa59d1"}, 359 | {file = "lxml-5.3.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:673b9d8e780f455091200bba8534d5f4f465944cbdd61f31dc832d70e29064a5"}, 360 | {file = "lxml-5.3.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:2e4a570f6a99e96c457f7bec5ad459c9c420ee80b99eb04cbfcfe3fc18ec6423"}, 361 | {file = "lxml-5.3.1-cp313-cp313-manylinux_2_28_ppc64le.whl", hash = "sha256:71f31eda4e370f46af42fc9f264fafa1b09f46ba07bdbee98f25689a04b81c20"}, 362 | {file = "lxml-5.3.1-cp313-cp313-manylinux_2_28_s390x.whl", hash = "sha256:42978a68d3825eaac55399eb37a4d52012a205c0c6262199b8b44fcc6fd686e8"}, 363 | {file = "lxml-5.3.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:8b1942b3e4ed9ed551ed3083a2e6e0772de1e5e3aca872d955e2e86385fb7ff9"}, 364 | {file = "lxml-5.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:85c4f11be9cf08917ac2a5a8b6e1ef63b2f8e3799cec194417e76826e5f1de9c"}, 365 | {file = "lxml-5.3.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:231cf4d140b22a923b1d0a0a4e0b4f972e5893efcdec188934cc65888fd0227b"}, 366 | {file = "lxml-5.3.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:5865b270b420eda7b68928d70bb517ccbe045e53b1a428129bb44372bf3d7dd5"}, 367 | {file = "lxml-5.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dbf7bebc2275016cddf3c997bf8a0f7044160714c64a9b83975670a04e6d2252"}, 368 | {file = "lxml-5.3.1-cp313-cp313-win32.whl", hash = "sha256:d0751528b97d2b19a388b302be2a0ee05817097bab46ff0ed76feeec24951f78"}, 369 | {file = 
"lxml-5.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:91fb6a43d72b4f8863d21f347a9163eecbf36e76e2f51068d59cd004c506f332"}, 370 | {file = "lxml-5.3.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:016b96c58e9a4528219bb563acf1aaaa8bc5452e7651004894a973f03b84ba81"}, 371 | {file = "lxml-5.3.1-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82a4bb10b0beef1434fb23a09f001ab5ca87895596b4581fd53f1e5145a8934a"}, 372 | {file = "lxml-5.3.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d68eeef7b4d08a25e51897dac29bcb62aba830e9ac6c4e3297ee7c6a0cf6439"}, 373 | {file = "lxml-5.3.1-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:f12582b8d3b4c6be1d298c49cb7ae64a3a73efaf4c2ab4e37db182e3545815ac"}, 374 | {file = "lxml-5.3.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:2df7ed5edeb6bd5590914cd61df76eb6cce9d590ed04ec7c183cf5509f73530d"}, 375 | {file = "lxml-5.3.1-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:585c4dc429deebc4307187d2b71ebe914843185ae16a4d582ee030e6cfbb4d8a"}, 376 | {file = "lxml-5.3.1-cp36-cp36m-win32.whl", hash = "sha256:06a20d607a86fccab2fc15a77aa445f2bdef7b49ec0520a842c5c5afd8381576"}, 377 | {file = "lxml-5.3.1-cp36-cp36m-win_amd64.whl", hash = "sha256:057e30d0012439bc54ca427a83d458752ccda725c1c161cc283db07bcad43cf9"}, 378 | {file = "lxml-5.3.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4867361c049761a56bd21de507cab2c2a608c55102311d142ade7dab67b34f32"}, 379 | {file = "lxml-5.3.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3dddf0fb832486cc1ea71d189cb92eb887826e8deebe128884e15020bb6e3f61"}, 380 | {file = "lxml-5.3.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1bcc211542f7af6f2dfb705f5f8b74e865592778e6cafdfd19c792c244ccce19"}, 381 | {file = "lxml-5.3.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aaca5a812f050ab55426c32177091130b1e49329b3f002a32934cd0245571307"}, 382 | {file = "lxml-5.3.1-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:236610b77589faf462337b3305a1be91756c8abc5a45ff7ca8f245a71c5dab70"}, 383 | {file = "lxml-5.3.1-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:aed57b541b589fa05ac248f4cb1c46cbb432ab82cbd467d1c4f6a2bdc18aecf9"}, 384 | {file = "lxml-5.3.1-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:75fa3d6946d317ffc7016a6fcc44f42db6d514b7fdb8b4b28cbe058303cb6e53"}, 385 | {file = "lxml-5.3.1-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:96eef5b9f336f623ffc555ab47a775495e7e8846dde88de5f941e2906453a1ce"}, 386 | {file = "lxml-5.3.1-cp37-cp37m-win32.whl", hash = "sha256:ef45f31aec9be01379fc6c10f1d9c677f032f2bac9383c827d44f620e8a88407"}, 387 | {file = "lxml-5.3.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a0611da6b07dd3720f492db1b463a4d1175b096b49438761cc9f35f0d9eaaef5"}, 388 | {file = "lxml-5.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b2aca14c235c7a08558fe0a4786a1a05873a01e86b474dfa8f6df49101853a4e"}, 389 | {file = "lxml-5.3.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae82fce1d964f065c32c9517309f0c7be588772352d2f40b1574a214bd6e6098"}, 390 | {file = "lxml-5.3.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7aae7a3d63b935babfdc6864b31196afd5145878ddd22f5200729006366bc4d5"}, 391 | {file = "lxml-5.3.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:e8e0d177b1fe251c3b1b914ab64135475c5273c8cfd2857964b2e3bb0fe196a7"}, 392 | {file = "lxml-5.3.1-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:6c4dd3bfd0c82400060896717dd261137398edb7e524527438c54a8c34f736bf"}, 393 | {file = "lxml-5.3.1-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:f1208c1c67ec9e151d78aa3435aa9b08a488b53d9cfac9b699f15255a3461ef2"}, 394 | {file = "lxml-5.3.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:c6aacf00d05b38a5069826e50ae72751cb5bc27bdc4d5746203988e429b385bb"}, 395 | {file = "lxml-5.3.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:5881aaa4bf3a2d086c5f20371d3a5856199a0d8ac72dd8d0dbd7a2ecfc26ab73"}, 396 | {file = "lxml-5.3.1-cp38-cp38-win32.whl", hash = "sha256:45fbb70ccbc8683f2fb58bea89498a7274af1d9ec7995e9f4af5604e028233fc"}, 397 | {file = "lxml-5.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:7512b4d0fc5339d5abbb14d1843f70499cab90d0b864f790e73f780f041615d7"}, 398 | {file = "lxml-5.3.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5885bc586f1edb48e5d68e7a4b4757b5feb2a496b64f462b4d65950f5af3364f"}, 399 | {file = "lxml-5.3.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1b92fe86e04f680b848fff594a908edfa72b31bfc3499ef7433790c11d4c8cd8"}, 400 | {file = "lxml-5.3.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a091026c3bf7519ab1e64655a3f52a59ad4a4e019a6f830c24d6430695b1cf6a"}, 401 | {file = "lxml-5.3.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8ffb141361108e864ab5f1813f66e4e1164181227f9b1f105b042729b6c15125"}, 402 | {file = "lxml-5.3.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3715cdf0dd31b836433af9ee9197af10e3df41d273c19bb249230043667a5dfd"}, 403 | {file = "lxml-5.3.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:88b72eb7222d918c967202024812c2bfb4048deeb69ca328363fb8e15254c549"}, 404 | {file = "lxml-5.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa59974880ab5ad8ef3afaa26f9bda148c5f39e06b11a8ada4660ecc9fb2feb3"}, 405 | {file = "lxml-5.3.1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:3bb8149840daf2c3f97cebf00e4ed4a65a0baff888bf2605a8d0135ff5cf764e"}, 406 | {file = "lxml-5.3.1-cp39-cp39-manylinux_2_28_ppc64le.whl", hash = "sha256:0d6b2fa86becfa81f0a0271ccb9eb127ad45fb597733a77b92e8a35e53414914"}, 407 | {file = "lxml-5.3.1-cp39-cp39-manylinux_2_28_s390x.whl", hash = "sha256:136bf638d92848a939fd8f0e06fcf92d9f2e4b57969d94faae27c55f3d85c05b"}, 408 | {file = "lxml-5.3.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:89934f9f791566e54c1d92cdc8f8fd0009447a5ecdb1ec6b810d5f8c4955f6be"}, 409 | {file = "lxml-5.3.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a8ade0363f776f87f982572c2860cc43c65ace208db49c76df0a21dde4ddd16e"}, 410 | {file = "lxml-5.3.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:bfbbab9316330cf81656fed435311386610f78b6c93cc5db4bebbce8dd146675"}, 411 | {file = "lxml-5.3.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:172d65f7c72a35a6879217bcdb4bb11bc88d55fb4879e7569f55616062d387c2"}, 412 | {file = "lxml-5.3.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:e3c623923967f3e5961d272718655946e5322b8d058e094764180cdee7bab1af"}, 413 | {file = "lxml-5.3.1-cp39-cp39-win32.whl", hash = "sha256:ce0930a963ff593e8bb6fda49a503911accc67dee7e5445eec972668e672a0f0"}, 414 | {file = "lxml-5.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:f7b64fcd670bca8800bc10ced36620c6bbb321e7bc1214b9c0c0df269c1dddc2"}, 415 | {file = 
"lxml-5.3.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:afa578b6524ff85fb365f454cf61683771d0170470c48ad9d170c48075f86725"}, 416 | {file = "lxml-5.3.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67f5e80adf0aafc7b5454f2c1cb0cde920c9b1f2cbd0485f07cc1d0497c35c5d"}, 417 | {file = "lxml-5.3.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2dd0b80ac2d8f13ffc906123a6f20b459cb50a99222d0da492360512f3e50f84"}, 418 | {file = "lxml-5.3.1-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:422c179022ecdedbe58b0e242607198580804253da220e9454ffe848daa1cfd2"}, 419 | {file = "lxml-5.3.1-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:524ccfded8989a6595dbdda80d779fb977dbc9a7bc458864fc9a0c2fc15dc877"}, 420 | {file = "lxml-5.3.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:48fd46bf7155def2e15287c6f2b133a2f78e2d22cdf55647269977b873c65499"}, 421 | {file = "lxml-5.3.1-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:05123fad495a429f123307ac6d8fd6f977b71e9a0b6d9aeeb8f80c017cb17131"}, 422 | {file = "lxml-5.3.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a243132767150a44e6a93cd1dde41010036e1cbc63cc3e9fe1712b277d926ce3"}, 423 | {file = "lxml-5.3.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c92ea6d9dd84a750b2bae72ff5e8cf5fdd13e58dda79c33e057862c29a8d5b50"}, 424 | {file = "lxml-5.3.1-pp37-pypy37_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:2f1be45d4c15f237209bbf123a0e05b5d630c8717c42f59f31ea9eae2ad89394"}, 425 | {file = "lxml-5.3.1-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:a83d3adea1e0ee36dac34627f78ddd7f093bb9cfc0a8e97f1572a949b695cb98"}, 426 | {file = "lxml-5.3.1-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:3edbb9c9130bac05d8c3fe150c51c337a471cc7fdb6d2a0a7d3a88e88a829314"}, 427 | {file = "lxml-5.3.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2f23cf50eccb3255b6e913188291af0150d89dab44137a69e14e4dcb7be981f1"}, 428 | {file = "lxml-5.3.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df7e5edac4778127f2bf452e0721a58a1cfa4d1d9eac63bdd650535eb8543615"}, 429 | {file = "lxml-5.3.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:094b28ed8a8a072b9e9e2113a81fda668d2053f2ca9f2d202c2c8c7c2d6516b1"}, 430 | {file = "lxml-5.3.1-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:514fe78fc4b87e7a7601c92492210b20a1b0c6ab20e71e81307d9c2e377c64de"}, 431 | {file = "lxml-5.3.1-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:8fffc08de02071c37865a155e5ea5fce0282e1546fd5bde7f6149fcaa32558ac"}, 432 | {file = "lxml-5.3.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:4b0d5cdba1b655d5b18042ac9c9ff50bda33568eb80feaaca4fc237b9c4fbfde"}, 433 | {file = "lxml-5.3.1-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:3031e4c16b59424e8d78522c69b062d301d951dc55ad8685736c3335a97fc270"}, 434 | {file = "lxml-5.3.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb659702a45136c743bc130760c6f137870d4df3a9e14386478b8a0511abcfca"}, 435 | {file = "lxml-5.3.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a11b16a33656ffc43c92a5343a28dc71eefe460bcc2a4923a96f292692709f6"}, 436 | {file = "lxml-5.3.1-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c5ae125276f254b01daa73e2c103363d3e99e3e10505686ac7d9d2442dd4627a"}, 437 | {file = 
"lxml-5.3.1-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c76722b5ed4a31ba103e0dc77ab869222ec36efe1a614e42e9bcea88a36186fe"}, 438 | {file = "lxml-5.3.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:33e06717c00c788ab4e79bc4726ecc50c54b9bfb55355eae21473c145d83c2d2"}, 439 | {file = "lxml-5.3.1.tar.gz", hash = "sha256:106b7b5d2977b339f1e97efe2778e2ab20e99994cbb0ec5e55771ed0795920c8"}, 440 | ] 441 | 442 | [package.extras] 443 | cssselect = ["cssselect (>=0.7)"] 444 | html-clean = ["lxml_html_clean"] 445 | html5 = ["html5lib"] 446 | htmlsoup = ["BeautifulSoup4"] 447 | source = ["Cython (>=3.0.11,<3.1.0)"] 448 | 449 | [[package]] 450 | name = "mccabe" 451 | version = "0.7.0" 452 | description = "McCabe checker, plugin for flake8" 453 | optional = false 454 | python-versions = ">=3.6" 455 | groups = ["dev"] 456 | files = [ 457 | {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, 458 | {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, 459 | ] 460 | 461 | [[package]] 462 | name = "mintotp" 463 | version = "0.3.0" 464 | description = "MinTOTP - Minimal TOTP Generator" 465 | optional = false 466 | python-versions = "*" 467 | groups = ["main"] 468 | files = [ 469 | {file = "mintotp-0.3.0-py3-none-any.whl", hash = "sha256:eadee8531d9ee95eda92fd17949137454acd1d2a001dcf68f99bb8de56f06468"}, 470 | {file = "mintotp-0.3.0.tar.gz", hash = "sha256:d0f4db5edb38a7481120176a526e8c29539b9e80581dd2dcc1811557d77cfad5"}, 471 | ] 472 | 473 | [[package]] 474 | name = "pillow" 475 | version = "11.1.0" 476 | description = "Python Imaging Library (Fork)" 477 | optional = false 478 | python-versions = ">=3.9" 479 | groups = ["main"] 480 | files = [ 481 | {file = "pillow-11.1.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:e1abe69aca89514737465752b4bcaf8016de61b3be1397a8fc260ba33321b3a8"}, 482 | {file = "pillow-11.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c640e5a06869c75994624551f45e5506e4256562ead981cce820d5ab39ae2192"}, 483 | {file = "pillow-11.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a07dba04c5e22824816b2615ad7a7484432d7f540e6fa86af60d2de57b0fcee2"}, 484 | {file = "pillow-11.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e267b0ed063341f3e60acd25c05200df4193e15a4a5807075cd71225a2386e26"}, 485 | {file = "pillow-11.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:bd165131fd51697e22421d0e467997ad31621b74bfc0b75956608cb2906dda07"}, 486 | {file = "pillow-11.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:abc56501c3fd148d60659aae0af6ddc149660469082859fa7b066a298bde9482"}, 487 | {file = "pillow-11.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:54ce1c9a16a9561b6d6d8cb30089ab1e5eb66918cb47d457bd996ef34182922e"}, 488 | {file = "pillow-11.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:73ddde795ee9b06257dac5ad42fcb07f3b9b813f8c1f7f870f402f4dc54b5269"}, 489 | {file = "pillow-11.1.0-cp310-cp310-win32.whl", hash = "sha256:3a5fe20a7b66e8135d7fd617b13272626a28278d0e578c98720d9ba4b2439d49"}, 490 | {file = "pillow-11.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:b6123aa4a59d75f06e9dd3dac5bf8bc9aa383121bb3dd9a7a612e05eabc9961a"}, 491 | {file = "pillow-11.1.0-cp310-cp310-win_arm64.whl", hash = "sha256:a76da0a31da6fcae4210aa94fd779c65c75786bc9af06289cd1c184451ef7a65"}, 492 | {file = "pillow-11.1.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = 
"sha256:e06695e0326d05b06833b40b7ef477e475d0b1ba3a6d27da1bb48c23209bf457"}, 493 | {file = "pillow-11.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96f82000e12f23e4f29346e42702b6ed9a2f2fea34a740dd5ffffcc8c539eb35"}, 494 | {file = "pillow-11.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3cd561ded2cf2bbae44d4605837221b987c216cff94f49dfeed63488bb228d2"}, 495 | {file = "pillow-11.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f189805c8be5ca5add39e6f899e6ce2ed824e65fb45f3c28cb2841911da19070"}, 496 | {file = "pillow-11.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:dd0052e9db3474df30433f83a71b9b23bd9e4ef1de13d92df21a52c0303b8ab6"}, 497 | {file = "pillow-11.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:837060a8599b8f5d402e97197d4924f05a2e0d68756998345c829c33186217b1"}, 498 | {file = "pillow-11.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:aa8dd43daa836b9a8128dbe7d923423e5ad86f50a7a14dc688194b7be5c0dea2"}, 499 | {file = "pillow-11.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0a2f91f8a8b367e7a57c6e91cd25af510168091fb89ec5146003e424e1558a96"}, 500 | {file = "pillow-11.1.0-cp311-cp311-win32.whl", hash = "sha256:c12fc111ef090845de2bb15009372175d76ac99969bdf31e2ce9b42e4b8cd88f"}, 501 | {file = "pillow-11.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:fbd43429d0d7ed6533b25fc993861b8fd512c42d04514a0dd6337fb3ccf22761"}, 502 | {file = "pillow-11.1.0-cp311-cp311-win_arm64.whl", hash = "sha256:f7955ecf5609dee9442cbface754f2c6e541d9e6eda87fad7f7a989b0bdb9d71"}, 503 | {file = "pillow-11.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2062ffb1d36544d42fcaa277b069c88b01bb7298f4efa06731a7fd6cc290b81a"}, 504 | {file = "pillow-11.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a85b653980faad27e88b141348707ceeef8a1186f75ecc600c395dcac19f385b"}, 505 | {file = "pillow-11.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9409c080586d1f683df3f184f20e36fb647f2e0bc3988094d4fd8c9f4eb1b3b3"}, 506 | {file = "pillow-11.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7fdadc077553621911f27ce206ffcbec7d3f8d7b50e0da39f10997e8e2bb7f6a"}, 507 | {file = "pillow-11.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:93a18841d09bcdd774dcdc308e4537e1f867b3dec059c131fde0327899734aa1"}, 508 | {file = "pillow-11.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:9aa9aeddeed452b2f616ff5507459e7bab436916ccb10961c4a382cd3e03f47f"}, 509 | {file = "pillow-11.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3cdcdb0b896e981678eee140d882b70092dac83ac1cdf6b3a60e2216a73f2b91"}, 510 | {file = "pillow-11.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:36ba10b9cb413e7c7dfa3e189aba252deee0602c86c309799da5a74009ac7a1c"}, 511 | {file = "pillow-11.1.0-cp312-cp312-win32.whl", hash = "sha256:cfd5cd998c2e36a862d0e27b2df63237e67273f2fc78f47445b14e73a810e7e6"}, 512 | {file = "pillow-11.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:a697cd8ba0383bba3d2d3ada02b34ed268cb548b369943cd349007730c92bddf"}, 513 | {file = "pillow-11.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:4dd43a78897793f60766563969442020e90eb7847463eca901e41ba186a7d4a5"}, 514 | {file = "pillow-11.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ae98e14432d458fc3de11a77ccb3ae65ddce70f730e7c76140653048c71bfcbc"}, 515 | {file = "pillow-11.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:cc1331b6d5a6e144aeb5e626f4375f5b7ae9934ba620c0ac6b3e43d5e683a0f0"}, 516 | {file = "pillow-11.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:758e9d4ef15d3560214cddbc97b8ef3ef86ce04d62ddac17ad39ba87e89bd3b1"}, 517 | {file = "pillow-11.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b523466b1a31d0dcef7c5be1f20b942919b62fd6e9a9be199d035509cbefc0ec"}, 518 | {file = "pillow-11.1.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:9044b5e4f7083f209c4e35aa5dd54b1dd5b112b108648f5c902ad586d4f945c5"}, 519 | {file = "pillow-11.1.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:3764d53e09cdedd91bee65c2527815d315c6b90d7b8b79759cc48d7bf5d4f114"}, 520 | {file = "pillow-11.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:31eba6bbdd27dde97b0174ddf0297d7a9c3a507a8a1480e1e60ef914fe23d352"}, 521 | {file = "pillow-11.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b5d658fbd9f0d6eea113aea286b21d3cd4d3fd978157cbf2447a6035916506d3"}, 522 | {file = "pillow-11.1.0-cp313-cp313-win32.whl", hash = "sha256:f86d3a7a9af5d826744fabf4afd15b9dfef44fe69a98541f666f66fbb8d3fef9"}, 523 | {file = "pillow-11.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:593c5fd6be85da83656b93ffcccc2312d2d149d251e98588b14fbc288fd8909c"}, 524 | {file = "pillow-11.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:11633d58b6ee5733bde153a8dafd25e505ea3d32e261accd388827ee987baf65"}, 525 | {file = "pillow-11.1.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:70ca5ef3b3b1c4a0812b5c63c57c23b63e53bc38e758b37a951e5bc466449861"}, 526 | {file = "pillow-11.1.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:8000376f139d4d38d6851eb149b321a52bb8893a88dae8ee7d95840431977081"}, 527 | {file = "pillow-11.1.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ee85f0696a17dd28fbcfceb59f9510aa71934b483d1f5601d1030c3c8304f3c"}, 528 | {file = "pillow-11.1.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:dd0e081319328928531df7a0e63621caf67652c8464303fd102141b785ef9547"}, 529 | {file = "pillow-11.1.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e63e4e5081de46517099dc30abe418122f54531a6ae2ebc8680bcd7096860eab"}, 530 | {file = "pillow-11.1.0-cp313-cp313t-win32.whl", hash = "sha256:dda60aa465b861324e65a78c9f5cf0f4bc713e4309f83bc387be158b077963d9"}, 531 | {file = "pillow-11.1.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ad5db5781c774ab9a9b2c4302bbf0c1014960a0a7be63278d13ae6fdf88126fe"}, 532 | {file = "pillow-11.1.0-cp313-cp313t-win_arm64.whl", hash = "sha256:67cd427c68926108778a9005f2a04adbd5e67c442ed21d95389fe1d595458756"}, 533 | {file = "pillow-11.1.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:bf902d7413c82a1bfa08b06a070876132a5ae6b2388e2712aab3a7cbc02205c6"}, 534 | {file = "pillow-11.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c1eec9d950b6fe688edee07138993e54ee4ae634c51443cfb7c1e7613322718e"}, 535 | {file = "pillow-11.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e275ee4cb11c262bd108ab2081f750db2a1c0b8c12c1897f27b160c8bd57bbc"}, 536 | {file = "pillow-11.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4db853948ce4e718f2fc775b75c37ba2efb6aaea41a1a5fc57f0af59eee774b2"}, 537 | {file = "pillow-11.1.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:ab8a209b8485d3db694fa97a896d96dd6533d63c22829043fd9de627060beade"}, 538 | {file = "pillow-11.1.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = 
"sha256:54251ef02a2309b5eec99d151ebf5c9904b77976c8abdcbce7891ed22df53884"}, 539 | {file = "pillow-11.1.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:5bb94705aea800051a743aa4874bb1397d4695fb0583ba5e425ee0328757f196"}, 540 | {file = "pillow-11.1.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:89dbdb3e6e9594d512780a5a1c42801879628b38e3efc7038094430844e271d8"}, 541 | {file = "pillow-11.1.0-cp39-cp39-win32.whl", hash = "sha256:e5449ca63da169a2e6068dd0e2fcc8d91f9558aba89ff6d02121ca8ab11e79e5"}, 542 | {file = "pillow-11.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:3362c6ca227e65c54bf71a5f88b3d4565ff1bcbc63ae72c34b07bbb1cc59a43f"}, 543 | {file = "pillow-11.1.0-cp39-cp39-win_arm64.whl", hash = "sha256:b20be51b37a75cc54c2c55def3fa2c65bb94ba859dde241cd0a4fd302de5ae0a"}, 544 | {file = "pillow-11.1.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:8c730dc3a83e5ac137fbc92dfcfe1511ce3b2b5d7578315b63dbbb76f7f51d90"}, 545 | {file = "pillow-11.1.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:7d33d2fae0e8b170b6a6c57400e077412240f6f5bb2a342cf1ee512a787942bb"}, 546 | {file = "pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8d65b38173085f24bc07f8b6c505cbb7418009fa1a1fcb111b1f4961814a442"}, 547 | {file = "pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:015c6e863faa4779251436db398ae75051469f7c903b043a48f078e437656f83"}, 548 | {file = "pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:d44ff19eea13ae4acdaaab0179fa68c0c6f2f45d66a4d8ec1eda7d6cecbcc15f"}, 549 | {file = "pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d3d8da4a631471dfaf94c10c85f5277b1f8e42ac42bade1ac67da4b4a7359b73"}, 550 | {file = "pillow-11.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:4637b88343166249fe8aa94e7c4a62a180c4b3898283bb5d3d2fd5fe10d8e4e0"}, 551 | {file = "pillow-11.1.0.tar.gz", hash = "sha256:368da70808b36d73b4b390a8ffac11069f8a5c85f29eff1f1b01bcf3ef5b2a20"}, 552 | ] 553 | 554 | [package.extras] 555 | docs = ["furo", "olefile", "sphinx (>=8.1)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinxext-opengraph"] 556 | fpx = ["olefile"] 557 | mic = ["olefile"] 558 | tests = ["check-manifest", "coverage (>=7.4.2)", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout", "trove-classifiers (>=2024.10.12)"] 559 | typing = ["typing-extensions ; python_version < \"3.10\""] 560 | xmp = ["defusedxml"] 561 | 562 | [[package]] 563 | name = "platformdirs" 564 | version = "4.3.6" 565 | description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." 
566 | optional = false 567 | python-versions = ">=3.8" 568 | groups = ["main"] 569 | files = [ 570 | {file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"}, 571 | {file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"}, 572 | ] 573 | 574 | [package.extras] 575 | docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4)"] 576 | test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.2)", "pytest-cov (>=5)", "pytest-mock (>=3.14)"] 577 | type = ["mypy (>=1.11.2)"] 578 | 579 | [[package]] 580 | name = "pycodestyle" 581 | version = "2.11.1" 582 | description = "Python style guide checker" 583 | optional = false 584 | python-versions = ">=3.8" 585 | groups = ["dev"] 586 | files = [ 587 | {file = "pycodestyle-2.11.1-py2.py3-none-any.whl", hash = "sha256:44fe31000b2d866f2e41841b18528a505fbd7fef9017b04eff4e2648a0fadc67"}, 588 | {file = "pycodestyle-2.11.1.tar.gz", hash = "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f"}, 589 | ] 590 | 591 | [[package]] 592 | name = "pyflakes" 593 | version = "3.1.0" 594 | description = "passive checker of Python programs" 595 | optional = false 596 | python-versions = ">=3.8" 597 | groups = ["dev"] 598 | files = [ 599 | {file = "pyflakes-3.1.0-py2.py3-none-any.whl", hash = "sha256:4132f6d49cb4dae6819e5379898f2b8cce3c5f23994194c24b77d5da2e36f774"}, 600 | {file = "pyflakes-3.1.0.tar.gz", hash = "sha256:a0aae034c444db0071aa077972ba4768d40c830d9539fd45bf4cd3f8f6992efc"}, 601 | ] 602 | 603 | [[package]] 604 | name = "requests" 605 | version = "2.32.3" 606 | description = "Python HTTP for Humans." 607 | optional = false 608 | python-versions = ">=3.8" 609 | groups = ["main"] 610 | files = [ 611 | {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, 612 | {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, 613 | ] 614 | 615 | [package.dependencies] 616 | certifi = ">=2017.4.17" 617 | charset-normalizer = ">=2,<4" 618 | idna = ">=2.5,<4" 619 | urllib3 = ">=1.21.1,<3" 620 | 621 | [package.extras] 622 | socks = ["PySocks (>=1.5.6,!=1.5.7)"] 623 | use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] 624 | 625 | [[package]] 626 | name = "requests-cache" 627 | version = "1.2.1" 628 | description = "A persistent cache for python requests" 629 | optional = false 630 | python-versions = ">=3.8" 631 | groups = ["main"] 632 | files = [ 633 | {file = "requests_cache-1.2.1-py3-none-any.whl", hash = "sha256:1285151cddf5331067baa82598afe2d47c7495a1334bfe7a7d329b43e9fd3603"}, 634 | {file = "requests_cache-1.2.1.tar.gz", hash = "sha256:68abc986fdc5b8d0911318fbb5f7c80eebcd4d01bfacc6685ecf8876052511d1"}, 635 | ] 636 | 637 | [package.dependencies] 638 | attrs = ">=21.2" 639 | cattrs = ">=22.2" 640 | platformdirs = ">=2.5" 641 | requests = ">=2.22" 642 | url-normalize = ">=1.4" 643 | urllib3 = ">=1.25.5" 644 | 645 | [package.extras] 646 | all = ["boto3 (>=1.15)", "botocore (>=1.18)", "itsdangerous (>=2.0)", "pymongo (>=3)", "pyyaml (>=6.0.1)", "redis (>=3)", "ujson (>=5.4)"] 647 | bson = ["bson (>=0.5)"] 648 | docs = ["furo (>=2023.3,<2024.0)", "linkify-it-py (>=2.0,<3.0)", "myst-parser (>=1.0,<2.0)", "sphinx (>=5.0.2,<6.0.0)", "sphinx-autodoc-typehints (>=1.19)", "sphinx-automodapi (>=0.14)", "sphinx-copybutton (>=0.5)", "sphinx-design (>=0.2)", 
"sphinx-notfound-page (>=0.8)", "sphinxcontrib-apidoc (>=0.3)", "sphinxext-opengraph (>=0.9)"] 649 | dynamodb = ["boto3 (>=1.15)", "botocore (>=1.18)"] 650 | json = ["ujson (>=5.4)"] 651 | mongodb = ["pymongo (>=3)"] 652 | redis = ["redis (>=3)"] 653 | security = ["itsdangerous (>=2.0)"] 654 | yaml = ["pyyaml (>=6.0.1)"] 655 | 656 | [[package]] 657 | name = "six" 658 | version = "1.16.0" 659 | description = "Python 2 and 3 compatibility utilities" 660 | optional = false 661 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" 662 | groups = ["main"] 663 | files = [ 664 | {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, 665 | {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, 666 | ] 667 | 668 | [[package]] 669 | name = "soupsieve" 670 | version = "2.6" 671 | description = "A modern CSS selector implementation for Beautiful Soup." 672 | optional = false 673 | python-versions = ">=3.8" 674 | groups = ["main"] 675 | files = [ 676 | {file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"}, 677 | {file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"}, 678 | ] 679 | 680 | [[package]] 681 | name = "typing-extensions" 682 | version = "4.12.2" 683 | description = "Backported and Experimental Type Hints for Python 3.8+" 684 | optional = false 685 | python-versions = ">=3.8" 686 | groups = ["main"] 687 | files = [ 688 | {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, 689 | {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, 690 | ] 691 | 692 | [[package]] 693 | name = "url-normalize" 694 | version = "1.4.3" 695 | description = "URL normalization for Python" 696 | optional = false 697 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" 698 | groups = ["main"] 699 | files = [ 700 | {file = "url-normalize-1.4.3.tar.gz", hash = "sha256:d23d3a070ac52a67b83a1c59a0e68f8608d1cd538783b401bc9de2c0fac999b2"}, 701 | {file = "url_normalize-1.4.3-py2.py3-none-any.whl", hash = "sha256:ec3c301f04e5bb676d333a7fa162fa977ad2ca04b7e652bfc9fac4e405728eed"}, 702 | ] 703 | 704 | [package.dependencies] 705 | six = "*" 706 | 707 | [[package]] 708 | name = "urllib3" 709 | version = "2.2.3" 710 | description = "HTTP library with thread-safe connection pooling, file post, and more." 
711 | optional = false 712 | python-versions = ">=3.8" 713 | groups = ["main"] 714 | files = [ 715 | {file = "urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac"}, 716 | {file = "urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9"}, 717 | ] 718 | 719 | [package.extras] 720 | brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""] 721 | h2 = ["h2 (>=4,<5)"] 722 | socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] 723 | zstd = ["zstandard (>=0.18.0)"] 724 | 725 | [metadata] 726 | lock-version = "2.1" 727 | python-versions = "^3.9" 728 | content-hash = "92cfb836603d3fa5af84e8b5de458c70cfa66ef8878a7125424609fa22921343" 729 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "leech" 3 | version = "1.0.0" 4 | description = "Turn a story on certain websites into an ebook for convenient reading" 5 | authors = ["David Lynch "] 6 | license = "MIT License" 7 | include = ["ebook/*", "sites/*"] 8 | 9 | [tool.poetry.scripts] 10 | leech = "leech:cli" 11 | 12 | [tool.poetry.dependencies] 13 | python = "^3.9" 14 | attrs = "^25.1.0" 15 | beautifulsoup4 = "^4.13.3" 16 | click-default-group = "^1.2.4" 17 | click = "^8.1.8" 18 | requests = "^2.32.3" 19 | requests-cache = "^1.2.1" 20 | Pillow = "^11.1.0" 21 | mintotp = "^0.3.0" 22 | lxml = "^5.3.1" 23 | 24 | [tool.poetry.group.dev.dependencies] 25 | flake8 = "^6.1.0" 26 | 27 | [build-system] 28 | requires = ["poetry-core>=1.0.0"] 29 | build-backend = "poetry.core.masonry.api" -------------------------------------------------------------------------------- /sites/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | import click 3 | import glob 4 | import os 5 | import random 6 | import uuid 7 | import datetime 8 | import time 9 | import logging 10 | import urllib 11 | import re 12 | import hashlib 13 | from attrs import define, field, Factory 14 | from bs4 import BeautifulSoup 15 | 16 | logger = logging.getLogger(__name__) 17 | logger.addHandler(logging.NullHandler()) 18 | _sites = [] 19 | 20 | 21 | def _default_uuid_string(self): 22 | rd = random.Random(x=self.url) 23 | return str(uuid.UUID(int=rd.getrandbits(8*16), version=4)) 24 | 25 | 26 | @define 27 | class Image: 28 | url: str 29 | 30 | def path(self): 31 | return f"images/{hashlib.sha1(self.url.encode()).hexdigest()}.{self.ext()}" 32 | 33 | def ext(self): 34 | if self.url.startswith("data:image") and 'base64' in self.url: 35 | head, base64data = self.url.split(',') 36 | return str(head.split(';')[0].split('/')[1]) 37 | path = urllib.parse.urlparse(self.url).path 38 | return os.path.splitext(path)[1] 39 | 40 | 41 | @define 42 | class Chapter: 43 | title: str 44 | contents: str 45 | date: datetime.datetime = False 46 | images: dict = Factory(dict) 47 | 48 | 49 | @define 50 | class Section: 51 | title: str 52 | author: str 53 | url: str 54 | cover_url: str = '' 55 | id: str = Factory(_default_uuid_string, takes_self=True) 56 | contents: list = Factory(list) 57 | footnotes: list = Factory(list) 58 | tags: list = Factory(list) 59 | summary: str = '' 60 | 61 | def __iter__(self): 62 | return self.contents.__iter__() 63 | 64 | def __getitem__(self, index): 65 | return self.contents.__getitem__(index) 66 | 67 | 
def __setitem__(self, index, value): 68 | return self.contents.__setitem__(index, value) 69 | 70 | def __len__(self): 71 | return len(self.contents) 72 | 73 | def everychapter(self): 74 | for chapter in self.contents: 75 | if hasattr(chapter, '__iter__'): 76 | yield from chapter 77 | else: 78 | yield chapter 79 | 80 | def add(self, value, index=None): 81 | if index is not None: 82 | self.contents.insert(index, value) 83 | else: 84 | self.contents.append(value) 85 | 86 | def dates(self): 87 | for chapter in self.everychapter(): 88 | yield chapter.date 89 | 90 | 91 | @define 92 | class Site: 93 | """A Site handles checking whether a URL might represent a site, and then 94 | extracting the content of a story from said site. 95 | """ 96 | session: object = field() 97 | footnotes: list = field(factory=list, init=False) 98 | options: dict = Factory( 99 | lambda site: site.get_default_options(), 100 | takes_self=True 101 | ) 102 | 103 | @classmethod 104 | def site_key(cls): 105 | if hasattr(cls, '_key'): 106 | return cls._key 107 | return cls.__name__ 108 | 109 | @staticmethod 110 | def get_site_specific_option_defs(): 111 | """Returns a list of click.option objects to add to CLI commands. 112 | 113 | It is best practice to ensure that these names are reasonably unique 114 | to ensure that they do not conflict with the core options, or other 115 | sites' options. It is OK for different site's options to have the 116 | same name, but pains should be taken to ensure they remain semantically 117 | similar in meaning. 118 | """ 119 | return [ 120 | SiteSpecificOption( 121 | 'strip_colors', 122 | '--strip-colors/--no-strip-colors', 123 | default=True, 124 | help="If true, colors will be stripped from the text." 125 | ), 126 | SiteSpecificOption( 127 | 'image_fetch', 128 | '--fetch-images/--no-fetch-images', 129 | default=True, 130 | help="If true, images embedded in the story will be downloaded" 131 | ), 132 | SiteSpecificOption( 133 | 'spoilers', 134 | '--spoilers', 135 | choices=('include', 'inline', 'skip'), 136 | default='include', 137 | help="Whether to include spoilers" 138 | ), 139 | SiteSpecificOption( 140 | 'deprecated_skip_spoilers', 141 | '--skip-spoilers/--include-spoilers', 142 | help="If true, do not transcribe any tags that are marked as a spoiler. (DEPRECATED)", 143 | exposed=False, 144 | click_kwargs={ 145 | "callback": lambda ctx, param, value: ctx.params.update({"spoilers": value and "skip" or "include"}), 146 | }, 147 | ), 148 | SiteSpecificOption( 149 | 'parser', 150 | '--parser', 151 | help="Which HTML parser to use", 152 | choices=('lxml', 'html5lib', 'html.parser', 'lxml-xml'), 153 | default='lxml', 154 | ), 155 | ] 156 | 157 | @classmethod 158 | def get_default_options(cls): 159 | options = {} 160 | for option in cls.get_site_specific_option_defs(): 161 | if option.exposed: 162 | options[option.name] = option.default 163 | return options 164 | 165 | @classmethod 166 | def interpret_site_specific_options(cls, **kwargs): 167 | """Returns options summarizing CLI flags provided. 168 | 169 | Only includes entries the user has explicitly provided as flags 170 | / will not contain default values. For that, use get_default_options(). 
171 | """ 172 | options = {} 173 | for option in cls.get_site_specific_option_defs(): 174 | option_value = kwargs.get(option.name) 175 | if option.exposed and option_value is not None: 176 | options[option.name] = option_value 177 | return options 178 | 179 | @staticmethod 180 | def matches(url): 181 | raise NotImplementedError() 182 | 183 | def extract(self, url): 184 | """Download a story from a given URL 185 | 186 | Args: 187 | url (string): A valid URL for this Site 188 | Returns: 189 | story (dict) containing keys: 190 | title (string) 191 | author (string) 192 | chapters (list): list of Chapters (namedtuple, defined above) 193 | """ 194 | raise NotImplementedError() 195 | 196 | def login(self, login_details): 197 | raise NotImplementedError() 198 | 199 | def _soup(self, url, method=False, delay=0, retry=3, retry_delay=10, **kw): 200 | if not method: 201 | method = self.options.get('parser', 'lxml') 202 | if url.startswith('http://') or url.startswith('https://'): 203 | page = self.session.get(url, **kw) 204 | if not page: 205 | if page.status_code == 403 and page.headers.get('Server', False) == 'cloudflare' and "captcha-bypass" in page.text: 206 | raise CloudflareException("Couldn't fetch, probably because of Cloudflare protection", url) 207 | if retry and retry > 0: 208 | real_delay = retry_delay 209 | if 'Retry-After' in page.headers: 210 | real_delay = int(page.headers['Retry-After']) 211 | logger.warning("Load failed: waiting %s to retry (%s: %s)", real_delay, page.status_code, page.url) 212 | time.sleep(real_delay) 213 | return self._soup(url, method=method, retry=retry - 1, retry_delay=retry_delay, **kw) 214 | raise SiteException("Couldn't fetch", url) 215 | if delay and delay > 0 and not page.from_cache: 216 | time.sleep(delay) 217 | text = page.text 218 | fallback_base = url 219 | else: 220 | text = url 221 | fallback_base = '' 222 | soup = BeautifulSoup(text, method) 223 | return soup, (soup.head and soup.head.base) and soup.head.base.get('href') or fallback_base 224 | 225 | def _form_in_soup(self, soup): 226 | if soup.name == 'form': 227 | return soup 228 | return soup.find('form') 229 | 230 | def _form_data(self, soup): 231 | data = {} 232 | form = self._form_in_soup(soup) 233 | if not form: 234 | return data, '', '' 235 | for tag in form.find_all('input'): 236 | itype = tag.attrs.get('type', 'text') 237 | name = tag.attrs.get('name') 238 | if not name: 239 | continue 240 | value = tag.attrs.get('value', '') 241 | if itype in ('checkbox', 'radio') and not tag.attrs.get('checked', False): 242 | continue 243 | data[name] = value 244 | for select in form.find_all('select'): 245 | # todo: multiple 246 | name = select.attrs.get('name') 247 | if not name: 248 | continue 249 | data[name] = '' 250 | for option in select.find_all('option'): 251 | value = option.attrs.get('value', '') 252 | if value and option.attrs.get('selected'): 253 | data[name] = value 254 | for textarea in form.find_all('textarea'): 255 | name = textarea.attrs.get('name') 256 | if not name: 257 | continue 258 | data[name] = textarea.attrs.get('value', '') 259 | 260 | return data, form.attrs.get('action'), form.attrs.get('method', 'get').lower() 261 | 262 | def _new_tag(self, *args, **kw): 263 | soup = BeautifulSoup("", self.options.get('parser')) 264 | return soup.new_tag(*args, **kw) 265 | 266 | def _join_url(self, *args, **kwargs): 267 | return urllib.parse.urljoin(*args, **kwargs) 268 | 269 | def _footnote(self, contents, chapterid): 270 | """Register a footnote and return a link to that footnote""" 271 | 
272 | # TODO: This embeds knowledge of what the generated filenames will be. Work out a better way. 273 | 274 | idx = len(self.footnotes) + 1 275 | 276 | # epub spec footnotes are all about epub:type on the footnote and the link 277 | # http://www.idpf.org/accessibility/guidelines/content/semantics/epub-type.php 278 | contents.name = 'div' 279 | contents.attrs['id'] = f'footnote{idx}' 280 | contents.attrs['epub:type'] = 'rearnote' 281 | 282 | # a backlink is essential for Kindle to think of this as a footnote 283 | # otherwise it doesn't get the inline-popup treatment 284 | # http://kindlegen.s3.amazonaws.com/AmazonKindlePublishingGuidelines.pdf 285 | # section 3.9.10 286 | backlink = self._new_tag('a', href=f'chapter{chapterid}.html#noteback{idx}') 287 | backlink.string = '^' 288 | contents.insert(0, backlink) 289 | 290 | self.footnotes.append(contents.prettify()) 291 | 292 | # now build the link to the footnote to return, with appropriate 293 | # epub annotations. 294 | spoiler_link = self._new_tag('a') 295 | spoiler_link.attrs = { 296 | 'id': f'noteback{idx}', 297 | 'href': f'footnotes.html#footnote{idx}', 298 | 'epub:type': 'noteref', 299 | } 300 | spoiler_link.string = str(idx) 301 | 302 | return spoiler_link 303 | 304 | def _clean(self, contents, base=False): 305 | """Clean up story content to be more ebook-friendly 306 | 307 | TODO: this expects a soup as its argument, so the couple of API-driven sites can't use it as-is 308 | """ 309 | # Cloudflare is used on many sites, and mangles things that look like email addresses 310 | # e.g. Point_Me_@_The_Sky becomes 311 | # [email protected]_The_Sky 312 | # or 313 | # [email protected]_The_Sky 314 | for tag in contents.find_all(class_='__cf_email__'): 315 | # See: https://usamaejaz.com/cloudflare-email-decoding/ 316 | enc = bytes.fromhex(tag['data-cfemail']) 317 | email = bytes([c ^ enc[0] for c in enc[1:]]).decode('utf8') 318 | if tag.parent.name == 'a' and tag.parent['href'].startswith('/cdn-cgi/l/email-protection'): 319 | tag = tag.parent 320 | tag.insert_before(email) 321 | tag.decompose() 322 | # strip colors 323 | if self.options['strip_colors']: 324 | for tag in contents.find_all(style=re.compile(r'(?:color|background)\s*:')): 325 | tag['style'] = re.sub(r'(?:color|background)\s*:[^;]+;?', '', tag['style']) 326 | 327 | if base: 328 | for img in contents.find_all('img', src=True): 329 | # Later epub processing needs absolute image URLs 330 | # print("fixing img src", img['src'], self._join_url(base, img['src'])) 331 | img['src'] = self._join_url(base, img['src']) 332 | del img['srcset'] 333 | del img['sizes'] 334 | 335 | return contents 336 | 337 | def _finalize(self, story): 338 | # Call this on a story after it's fully extracted to clean up things 339 | for chapter in story: 340 | if hasattr(chapter, '__iter__'): 341 | self._finalize(chapter, story) 342 | else: 343 | self._process_images(chapter) 344 | 345 | if self.footnotes: 346 | story.footnotes = Chapter('Footnotes', '\n\n'.join(self.footnotes)) 347 | self.footnotes = [] 348 | self._process_images(story.footnotes) 349 | 350 | def _process_images(self, chapter): 351 | soup, base = self._soup(chapter.contents) 352 | 353 | if self.options.get('image_fetch'): 354 | for count, img in enumerate(soup.find_all('img', src=True)): 355 | # logger.info(f"Image in {chapter.title}: {img['src']}") 356 | if img['src'] not in chapter.images: 357 | chapter.images[img['src']] = Image(img['src']) 358 | 359 | img['src'] = chapter.images.get(img['src']).path() 360 | else: 361 | # Remove all images 
from the chapter so you don't get that annoying grey background. 362 | for img in soup.find_all('img'): 363 | # Note: alt="" will be completely removed here, which is consitent with the semantics 364 | if img.parent.name.lower() == "figure": 365 | # TODO: figcaption? 366 | img.parent.replace_with(img.get('alt', '🖼')) 367 | else: 368 | img.replace_with(img.get('alt', '🖼')) 369 | 370 | chapter.contents = str(soup) 371 | 372 | 373 | @define 374 | class SiteSpecificOption: 375 | """Represents a site-specific option that can be configured. 376 | 377 | Will be added to the CLI as a click.option -- many of these 378 | fields correspond to click.option arguments.""" 379 | name: str 380 | flag_pattern: str 381 | type: object = None 382 | default: bool = False 383 | help: str = None 384 | choices: tuple = None 385 | exposed: bool = True 386 | click_kwargs: frozenset = field(converter=lambda kwargs: frozenset(kwargs.items()), default={}) 387 | 388 | def __eq__(self, other): 389 | return self.name == other.name 390 | 391 | def __hash__(self): 392 | return hash(self.name) 393 | 394 | def as_click_option(self): 395 | return click.option( 396 | str(self.name), 397 | str(self.flag_pattern), 398 | type=self.choices and click.Choice(self.choices) or self.type, 399 | # Note: This default not matching self.default is intentional. 400 | # It ensures that we know if a flag was explicitly provided, 401 | # which keeps it from overriding options set in leech.json etc. 402 | # Instead, default is used in site_cls.get_default_options() 403 | default=None, 404 | help=self.help if self.help is not None else "", 405 | expose_value=self.exposed, 406 | **dict(self.click_kwargs) 407 | ) 408 | 409 | 410 | class SiteException(Exception): 411 | pass 412 | 413 | 414 | class CloudflareException(SiteException): 415 | pass 416 | 417 | 418 | def register(site_class): 419 | _sites.append(site_class) 420 | return site_class 421 | 422 | 423 | def get(url): 424 | for site_class in _sites: 425 | match = site_class.matches(url) 426 | if match: 427 | logger.info("Handler: %s (%s)", site_class, match) 428 | return site_class, match 429 | raise NotImplementedError("Could not find a handler for " + url) 430 | 431 | 432 | def list_site_specific_options(): 433 | """Returns a list of all site's click options, which will be presented to the user.""" 434 | 435 | # Ensures that duplicate options are not added twice. 436 | # Especially important for subclassed sites (e.g. Xenforo sites) 437 | options = set() 438 | 439 | for site_class in _sites: 440 | options.update(site_class.get_site_specific_option_defs()) 441 | return [option.as_click_option() for option in options] 442 | 443 | 444 | # And now, a particularly hacky take on a plugin system: 445 | # Make an __all__ out of all the python files in this directory that don't start 446 | # with __. Then import * them. 447 | 448 | modules = glob.glob(os.path.join(os.path.dirname(__file__), "*.py")) 449 | __all__ = [os.path.basename(f)[:-3] for f in modules if not f.startswith("__")] 450 | 451 | from . import * # noqa 452 | -------------------------------------------------------------------------------- /sites/ao3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import logging 4 | import datetime 5 | import re 6 | import requests_cache 7 | from . 
import register, Site, Section, Chapter, SiteException 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | @register 13 | class ArchiveOfOurOwn(Site): 14 | """Archive of Our Own: it has its own epub export, but the formatting is awful""" 15 | @staticmethod 16 | def matches(url): 17 | # e.g. http://archiveofourown.org/works/5683105/chapters/13092007 18 | match = re.match(r'^(https?://(?:www\.)?archiveofourown\.org/works/\d+)/?.*', url) 19 | if match: 20 | return match.group(1) + '/' 21 | 22 | def login(self, login_details): 23 | with requests_cache.disabled(): 24 | # Can't just pass this url to _soup because I need the cookies later 25 | login = self.session.get('https://archiveofourown.org/users/login') 26 | soup, nobase = self._soup(login.text) 27 | post, action, method = self._form_data(soup.find(id='new_user')) 28 | post['user[login]'] = login_details[0] 29 | post['user[password]'] = login_details[1] 30 | # I feel the session *should* handle this cookies bit for me. But 31 | # it doesn't. And I don't know why. 32 | result = self.session.post( 33 | self._join_url(login.url, action), 34 | data=post, cookies=login.cookies 35 | ) 36 | if result.ok: 37 | logger.info("Logged in as %s", login_details[0]) 38 | else: 39 | logger.error("Failed to log in as %s", login_details[0]) 40 | 41 | def extract(self, url): 42 | workid = re.match(r'^https?://(?:www\.)?archiveofourown\.org/works/(\d+)/?.*', url).group(1) 43 | return self._extract_work(workid) 44 | 45 | def _extract_work(self, workid): 46 | # Fetch the full work 47 | url = f'http://archiveofourown.org/works/{workid}?view_adult=true&view_full_work=true' 48 | logger.info("Extracting full work @ %s", url) 49 | soup, base = self._soup(url) 50 | 51 | if not soup.find(id='workskin'): 52 | raise SiteException("Can't find the story text; you may need to log in or flush the cache") 53 | 54 | story = Section( 55 | title=soup.select('#workskin > .preface .title')[0].text.strip(), 56 | author=soup.select('#workskin .preface .byline a')[0].text.strip(), 57 | summary=soup.select('#workskin .preface .summary blockquote')[0].prettify(), 58 | url=f'http://archiveofourown.org/works/{workid}', 59 | tags=[tag.get_text().strip() for tag in soup.select('.work.meta .tags a.tag')] 60 | ) 61 | 62 | # Fetch the chapter list as well because it contains info that's not in the full work 63 | nav_soup, nav_base = self._soup(f'https://archiveofourown.org/works/{workid}/navigate') 64 | chapters = soup.select('#chapters > div') 65 | if len(chapters) == 1: 66 | # in a single-chapter story the #chapters div is actually the chapter 67 | chapters = [soup.find(id='chapters').parent] 68 | 69 | for index, chapter in enumerate(nav_soup.select('#main ol[role="navigation"] li')): 70 | link = chapter.find('a') 71 | logger.info("Extracting chapter %s", link.string) 72 | 73 | updated = datetime.datetime.strptime( 74 | chapter.find('span', class_='datetime').string, 75 | "(%Y-%m-%d)" 76 | ) 77 | 78 | chapter_soup = chapters[index] 79 | if not chapter_soup: 80 | logger.warning("Couldn't find chapter %s in full work", index + 1) 81 | continue 82 | 83 | story.add(Chapter( 84 | title=link.string, 85 | # the `or soup` fallback covers single-chapter works 86 | contents=self._chapter(chapter_soup, base), 87 | date=updated 88 | )) 89 | 90 | self._finalize(story) 91 | 92 | return story 93 | 94 | def _chapter(self, soup, base): 95 | content = soup.find('div', role='article') 96 | 97 | for landmark in content.find_all(class_='landmark'): 98 | landmark.decompose() 99 | 100 | # TODO: Maybe 
these should be footnotes instead? 101 | notes = soup.select('#chapters .end.notes') 102 | if notes: 103 | notes = notes[0] 104 | for landmark in notes.find_all(class_='landmark'): 105 | landmark.decompose() 106 | 107 | self._clean(content, base) 108 | 109 | return content.prettify() + (notes and notes.prettify() or '') 110 | 111 | 112 | @register 113 | class ArchiveOfOurOwnSeries(ArchiveOfOurOwn): 114 | _key = "ArchiveOfOurOwn" 115 | 116 | @staticmethod 117 | def matches(url): 118 | # e.g. http://archiveofourown.org/series/5683105/ 119 | match = re.match(r'^(https?://archiveofourown\.org/series/\d+)/?.*', url) 120 | if match: 121 | return match.group(1) + '/' 122 | 123 | def extract(self, url): 124 | seriesid = re.match(r'^https?://archiveofourown\.org/series/(\d+)/?.*', url).group(1) 125 | 126 | soup, base = self._soup(f'http://archiveofourown.org/series/{seriesid}?view_adult=true') 127 | 128 | story = Section( 129 | title=soup.select('#main h2.heading')[0].text.strip(), 130 | author=soup.select('#main dl.series.meta a[rel="author"]')[0].string, 131 | url=f'http://archiveofourown.org/series/{seriesid}' 132 | ) 133 | 134 | for work in soup.select('#main ul.series li.work'): 135 | workid = work.get('id').replace('work_', '') 136 | substory = self._extract_work(workid) 137 | 138 | # TODO: improve epub-writer to be able to generate a toc.ncx with nested headings 139 | story.add(substory) 140 | 141 | return story 142 | -------------------------------------------------------------------------------- /sites/arbitrary.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import logging 4 | from attrs import define 5 | import datetime 6 | import json 7 | import re 8 | import os.path 9 | from . import register, Site, Section, Chapter, SiteException 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | """ 14 | Example JSON: 15 | { 16 | "url": "https://practicalguidetoevil.wordpress.com/table-of-contents/", 17 | "title": "A Practical Guide To Evil: Book 1", 18 | "author": "erraticerrata", 19 | "chapter_selector": "#main .entry-content > ul > li > a", 20 | "content_selector": "#main .entry-content", 21 | "filter_selector": ".sharedaddy, .wpcnt, style", 22 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" 23 | } 24 | """ 25 | 26 | 27 | @define 28 | class SiteDefinition: 29 | url: str 30 | title: str 31 | author: str 32 | content_selector: str 33 | # If present, find something within `content` to use a chapter title; if not found, the link text to it will be used 34 | content_title_selector: str = False 35 | # If present, find a specific element in the `content` to be the chapter text 36 | content_text_selector: str = False 37 | # If present, it looks for chapters linked from `url`. If not, it assumes `url` points to a chapter. 38 | chapter_selector: str = False 39 | # If present, use to find a link to the next content page (only used if not using chapter_selector) 40 | next_selector: str = False 41 | # If present, use to filter out content that matches the selector 42 | filter_selector: str = False 43 | cover_url: str = '' 44 | 45 | 46 | @register 47 | class Arbitrary(Site): 48 | """A way to describe an arbitrary side for a one-off fetch 49 | """ 50 | @staticmethod 51 | def matches(url): 52 | # e.g. 
practical1.json 53 | if url.endswith('.json') and os.path.isfile(url): 54 | return url 55 | 56 | def extract(self, url): 57 | with open(url) as definition_file: 58 | definition = SiteDefinition(**json.load(definition_file)) 59 | 60 | story = Section( 61 | title=definition.title, 62 | author=definition.author, 63 | url=url, 64 | cover_url=definition.cover_url 65 | ) 66 | 67 | if definition.chapter_selector: 68 | soup, base = self._soup(definition.url) 69 | for chapter_link in soup.select(definition.chapter_selector): 70 | chapter_url = str(chapter_link.get('href')) 71 | if base: 72 | chapter_url = self._join_url(base, chapter_url) 73 | chapter_url = self._join_url(definition.url, chapter_url) 74 | for chapter in self._chapter(chapter_url, definition, title=chapter_link.string): 75 | story.add(chapter) 76 | else: 77 | # set of already processed urls. Stored to detect loops. 78 | found_content_urls = set() 79 | content_urls = [definition.url] 80 | 81 | def process_content_url(content_url): 82 | if content_url in found_content_urls: 83 | return None 84 | found_content_urls.add(content_url) 85 | for chapter in self._chapter(content_url, definition): 86 | story.add(chapter) 87 | return content_url 88 | 89 | while content_urls: 90 | for temp_url in content_urls: 91 | # stop inner loop once a new link is found 92 | if content_url := process_content_url(temp_url): 93 | break 94 | # reset url list 95 | content_urls = [] 96 | if content_url and definition.next_selector: 97 | soup, base = self._soup(content_url) 98 | next_link = soup.select(definition.next_selector) 99 | if next_link: 100 | for next_link_item in next_link: 101 | next_link_url = str(next_link_item.get('href')) 102 | if base: 103 | next_link_url = self._join_url(base, next_link_url) 104 | content_urls.append(self._join_url(content_url, next_link_url)) 105 | 106 | if not story: 107 | raise SiteException("No story content found; check the content selectors") 108 | 109 | self._finalize(story) 110 | 111 | return story 112 | 113 | def _chapter(self, url, definition, title=False): 114 | logger.info("Extracting chapter @ %s", url) 115 | soup, base = self._soup(url) 116 | 117 | chapters = [] 118 | 119 | if not soup.select(definition.content_selector): 120 | return chapters 121 | 122 | # clean up a few things which will definitely break epubs: 123 | # TODO: expand this greatly, or make it configurable 124 | for namespaced in soup.find_all(re.compile(r'[a-z]+:[a-z]+')): 125 | # Namespaced elements are going to cause validation errors 126 | namespaced.decompose() 127 | 128 | for content in soup.select(definition.content_selector): 129 | if definition.filter_selector: 130 | for filtered in content.select(definition.filter_selector): 131 | filtered.decompose() 132 | 133 | if definition.content_title_selector: 134 | title_element = content.select(definition.content_title_selector) 135 | if title_element: 136 | title = title_element[0].get_text().strip() 137 | 138 | if definition.content_text_selector: 139 | # TODO: multiple text elements? 
140 | content = content.select(definition.content_text_selector)[0] 141 | 142 | # TODO: consider `'\n'.join(map(str, content.contents))` 143 | content.name = 'div' 144 | 145 | self._clean(content, base) 146 | 147 | chapters.append(Chapter( 148 | title=title, 149 | contents=content.prettify(), 150 | # TODO: better date detection 151 | date=datetime.datetime.now() 152 | )) 153 | 154 | return chapters 155 | -------------------------------------------------------------------------------- /sites/deviantart.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import logging 4 | import re 5 | 6 | from . import register, Section 7 | from .stash import Stash 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | @register 13 | class DeviantArt(Stash): 14 | @staticmethod 15 | def matches(url): 16 | # Need a collection page 17 | match = re.match(r'^https?://[^.]+\.deviantart\.com/(?:gallery|favourites)/\d+/?', url) 18 | if match: 19 | return match.group(0) + '/' 20 | 21 | def extract(self, url): 22 | soup, base = self._soup(url) 23 | content = soup.find(id="output") 24 | if not content: 25 | return 26 | 27 | if "gallery" in url: 28 | author = str(content.select('h1 a.u')[0].string) 29 | else: 30 | authors = set(str(author.string) for author in content.select('.stream .details a.u')) 31 | author = ', '.join(authors) 32 | 33 | story = Section( 34 | title=str(content.find(class_="folder-title").string), 35 | author=author, 36 | url=url 37 | ) 38 | 39 | thumbs = content.select(".stream a.thumb") 40 | if not thumbs: 41 | return 42 | for thumb in thumbs: 43 | try: 44 | if thumb['href'] != '#': 45 | story.add(self._chapter(thumb['href'])) 46 | except Exception: 47 | logger.exception("Couldn't extract chapters from thumbs") 48 | 49 | self._finalize(story) 50 | 51 | return story 52 | -------------------------------------------------------------------------------- /sites/fanfictionnet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import logging 4 | import datetime 5 | import re 6 | import urllib.parse 7 | import attr 8 | from . import register, Site, SiteException, CloudflareException, Section, Chapter 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | @register 14 | class FanFictionNet(Site): 15 | _cloudflared = attr.ib(init=False, default=False) 16 | 17 | """FFN: it has a lot of stuff""" 18 | @staticmethod 19 | def matches(url): 20 | # e.g. 
https://www.fanfiction.net/s/4109686/3/Taking-Sights 21 | match = re.match(r'^https?://(?:www|m)\.fanfiction\.net/s/(\d+)/?.*', url) 22 | if match: 23 | return 'https://www.fanfiction.net/s/' + match.group(1) + '/' 24 | 25 | def extract(self, url): 26 | soup, base = self._soup(url) 27 | 28 | content = soup.find(id="content_wrapper_inner") 29 | if not content: 30 | raise SiteException("No content") 31 | 32 | metadata = content.find(id='profile_top') 33 | 34 | story = Section( 35 | title=str(metadata.find('b', class_="xcontrast_txt").string), 36 | author=str(metadata.find('a', class_="xcontrast_txt").string), 37 | url=url 38 | ) 39 | 40 | dates = content.find_all('span', attrs={'data-xutime': True}) 41 | published = False 42 | updated = False 43 | if len(dates) == 1: 44 | published = datetime.datetime.fromtimestamp(int(dates[0]['data-xutime'])) 45 | elif len(dates) == 2: 46 | updated = datetime.datetime.fromtimestamp(int(dates[0]['data-xutime'])) 47 | published = datetime.datetime.fromtimestamp(int(dates[1]['data-xutime'])) 48 | 49 | chapter_select = content.find(id="chap_select") 50 | if chapter_select: 51 | base_url = re.search(r'(https?://[^/]+/s/\d+/?)', url) 52 | if not base_url: 53 | raise SiteException("Can't find base URL for chapters") 54 | base_url = base_url.group(0) 55 | 56 | suffix = re.search(r"'(/[^']+)';", chapter_select.attrs['onchange']) 57 | if not suffix: 58 | raise SiteException("Can't find URL suffix for chapters") 59 | suffix = suffix.group(1) 60 | 61 | # beautiful soup doesn't handle ffn's unclosed option tags at all well here 62 | options = re.findall(r']*>([^<]+)', str(chapter_select)) 63 | for option in options: 64 | story.add(Chapter(title=option[1], contents=self._chapter(base_url + option[0] + suffix), date=False)) 65 | 66 | # fix up the dates 67 | story[-1].date = updated 68 | story[0].date = published 69 | else: 70 | story.add(Chapter(title=story.title, contents=self._chapter(url), date=published)) 71 | 72 | self._finalize(story) 73 | 74 | return story 75 | 76 | def _chapter(self, url): 77 | logger.info("Fetching chapter @ %s", url) 78 | soup, base = self._soup(url) 79 | 80 | content = soup.find(id="content_wrapper_inner") 81 | if not content: 82 | raise SiteException("No chapter content") 83 | 84 | text = content.find(id="storytext") 85 | if not text: 86 | raise SiteException("No chapter content") 87 | 88 | # clean up some invalid xhtml attributes 89 | # TODO: be more selective about this somehow 90 | try: 91 | for tag in text.find_all(True): 92 | tag.attrs.clear() 93 | except Exception: 94 | logger.exception("Trouble cleaning attributes") 95 | 96 | self._clean(text, base) 97 | 98 | return text.prettify() 99 | 100 | def _soup(self, url, *args, **kwargs): 101 | if self._cloudflared: 102 | fallback = f"https://archive.org/wayback/available?url={urllib.parse.quote(url)}" 103 | try: 104 | response = self.session.get(fallback) 105 | wayback = response.json() 106 | closest = wayback['archived_snapshots']['closest']['url'] 107 | return super()._soup(closest, *args, delay=1, **kwargs) 108 | except Exception: 109 | self.session.cache.delete_url(fallback) 110 | raise CloudflareException("Couldn't fetch, presumably because of Cloudflare protection, and falling back to archive.org failed; if some chapters were succeeding, try again?", url, fallback) 111 | try: 112 | return super()._soup(self, url, *args, **kwargs) 113 | except CloudflareException: 114 | self._cloudflared = True 115 | return self._soup(url, *args, **kwargs) 116 | 117 | 118 | @register 119 | class 
FictionPress(FanFictionNet): 120 | @staticmethod 121 | def matches(url): 122 | # e.g. https://www.fictionpress.com/s/2961893/1/Mother-of-Learning 123 | match = re.match(r'^https?://(?:www|m)\.fictionpress\.com/s/(\d+)/?.*', url) 124 | if match: 125 | return 'https://www.fictionpress.com/s/' + match.group(1) + '/' 126 | -------------------------------------------------------------------------------- /sites/fictionlive.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import logging 4 | import itertools 5 | import datetime 6 | import re 7 | from . import register, Site, Section, Chapter 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | @register 13 | class FictionLive(Site): 14 | """fiction.live: it's... mostly smut, I think? Terrible smut. But, hey, I had a rec to follow.""" 15 | @staticmethod 16 | def matches(url): 17 | # e.g. https://fiction.live/stories/Descendant-of-a-Demon-Lord/SBBA49fQavNQMWxFT 18 | match = re.match(r'^(https?://fiction\.live/(?:stories|Sci-fi)/[^\/]+/[0-9a-zA-Z\-]+)/?.*', url) 19 | if match: 20 | return match.group(1) 21 | 22 | def extract(self, url): 23 | workid = re.match(r'^https?://fiction\.live/(?:stories|Sci-fi)/[^\/]+/([0-9a-zA-Z\-]+)/?.*', url).group(1) 24 | 25 | response = self.session.get(f'https://fiction.live/api/node/{workid}').json() 26 | 27 | story = Section( 28 | title=response['t'], 29 | author=response['u'][0]['n'], 30 | # Could normalize the URL here from the returns, but I'd have to 31 | # go look up how they handle special characters in titles... 32 | url=url 33 | ) 34 | # There's a summary (or similar) in `d` and `b`, if I want to use that later. 35 | 36 | # TODO: extract these #special ones and send them off to an endnotes section? 37 | chapters = ({'ct': 0},) + tuple(c for c in response['bm'] if not c['title'].startswith('#special')) + ({'ct': 9999999999999999},) 38 | 39 | for prevc, currc, nextc in contextiterate(chapters): 40 | # `id`, `title`, `ct`, `isFirst` 41 | # https://fiction.live/api/anonkun/chapters/SBBA49fQavNQMWxFT/0/1448245168594 42 | # https://fiction.live/api/anonkun/chapters/SBBA49fQavNQMWxFT/1449266444062/1449615394752 43 | # https://fiction.live/api/anonkun/chapters/SBBA49fQavNQMWxFT/1502823848216/9999999999999998 44 | # i.e. format is [current timestamp] / [next timestamp - 1] 45 | chapter_url = f'https://fiction.live/api/anonkun/chapters/{workid}/{currc["ct"]}/{nextc["ct"] - 1}' 46 | logger.info("Extracting chapter \"%s\" @ %s", currc['title'], chapter_url) 47 | data = self.session.get(chapter_url).json() 48 | html = [] 49 | 50 | updated = currc['ct'] 51 | for segment in (d for d in data if not d.get('t', '').startswith('#special')): 52 | updated = max(updated, segment['ct']) 53 | # TODO: work out if this is actually enough types handled 54 | # There's at least also a reader post type, which mostly seems to be used for die rolls. 55 | try: 56 | if segment['nt'] == 'chapter': 57 | html.extend(('
<div>', segment['b'].replace('<br>', '<br/>'), '</div>
')) 58 | elif segment['nt'] == 'choice': 59 | if 'votes' not in segment: 60 | # Somehow, sometime, we end up with a choice without votes (or choices) 61 | continue 62 | votes = {} 63 | for vote in segment['votes']: 64 | votechoices = segment['votes'][vote] 65 | if isinstance(votechoices, str): 66 | # This caused issue #30, where for some reason one 67 | # choice on a story was a string rather than an 68 | # index into the choices array. 69 | continue 70 | if isinstance(votechoices, int): 71 | votechoices = (votechoices,) 72 | for choice in votechoices: 73 | if int(choice) < len(segment['choices']): 74 | # sometimes someone has voted for a presumably-deleted choice 75 | choice = segment['choices'][int(choice)] 76 | votes[choice] = votes.get(choice, 0) + 1 77 | choices = [(votes[v], v) for v in votes] 78 | choices.sort(reverse=True) 79 | html.append('

') 83 | elif segment['nt'] == 'readerPost': 84 | pass 85 | else: 86 | logger.info("Skipped chapter-segment of unhandled type: %s", segment['nt']) 87 | except Exception as e: 88 | logger.error("Skipped chapter-segment due to parsing error", exc_info=e) 89 | 90 | story.add(Chapter( 91 | title=currc['title'], 92 | contents='\n'.join(html), 93 | date=datetime.datetime.fromtimestamp(updated / 1000.0) 94 | )) 95 | 96 | self._finalize(story) 97 | 98 | return story 99 | 100 | 101 | # Stolen from the itertools docs 102 | def contextiterate(iterable): 103 | "s -> (s0,s1), (s1,s2), (s2, s3), ..." 104 | a, b, c = itertools.tee(iterable, 3) 105 | next(b, None) 106 | next(c, None) 107 | next(c, None) 108 | return zip(a, b, c) 109 | -------------------------------------------------------------------------------- /sites/royalroad.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import http.client 4 | import logging 5 | import datetime 6 | import re 7 | from . import register, Site, Section, Chapter, SiteSpecificOption 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | @register 13 | class RoyalRoad(Site): 14 | domain = r'royalroad' 15 | 16 | @staticmethod 17 | def get_site_specific_option_defs(): 18 | return Site.get_site_specific_option_defs() + [ 19 | SiteSpecificOption( 20 | 'offset', 21 | '--offset', 22 | type=int, 23 | help="The chapter index to start in the chapter marks." 24 | ), 25 | SiteSpecificOption( 26 | 'limit', 27 | '--limit', 28 | type=int, 29 | help="The chapter to end at at in the chapter marks." 30 | ), 31 | ] 32 | 33 | """Royal Road: a place where people write novels, mostly seeming to be light-novel in tone.""" 34 | @classmethod 35 | def matches(cls, url): 36 | # e.g. https://royalroad.com/fiction/6752/lament-of-the-fallen 37 | match = re.match(r'^(https?://(?:www\.)?%s\.com/fiction/\d+)/?.*' % cls.domain, url) 38 | if match: 39 | return match.group(1) + '/' 40 | 41 | def extract(self, url): 42 | workid = re.match(r'^https?://(?:www\.)?%s\.com/fiction/(\d+)/?.*' % self.domain, url).group(1) 43 | soup, base = self._soup(f'https://www.{self.domain}.com/fiction/{workid}') 44 | # should have gotten redirected, for a valid title 45 | 46 | original_maxheaders = http.client._MAXHEADERS 47 | http.client._MAXHEADERS = 1000 48 | 49 | story = Section( 50 | title=soup.find('h1').string.strip(), 51 | author=soup.find('meta', property='books:author').get('content').strip(), 52 | url=soup.find('meta', property='og:url').get('content').strip(), 53 | cover_url=self._join_url(base, soup.find('img', class_='thumbnail')['src']), 54 | summary=str(soup.find('div', class_='description')).strip(), 55 | tags=[tag.get_text().strip() for tag in soup.select('span.tags a.fiction-tag')] 56 | ) 57 | 58 | for index, chapter in enumerate(soup.select('#chapters tbody tr[data-url]')): 59 | if self.options['offset'] and index < self.options['offset']: 60 | continue 61 | if self.options['limit'] and index >= self.options['limit']: 62 | continue 63 | chapter_url = str(self._join_url(story.url, str(chapter.get('data-url')))) 64 | 65 | contents, updated = self._chapter(chapter_url, len(story) + 1) 66 | 67 | story.add(Chapter(title=chapter.find('a', href=True).string.strip(), contents=contents, date=updated)) 68 | 69 | http.client._MAXHEADERS = original_maxheaders 70 | 71 | self._finalize(story) 72 | 73 | return story 74 | 75 | def _chapter(self, url, chapterid): 76 | logger.info("Extracting chapter @ %s", url) 77 | soup, base = self._soup(url) 78 | 
content = soup.find('div', class_='chapter-content') 79 | 80 | self._clean(content, full_page=soup, base=base) 81 | self._clean_spoilers(content, chapterid) 82 | 83 | content = str(content) 84 | 85 | author_note = soup.find_all('div', class_='author-note-portlet') 86 | 87 | if len(author_note) == 1: 88 | # Find the parent of chapter-content and check if the author's note is the first child div 89 | if 'author-note-portlet' in soup.find('div', class_='chapter-content').parent.find('div')['class']: 90 | content = str(author_note[0]) + '
<hr/>' + content 91 | else: # The author note must be after the chapter content 92 | content = content + '<hr/>
' + str(author_note[0]) 93 | elif len(author_note) == 2: 94 | content = str(author_note[0]) + '<hr/>
' + content + '<hr/>
' + str(author_note[1]) 95 | 96 | updated = datetime.datetime.fromtimestamp( 97 | int(soup.find(class_="profile-info").find('time').get('unixtime')) 98 | ) 99 | 100 | return content, updated 101 | 102 | def _clean(self, contents, full_page, base=False): 103 | contents = super()._clean(contents, base=base) 104 | 105 | # Royalroad has started inserting "this was stolen" notices into its 106 | # HTML, and hiding them with CSS. Currently the CSS is very easy to 107 | # find, so do so and filter them out. 108 | for style in full_page.find_all('style'): 109 | if m := re.match(r'\s*\.(\w+)\s*{[^}]*display:\s*none;[^}]*}', style.string): 110 | for warning in contents.find_all(class_=m.group(1)): 111 | warning.decompose() 112 | 113 | return contents 114 | 115 | def _clean_spoilers(self, content, chapterid): 116 | # Spoilers to footnotes 117 | for spoiler in content.find_all(class_=('spoiler-new')): 118 | spoiler_title = spoiler.get('data-caption') 119 | new_spoiler = self._new_tag('div', class_="leech-spoiler") 120 | if self.options['spoilers'] == 'skip': 121 | new_spoiler.append(spoiler_title and f'[SPOILER: {spoiler_title}]' or '[SPOILER]') 122 | elif self.options['spoilers'] == 'inline': 123 | if spoiler_title: 124 | new_spoiler.append(f"{spoiler_title}: ") 125 | new_spoiler.append(spoiler) 126 | else: 127 | link = self._footnote(spoiler, chapterid) 128 | if spoiler_title: 129 | link.string = spoiler_title 130 | new_spoiler.append(link) 131 | spoiler.replace_with(new_spoiler) 132 | 133 | 134 | @register 135 | class RoyalRoadL(RoyalRoad): 136 | domain = 'royalroadl' 137 | -------------------------------------------------------------------------------- /sites/stash.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import logging 4 | import datetime 5 | import re 6 | from . 
import register, Site, SiteException, Section, Chapter 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | @register 12 | class Stash(Site): 13 | @staticmethod 14 | def matches(url): 15 | # Need a stack page 16 | match = re.match(r'^(https?://sta\.sh/2.+)/?.*', url) 17 | if match: 18 | return match.group(1) + '/' 19 | 20 | def extract(self, url): 21 | soup, base = self._soup(url) 22 | content = soup.find(id="stash-body") 23 | if not content: 24 | return 25 | 26 | # metadata = content.find(id='profile_top') 27 | story = Section( 28 | title=str(soup.find(class_="stash-folder-name").h2.string), 29 | author=str(soup.find('span', class_="oh-stashlogo-name").string).rstrip("'s"), 30 | url=url 31 | ) 32 | 33 | thumbs = content.select(".stash-folder-stream .thumb") 34 | if not thumbs: 35 | return 36 | for thumb in thumbs: 37 | try: 38 | if thumb['href'] != '#': 39 | story.add(self._chapter(thumb['href'])) 40 | except Exception: 41 | logger.exception("Couldn't extract chapters from thumbs") 42 | 43 | self._finalize(story) 44 | 45 | return story 46 | 47 | def _chapter(self, url): 48 | logger.info("Fetching chapter @ %s", url) 49 | soup, base = self._soup(url) 50 | 51 | content = soup.find(class_="journal-wrapper") 52 | if not content: 53 | raise SiteException("No content") 54 | 55 | title = str(content.find(class_="gr-top").find(class_='metadata').h2.a.string) 56 | 57 | text = content.find(class_="text") 58 | 59 | # clean up some invalid xhtml attributes 60 | # TODO: be more selective about this somehow 61 | try: 62 | for tag in text.find_all(True): 63 | tag.attrs = None 64 | except Exception as e: 65 | raise SiteException("Trouble cleaning attributes", e) 66 | 67 | self._clean(text, base) 68 | 69 | return Chapter(title=title, contents=text.prettify(), date=self._date(soup)) 70 | 71 | def _date(self, soup): 72 | maybe_date = soup.find('div', class_="dev-metainfo-details").find('span', ts=True) 73 | return datetime.datetime.fromtimestamp(int(maybe_date['ts'])) 74 | -------------------------------------------------------------------------------- /sites/wattpad.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import logging 4 | import datetime 5 | import re 6 | from . import register, Site, Section, Chapter 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | @register 12 | class Wattpad(Site): 13 | """Wattpad""" 14 | @classmethod 15 | def matches(cls, url): 16 | # e.g. https://www.wattpad.com/story/208753031-summoned-to-have-tea-with-the-demon-lord-i-guess 17 | # chapter URLs are e.g. https://www.wattpad.com/818687865-summoned-to-have-tea-with-the-demon-lord-i-guess 18 | match = re.match(r'^(https?://(?:www\.)?wattpad\.com/story/\d+)?.*', url) 19 | if match: 20 | # the story-title part is unnecessary 21 | return match.group(1) 22 | 23 | def extract(self, url): 24 | workid = re.match(r'^https?://(?:www\.)?wattpad\.com/story/(\d+)?.*', url).group(1) 25 | info = self.session.get(f"https://www.wattpad.com/api/v3/stories/{workid}").json() 26 | 27 | story = Section( 28 | title=info['title'], 29 | author=info['user']['name'], 30 | url=url, 31 | cover_url=info['cover'] 32 | ) 33 | 34 | for chapter in info['parts']: 35 | story.add(Chapter( 36 | title=chapter['title'], 37 | contents=self._chapter(chapter['id']), 38 | # "2020-05-03T22:14:29Z" 39 | date=datetime.datetime.fromisoformat(chapter['createDate'].rstrip('Z')) # modifyDate also? 
40 | )) 41 | 42 | self._finalize(story) 43 | 44 | return story 45 | 46 | def _chapter(self, chapterid): 47 | logger.info(f"Extracting chapter @ {chapterid}") 48 | api = self.session.get(f"https://www.wattpad.com/apiv2/storytext?id={chapterid}") 49 | return '<div>' + api.text + '</div>
' 50 | -------------------------------------------------------------------------------- /sites/xenforo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import datetime 4 | import re 5 | import logging 6 | import requests_cache 7 | 8 | from . import Site, SiteException, SiteSpecificOption, Section, Chapter 9 | import mintotp 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | class XenForo(Site): 15 | """XenForo is forum software that powers a number of fiction-related forums.""" 16 | 17 | domain = False 18 | index_urls = False 19 | 20 | @staticmethod 21 | def get_site_specific_option_defs(): 22 | return Site.get_site_specific_option_defs() + [ 23 | SiteSpecificOption( 24 | 'include_index', 25 | '--include-index/--no-include-index', 26 | default=False, 27 | help="If true, the post marked as an index will be included as a chapter." 28 | ), 29 | SiteSpecificOption( 30 | 'offset', 31 | '--offset', 32 | type=int, 33 | help="The chapter index to start at in the chapter marks." 34 | ), 35 | SiteSpecificOption( 36 | 'limit', 37 | '--limit', 38 | type=int, 39 | help="The chapter index to end at in the chapter marks." 40 | ), 41 | ] 42 | 43 | @classmethod 44 | def matches(cls, url): 45 | match = re.match(r'^(https?://%s/(?:index\.php\?)?threads/[^/]*\d+/(?:\d+/)?reader)/?.*' % cls.domain, url) 46 | if match: 47 | return match.group(1) 48 | match = re.match(r'^(https?://%s/(?:index\.php\?)?threads/[^/]*\d+)/?.*' % cls.domain, url) 49 | if match: 50 | return match.group(1) + '/' 51 | 52 | def siteurl(self, path): 53 | if self.index_urls: 54 | return f'https://{self.domain}/index.php?{path}' 55 | return f'https://{self.domain}/{path}' 56 | 57 | def login(self, login_details): 58 | with requests_cache.disabled(): 59 | # Can't just pass this url to _soup because I need the cookies later 60 | login = self.session.get(self.siteurl('login/')) 61 | soup, nobase = self._soup(login.text) 62 | post, action, method = self._form_data(soup.find(class_='p-body-content')) 63 | post['login'] = login_details[0] 64 | post['password'] = login_details[1] 65 | # I feel the session *should* handle this cookies bit for me. But 66 | # it doesn't. And I don't know why. 67 | result = self.session.post( 68 | self._join_url(login.url, action), 69 | data=post, cookies=login.cookies 70 | ) 71 | if not result.ok: 72 | return logger.error("Failed to log in as %s", login_details[0]) 73 | soup, nobase = self._soup(result.text) 74 | if twofactor := soup.find('form', action="/login/two-step"): 75 | if len(login_details) < 3: 76 | return logger.error("Failed to log in as %s; login requires 2FA secret", login_details[0]) 77 | post, action, method = self._form_data(twofactor) 78 | post['code'] = mintotp.totp(login_details[2]) 79 | result = self.session.post( 80 | self._join_url(login.url, action), 81 | data=post, cookies=login.cookies 82 | ) 83 | if not result.ok: 84 | return logger.error("Failed to log in as %s; 2FA failed", login_details[0]) 85 | logger.info("Logged in as %s", login_details[0]) 86 | 87 | def extract(self, url): 88 | soup, base = self._soup(url) 89 | 90 | story = self._base_story(soup) 91 | 92 | threadmark_categories = {} 93 | # Note to self: in the source this is data-categoryId, but the parser 94 | # in bs4 lowercases tags and attributes...
95 | for cat in soup.find_all('a', attrs={'data-categoryid': True}): 96 | threadmark_categories[int(cat['data-categoryid'])] = cat['title'] 97 | 98 | if url.endswith('/reader'): 99 | reader_url = url 100 | elif soup.find('a', class_='readerToggle'): 101 | reader_url = soup.find('a', class_='readerToggle').get('href') 102 | elif soup.find('div', class_='threadmarks-reader'): 103 | # Technically this is the xenforo2 bit, but :shrug: 104 | reader_url = soup.find('div', class_='threadmarks-reader').find('a').get('href') 105 | else: 106 | reader_url = False 107 | 108 | if reader_url: 109 | match = re.search(r'\d+/(\d+)/reader', reader_url) 110 | if match: 111 | cat = int(match.group(1)) 112 | if cat != 1 and cat in threadmark_categories: 113 | story.title = f'{story.title} ({threadmark_categories[cat]})' 114 | idx = 0 115 | while reader_url: 116 | reader_url = self._join_url(base, reader_url) 117 | logger.info("Fetching chapters @ %s", reader_url) 118 | reader_soup, reader_base = self._soup(reader_url) 119 | posts = self._posts_from_page(reader_soup) 120 | 121 | for post in posts: 122 | idx = idx + 1 123 | if self.options['offset'] and idx < self.options['offset']: 124 | continue 125 | if self.options['limit'] and idx >= self.options['limit']: 126 | continue 127 | title = self._threadmark_title(post) 128 | logger.info("Extracting chapter \"%s\"", title) 129 | 130 | story.add(Chapter( 131 | title=title, 132 | contents=self._clean_chapter(post, len(story) + 1, base), 133 | date=self._post_date(post) 134 | )) 135 | 136 | reader_url = False 137 | if reader_soup.find('link', rel='next'): 138 | reader_url = reader_soup.find('link', rel='next').get('href') 139 | else: 140 | # TODO: Research whether reader mode is guaranteed to be enabled 141 | # when threadmarks are; if so, can delete this branch. 
142 | marks = [ 143 | mark for mark in self._chapter_list(url) 144 | if '/members' not in mark.get('href') and '/threadmarks' not in mark.get('href') 145 | ] 146 | marks = marks[self.options['offset']:self.options['limit']] 147 | 148 | for idx, mark in enumerate(marks, 1): 149 | href = self._join_url(base, mark.get('href')) 150 | title = str(mark.string).strip() 151 | logger.info("Fetching chapter \"%s\" @ %s", title, href) 152 | contents, post_date = self._chapter(href, idx) 153 | chapter = Chapter(title=title, contents=contents, date=post_date) 154 | story.add(chapter) 155 | 156 | self._finalize(story) 157 | 158 | return story 159 | 160 | def _base_story(self, soup): 161 | url = soup.find('meta', property='og:url').get('content') 162 | title = soup.select('div.titleBar > h1')[0] 163 | # clean out informational bits from the title 164 | for tag in title.find_all(class_='prefix'): 165 | tag.decompose() 166 | tags = [tag.get_text().strip() for tag in soup.select('div.tagBlock a.tag')] 167 | return Section( 168 | title=title.get_text().strip(), 169 | author=soup.find('p', id='pageDescription').find('a', class_='username').get_text(), 170 | url=url, 171 | tags=tags 172 | ) 173 | 174 | def _posts_from_page(self, soup, postid=False): 175 | if postid: 176 | return soup.find('li', id='post-' + postid) 177 | return soup.select('#messageList > li.hasThreadmark') 178 | 179 | def _threadmark_title(self, post): 180 | # Get the title, removing "Threadmark:" which precedes it 181 | return ''.join(post.select('div.threadmarker > span.label')[0].findAll(text=True, recursive=False)).strip() 182 | 183 | def _chapter_list(self, url): 184 | try: 185 | return self._chapter_list_threadmarks(url) 186 | except SiteException as e: 187 | logger.debug("Tried threadmarks (%r)", e.args) 188 | return self._chapter_list_index(url) 189 | 190 | def _chapter_list_threadmarks(self, url): 191 | soup, base = self._soup(url) 192 | 193 | threadmarks_link = soup.find(class_="threadmarksTrigger", href=True) 194 | if not threadmarks_link: 195 | try: 196 | threadmarks_link = soup.select('.threadmarkMenus a.OverlayTrigger')[0] 197 | except IndexError: 198 | pass 199 | 200 | if not threadmarks_link: 201 | raise SiteException("No threadmarks") 202 | 203 | href = threadmarks_link.get('href') 204 | soup, base = self._soup(self._join_url(base, href)) 205 | 206 | fetcher = soup.find(class_='ThreadmarkFetcher') 207 | while fetcher: 208 | # ThreadmarksPro, hiding some threadmarks. Means the API is available to do this. 209 | # Note: the fetched threadmarks can contain more placeholder elements to fetch. Ergo, loop. 210 | # Good test case: https://forums.sufficientvelocity.com/threads/ignition-mtg-multicross-planeswalker-pc.26099/threadmarks 211 | # e.g.:
  • 212 | response = self.session.post(self.siteurl('threads/threadmarks/load-range'), data={ 213 | # I did try a fetch on min/data-min+data-max, but there seems 214 | # to be an absolute limit which the API fetch won't override 215 | 'min': fetcher.get('data-range-min'), 216 | 'max': fetcher.get('data-range-max'), 217 | 'thread_id': fetcher.get('data-thread-id'), 218 | 'category_id': fetcher.get('data-category-id'), 219 | '_xfResponseType': 'json', 220 | }).json() 221 | responseSoup, nobase = self._soup(response['templateHtml']) 222 | fetcher.replace_with(responseSoup) 223 | fetcher = soup.find(class_='ThreadmarkFetcher') 224 | 225 | marks = soup.find(class_='threadmarks').select('li.primaryContent.threadmarkListItem a, li.primaryContent.threadmarkItem a') 226 | if not marks: 227 | raise SiteException("No marks on threadmarks page") 228 | 229 | return marks 230 | 231 | def _chapter_list_index(self, url): 232 | post = self._post_from_url(url) 233 | if not post: 234 | raise SiteException("Unparseable post URL", url) 235 | 236 | links = post.find('blockquote', class_='messageText').find_all('a', class_='internalLink') 237 | if not links: 238 | raise SiteException("No links in index?") 239 | 240 | if self.options['include_index']: 241 | fake_link = self._new_tag('a', href=url) 242 | fake_link.string = "Index" 243 | links.insert(0, fake_link) 244 | 245 | return links 246 | 247 | def _chapter(self, url, chapterid): 248 | post, base = self._post_from_url(url) 249 | 250 | return self._clean_chapter(post, chapterid, base), self._post_date(post) 251 | 252 | def _post_from_url(self, url): 253 | # URLs refer to specific posts, so get just that one 254 | # if no specific post referred to, get the first one 255 | match = re.search(r'posts/(\d+)/?', url) 256 | if not match: 257 | match = re.match(r'.+#post-(\d+)$', url) 258 | # could still be nothing here 259 | postid = match and match.group(1) 260 | if postid: 261 | # create a proper post-url, because threadmarks can sometimes 262 | # mess up page-wise with anchors 263 | url = self.siteurl(f'posts/{postid}/') 264 | soup, base = self._soup(url, 'lxml') 265 | 266 | if postid: 267 | return self._posts_from_page(soup, postid), base 268 | 269 | # just the first one in the thread, then 270 | return soup.find('li', class_='message'), base 271 | 272 | def _chapter_contents(self, post): 273 | return post.find('blockquote', class_='messageText') 274 | 275 | def _clean_chapter(self, post, chapterid, base): 276 | post = self._chapter_contents(post) 277 | post.name = 'div' 278 | # mostly, we want to remove colors because the Kindle is terrible at them 279 | # TODO: find a way to denote colors, because it can be relevant 280 | # TODO: at least invisitext, because outside of silly DC Lantern stuff, it's the most common 281 | for tag in post.find_all(style=True): 282 | if tag['style'] == 'color: transparent' and tag.text == 'TAB': 283 | # Some stories fake paragraph indents like this. The output 284 | # stylesheet will handle this just fine. 285 | tag.decompose() 286 | else: 287 | # There's a few things which xenforo does as styles, despite there being perfectly good tags 288 | # TODO: more robust CSS parsing? This is very whitespace dependent, if nothing else. 
289 | if "font-family: 'Courier New'" in tag['style']: 290 | tag.wrap(self._new_tag('code')) 291 | if "text-decoration: strikethrough" in tag['style']: 292 | tag.wrap(self._new_tag('strike')) 293 | if "margin-left" in tag['style']: 294 | continue 295 | del tag['style'] 296 | for tag in post.select('.quoteExpand, .bbCodeBlock-expandLink, .bbCodeBlock-shrinkLink'): 297 | tag.decompose() 298 | for tag in post.find_all('noscript'): 299 | # TODO: strip the noscript from these? 300 | # mostly this will be the lazyload images 301 | tag.decompose() 302 | for tag in post.select('img.lazyload[data-src]'): 303 | tag['src'] = tag['data-url'] 304 | if tag['src'].startswith('proxy.php'): 305 | tag['src'] = f"{self.domain}/{tag['src']}" 306 | self._clean(post, base) 307 | self._clean_spoilers(post, chapterid) 308 | return post.prettify() 309 | 310 | def _clean_spoilers(self, post, chapterid): 311 | # spoilers don't work well, so turn them into epub footnotes 312 | for spoiler in post.find_all(class_='ToggleTriggerAnchor'): 313 | spoiler_title = spoiler.find(class_='SpoilerTitle') 314 | if self.options['skip_spoilers']: 315 | link = self._footnote(spoiler.find(class_='SpoilerTarget').extract(), chapterid) 316 | if spoiler_title: 317 | link.string = spoiler_title.get_text() 318 | else: 319 | if spoiler_title: 320 | link = f'[SPOILER: {spoiler_title.get_text()}]' 321 | else: 322 | link = '[SPOILER]' 323 | new_spoiler = self._new_tag('div', class_="leech-spoiler") 324 | new_spoiler.append(link) 325 | spoiler.replace_with(new_spoiler) 326 | 327 | def _post_date(self, post): 328 | maybe_date = post.find(class_='DateTime') 329 | if 'data-time' in maybe_date.attrs: 330 | return datetime.datetime.fromtimestamp(int(maybe_date['data-time'])) 331 | if 'title' in maybe_date.attrs: 332 | # title="Feb 24, 2015 at 1:17 PM" 333 | return datetime.datetime.strptime(maybe_date['title'], "%b %d, %Y at %I:%M %p") 334 | raise SiteException("No date", maybe_date) 335 | 336 | 337 | class XenForoIndex(XenForo): 338 | @classmethod 339 | def matches(cls, url): 340 | match = re.match(r'^(https?://%s/posts/\d+)/?.*' % cls.domain, url) 341 | if match: 342 | return match.group(1) + '/' 343 | 344 | def _chapter_list(self, url): 345 | return self._chapter_list_index(url) 346 | -------------------------------------------------------------------------------- /sites/xenforo2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import datetime 4 | import logging 5 | 6 | from . 
import register, Section, SiteException 7 | from .xenforo import XenForo, XenForoIndex 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | class XenForo2(XenForo): 13 | def _base_story(self, soup): 14 | url = soup.find('meta', property='og:url').get('content') 15 | title = soup.select('h1.p-title-value')[0] 16 | # clean out informational bits from the title 17 | for tag in title.select('.labelLink,.label-append'): 18 | tag.decompose() 19 | tags = [tag.get_text().strip() for tag in soup.select('.tagList a.tagItem')] 20 | return Section( 21 | title=title.get_text().strip(), 22 | author=soup.find('div', class_='p-description').find('a', class_='username').get_text(), 23 | url=url, 24 | tags=tags 25 | ) 26 | 27 | def _posts_from_page(self, soup, postid=False): 28 | if postid: 29 | return soup.find('article', id='js-post-' + postid) 30 | return soup.select('article.message--post') 31 | 32 | def _threadmark_title(self, post): 33 | # Get the title, removing "Threadmark:" which precedes it 34 | return post.find('span', class_='threadmarkLabel').get_text() 35 | 36 | def _chapter_contents(self, post): 37 | return post.find('div', class_='message-userContent') 38 | 39 | def _clean_spoilers(self, post, chapterid): 40 | # spoilers don't work well, so turn them into epub footnotes 41 | for spoiler in post.find_all(class_='bbCodeSpoiler'): 42 | spoiler_title = spoiler.find(class_='bbCodeSpoiler-button-title') 43 | spoiler_contents = spoiler.find(class_='bbCodeBlock-content').extract() 44 | new_spoiler = self._new_tag('div', class_="leech-spoiler") 45 | if self.options['spoilers'] == 'skip': 46 | new_spoiler.append(spoiler_title and f'[SPOILER: {spoiler_title.get_text()}]' or '[SPOILER]') 47 | elif self.options['spoilers'] == 'inline': 48 | if spoiler_title: 49 | new_spoiler.append(f"{spoiler_title.get_text()}: ") 50 | new_spoiler.append(spoiler_contents) 51 | else: 52 | link = self._footnote(spoiler_contents, chapterid) 53 | if spoiler_title: 54 | link.string = spoiler_title.get_text() 55 | new_spoiler.append(link) 56 | spoiler.replace_with(new_spoiler) 57 | 58 | def _post_date(self, post): 59 | if post.find('time'): 60 | return datetime.datetime.fromtimestamp(int(post.find('time').get('data-time'))) 61 | raise SiteException("No date") 62 | 63 | 64 | @register 65 | class SpaceBattles(XenForo2): 66 | domain = 'forums.spacebattles.com' 67 | 68 | 69 | @register 70 | class SpaceBattlesIndex(SpaceBattles, XenForoIndex): 71 | _key = "SpaceBattles" 72 | 73 | 74 | @register 75 | class SufficientVelocity(XenForo2): 76 | domain = 'forums.sufficientvelocity.com' 77 | 78 | 79 | @register 80 | class TheSietch(XenForo2): 81 | domain = 'www.the-sietch.com' 82 | index_urls = True 83 | 84 | 85 | @register 86 | class QuestionableQuesting(XenForo2): 87 | domain = 'forum.questionablequesting.com' 88 | 89 | 90 | @register 91 | class QuestionableQuestingIndex(QuestionableQuesting, XenForoIndex): 92 | _key = "QuestionableQuesting" 93 | 94 | 95 | @register 96 | class AlternateHistory(XenForo2): 97 | domain = 'www.alternatehistory.com/forum' 98 | 99 | 100 | @register 101 | class AlternateHistoryIndex(AlternateHistory, XenForoIndex): 102 | _key = "AlternateHistory" 103 | --------------------------------------------------------------------------------
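Adding support for another XenForo2-based forum usually amounts to one more registered subclass in sites/xenforo2.py, following the pattern of the SpaceBattles and SufficientVelocity entries above. A minimal sketch (the domain here is hypothetical, not a site leech currently supports; it assumes the same imports as the file above):

@register
class ExampleForum(XenForo2):
    # Hypothetical domain, purely for illustration.
    domain = 'forums.example.com'

If the forum serves its pages through index.php, as www.the-sietch.com does above, also set index_urls = True so that siteurl() builds links in the index.php?path form.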