├── .editorconfig
├── .flake8
├── .github
│   └── workflows
│       └── python-package-poetry.yml
├── .gitignore
├── Dockerfile
├── LICENSE.txt
├── README.markdown
├── ebook
│   ├── __init__.py
│   ├── cover.py
│   ├── epub.py
│   └── image.py
├── examples
│   ├── cultivationchatgroup.json
│   ├── dungeonkeeperami.json
│   ├── fifthdefiance.json
│   ├── heretical-edge-2.json
│   ├── heretical-edge.json
│   ├── pact.json
│   ├── paeantosmac.json
│   ├── pale-lights.json
│   ├── pale-withextras.json
│   ├── pale.json
│   ├── phoenixdestiny.json
│   ├── practical1.json
│   ├── practical2.json
│   ├── practical3.json
│   ├── practical4.json
│   ├── practical5.json
│   ├── practical6.json
│   ├── practical7.json
│   ├── practicalall.json
│   ├── practicalextra.json
│   ├── sagaofsoul.json
│   ├── shouldthesun.json
│   ├── thegodsarebastards.json
│   ├── twig.json
│   ├── unsong.json
│   ├── vacantthrone.json
│   ├── wanderinginn.json
│   ├── ward.json
│   └── worm.json
├── leech.py
├── poetry.lock
├── pyproject.toml
└── sites
    ├── __init__.py
    ├── ao3.py
    ├── arbitrary.py
    ├── deviantart.py
    ├── fanfictionnet.py
    ├── fictionlive.py
    ├── royalroad.py
    ├── stash.py
    ├── wattpad.py
    ├── xenforo.py
    └── xenforo2.py
/.editorconfig:
--------------------------------------------------------------------------------
1 | root = true
2 |
3 | [*]
4 | end_of_line = lf
5 | insert_final_newline = true
6 | trim_trailing_whitespace = true
7 |
8 | [*.py]
9 | indent_style = space
10 | indent_size = 4
11 | charset = utf-8
12 |
13 | [{package.json,.travis.yml}]
14 | indent_style = space
15 | indent_size = 2
16 |
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | extend_ignore =
3 | # E128 continuation line under-indented for visual indent
4 | # E128,
5 | # E501 line too long
6 | E501
7 | exclude = .git,__pycache__,venv
8 |
--------------------------------------------------------------------------------
/.github/workflows/python-package-poetry.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: Python package
5 |
6 | on:
7 | push:
8 | branches: [ master ]
9 | pull_request:
10 | branches: [ master ]
11 |
12 | jobs:
13 | build:
14 |
15 | runs-on: ubuntu-latest
16 | strategy:
17 | matrix:
18 | python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
19 |
20 | steps:
21 | - uses: actions/checkout@v2
22 | - name: Set up Python ${{ matrix.python-version }}
23 | uses: actions/setup-python@v4
24 | with:
25 | python-version: ${{ matrix.python-version }}
26 | - name: Install tooling
27 | run: |
28 | python -m ensurepip
29 | python -m pip install --upgrade pip
30 | python -m pip install flake8 poetry
31 | - name: Install dependencies
32 | run: |
33 | poetry install
34 | - name: Lint with flake8
35 | run: |
36 | flake8 .
37 | - name: Make sure help runs
38 | run: |
39 | poetry run leech --help
40 | - name: Build a cover
41 | run: |
42 | poetry run python -m 'ebook.cover' && file -E output.png && rm output.png
43 | - name: Verify poetry build
44 | run: |
45 | poetry build && ls -og dist/*
46 | - name: eclint
47 | uses: snow-actions/eclint@v1.0.1
48 | with:
49 | args: 'check *.py sites/*.py'
50 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.epub
2 | *.mobi
3 | ./*.json
4 | leech.db
5 | leech.sqlite
6 | leech.cookies
7 | leech.json
8 | venv/
9 | .venv
10 |
11 | # Byte-compiled / optimized / DLL files
12 | __pycache__/
13 | *.py[cod]
14 |
15 | # C extensions
16 | *.so
17 |
18 | # Distribution / packaging
19 | bin/
20 | build/
21 | develop-eggs/
22 | dist/
23 | eggs/
24 | lib/
25 | lib64/
26 | parts/
27 | sdist/
28 | var/
29 | *.egg-info/
30 | .installed.cfg
31 | *.egg
32 |
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 |
37 | # Unit test / coverage reports
38 | .tox/
39 | .coverage
40 | .cache
41 | nosetests.xml
42 | coverage.xml
43 |
44 | # Translations
45 | *.mo
46 |
47 | # Mr Developer
48 | .mr.developer.cfg
49 | .project
50 | .pydevproject
51 |
52 | # Rope
53 | .ropeproject
54 |
55 | # Django stuff:
56 | *.log
57 | *.pot
58 |
59 | # Sphinx documentation
60 | docs/_build/
61 |
62 | # Pycharm
63 | .idea/
64 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM alpine:latest
2 |
3 | # Package list taken from Pillow documentation:
4 | # https://pillow.readthedocs.io/en/stable/installation.html#building-on-linux
5 | RUN apk add tiff-dev jpeg-dev openjpeg-dev zlib-dev freetype-dev lcms2-dev \
6 | libwebp-dev tcl-dev tk-dev harfbuzz-dev fribidi-dev libimagequant-dev \
7 | libxcb-dev libpng-dev gcc musl-dev python3 python3-dev py3-pip py3-cryptography
8 | RUN pip3 config set global.break-system-packages true && pip3 install poetry
9 |
10 | COPY . /leech
11 |
12 | RUN cd /leech \
13 | && poetry config virtualenvs.create false \
14 | && poetry install --without dev
15 |
16 | WORKDIR /work
17 |
18 | ENTRYPOINT ["/leech/leech.py"]
19 |
20 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2013-2017 David Lynch
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.markdown:
--------------------------------------------------------------------------------
1 | Leech
2 | ===
3 |
4 | Let's say you want to read some sort of fiction. You're a fan of it, perhaps. But mobile websites are kind of non-ideal, so you'd like a proper ebook made from whatever you're reading.
5 |
6 | Setup
7 | ---
8 |
9 | You need Python 3.9+ and poetry.
10 |
11 | My recommended setup process is:
12 |
13 | $ pip install poetry
14 | $ poetry install
15 | $ poetry shell
16 |
17 | ...adjust as needed. Just make sure the dependencies from `pyproject.toml` get installed somehow.
18 |
19 | Usage
20 | ---
21 |
22 | Basic
23 |
24 | $ python3 leech.py [[URL]]
25 |
26 | A new file will appear named `Title of the Story.epub`.
27 |
28 | This is equivalent to the slightly longer
29 |
30 | $ python3 leech.py download [[URL]]
31 |
32 | Flushing the cache
33 |
34 | $ python3 leech.py flush
35 |
36 | Learn about other options
37 |
38 | $ python3 leech.py --help
39 |
40 | If you want to put an ePub on a Kindle you'll have to either use Amazon's send-to-kindle tools or convert it. For the latter I'd recommend [Calibre](http://calibre-ebook.com/), though you could also try using [kindlegen](http://www.amazon.com/gp/feature.html?docId=1000765211) directly.
41 |
42 | Supports
43 | ---
44 |
45 | * Fanfiction.net
46 | * FictionPress
47 | * ArchiveOfOurOwn
48 | * Yes, it has its own built-in EPUB export, but the formatting is horrible
49 | * Various XenForo-based sites: SpaceBattles and SufficientVelocity, most notably
50 | * RoyalRoad
51 | * Fiction.live (Anonkun)
52 | * DeviantArt galleries/collections
53 | * Sta.sh
54 | * Completely arbitrary sites, with a bit more work (see below)
55 |
56 | Configuration
57 | ---
58 |
 59 | A very small amount of configuration is possible by creating a file called `leech.json` in the project directory. Currently you can define login information for sites that support it, options for image handling and book covers, and output directories.
60 |
61 | Example:
62 |
 63 | ```json
64 | {
65 | "logins": {
66 | "QuestionableQuesting": ["username", "password"]
67 | },
68 | "images": {
69 | "image_fetch": true,
70 | "image_format": "png",
71 | "compress_images": true,
72 | "max_image_size": 100000,
73 | "always_convert_images": true
74 | },
75 | "cover": {
76 | "fontname": "Comic Sans MS",
77 | "fontsize": 30,
78 | "bgcolor": [20, 120, 20],
79 | "textcolor": [180, 20, 180],
80 | "cover_url": "https://website.com/image.png"
81 | },
82 | "output_dir": "/tmp/ebooks",
83 | "site_options": {
84 | "RoyalRoad": {
85 | "output_dir": "/tmp/litrpg_isekai_trash",
86 | "image_fetch": false
87 | }
88 | }
89 | }
90 | ```
 91 | > Note: The `image_fetch` key is a boolean, so it can only be `true` or `false`. Booleans in JSON are written in lowercase.
 92 | > If it is `false`, Leech will not download any images.
 93 | > Leech will also ignore the `image_format` key if `image_fetch` is `false`.
94 |
95 | > Note: If the `image_format` key does not exist, Leech will default to `jpeg`.
 96 | > The three supported image formats are `jpeg`, `png`, and `gif`. The `image_format` key is case-insensitive.
97 |
 98 | > Note: The `compress_images` key tells Leech to compress images. This is only supported for `jpeg` and `png` images.
 99 | > It works together with the `max_image_size` key, which is measured in bytes. If `compress_images` is `true` but there's no `max_image_size` key,
100 | > Leech will compress each image to a size below 1MB (1,000,000 bytes). If the `max_image_size` key is present, Leech will compress each image
101 | > to a size below that value.
102 | > If `compress_images` is `false`, Leech will ignore the `max_image_size` key.
103 |
104 | > Warning: Compressing images might make Leech take a lot longer to download images.
105 |
106 | > Warning: Compressing images might make the image quality worse.
107 |
108 | > Warning: `max_image_size` is not a hard limit. Leech will try to compress each image to below `max_image_size`, but it might not
109 | > manage to hit that target exactly (the sketch after these notes shows why).
110 |
111 | > Warning: `max_image_size` should not be too small. For instance, with `max_image_size` set to 1000, Leech will probably not be able to
112 | > compress an image down to 1000 bytes, whereas a value of 1000000 is usually achievable.
113 |
114 | > Warning: Leech will not compress GIFs, since compressing them might damage the animation.
115 |
116 | > Note: if `always_convert_images` is `true`, Leech will convert all non-GIF images to the specified `image_format`.
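
For context on why the size limit is soft: `compress_image` in `ebook/image.py` turns the byte budget into a pixel budget and downscales the image once, rather than re-encoding in a loop until the file is small enough. A simplified sketch of that heuristic:

```python
# Simplified from compress_image() in ebook/image.py: the byte budget becomes a
# pixel budget (via an empirical factor), and the image is downscaled once.
from PIL import Image

def approximate_resize(img: Image.Image, max_image_size: int) -> Image.Image:
    target_pixels = 2.8114 * max_image_size          # empirical pixels-per-byte factor
    scale = target_pixels / (img.width * img.height)
    if scale >= 1:
        return img                                   # already within budget
    return img.resize((int(img.width * scale), int(img.height * scale)),
                      resample=Image.LANCZOS)
```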
117 |
118 | Arbitrary Sites
119 | ---
120 |
121 | If you want to just download a one-off story from a site, you can create a definition file to describe it. This requires investigation and understanding of things like CSS selectors, which may take some trial and error.
122 |
123 | Example `practical.json`:
124 |
125 | ```json
126 | {
127 | "url": "https://practicalguidetoevil.wordpress.com/table-of-contents/",
128 | "title": "A Practical Guide To Evil: Book 1",
129 | "author": "erraticerrata",
130 | "chapter_selector": "#main .entry-content > ul:nth-of-type(1) > li > a",
131 | "content_selector": "#main .entry-content",
132 | "filter_selector": ".sharedaddy, .wpcnt, style",
133 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png"
134 | }
135 | ```
136 |
137 | Run as:
138 |
139 | $ ./leech.py practical.json
140 |
141 | This tells leech to load `url`, follow the links described by `chapter_selector`, extract the content from those pages as described by `content_selector`, and remove any content from *that* which matches `filter_selector`. Optionally, `cover_url` will replace the default cover with the image of your choice.
142 |
143 | If `chapter_selector` isn't given, it'll create a single-chapter book by applying `content_selector` to `url`.
144 |
145 | This is a fairly viable way to extract a story from, say, a random Wordpress installation with a convenient table of contents. It's relatively likely to get you at least *most* of the way to the ebook you want, with maybe some manual editing needed.
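
When you're working out those selectors, it can save a lot of trial and error to poke at the page in an interactive session first. A small sketch, assuming you have `requests` and `beautifulsoup4` installed (this is purely for experimentation and isn't part of leech itself):

```python
# Quick check that a chapter_selector actually matches the links you expect.
import requests
from bs4 import BeautifulSoup

page = requests.get("https://practicalguidetoevil.wordpress.com/table-of-contents/")
soup = BeautifulSoup(page.text, "html.parser")

links = soup.select("#main .entry-content > ul:nth-of-type(1) > li > a")
print(f"{len(links)} chapter links found")
for a in links[:3]:
    print(a.get("href"), "->", a.get_text(strip=True))
```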
146 |
147 | A more advanced example would be:
148 |
149 | ```json
150 | {
151 | "url": "https://practicalguidetoevil.wordpress.com/2015/03/25/prologue/",
152 | "title": "A Practical Guide To Evil: Book 1",
153 | "author": "erraticerrata",
154 | "content_selector": "#main .entry-wrapper",
155 | "content_title_selector": "h1.entry-title",
156 | "content_text_selector": ".entry-content",
157 | "filter_selector": ".sharedaddy, .wpcnt, style",
158 | "next_selector": "a[rel=\"next\"]:not([href*=\"prologue\"])",
159 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png"
160 | }
161 | ```
162 |
163 | Because there's no `chapter_selector` here, leech will repeatedly look for a link matching `next_selector` and follow it to the next page. We also see more advanced metadata extraction here, with `content_title_selector` and `content_text_selector` being used to find specific elements within the content.
164 |
165 | If multiple matches for `content_selector` are found, leech will assume multiple chapters are present on one page, and will handle that. If the site hosting the story you want has all the chapters in the right order and next-page links, this is a notably efficient way to download it. See `examples/dungeonkeeperami.json` for this being used.
166 |
167 | If you need more advanced behavior, consider looking at...
168 |
169 | Adding new site handlers
170 | ---
171 |
172 | To add support for a new site, create a file in the `sites` directory that implements the `Site` interface. Take a look at `ao3.py` for a minimal example of what you have to do.
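
For orientation, here is a rough, hypothetical skeleton. What's grounded in this repository: `leech.py` constructs a handler as `site(session, options=options)`, optionally calls `login(...)`, and expects `extract(url)` to return a story object (with `title`, `author`, `cover_url` and chapters) or raise `sites.SiteException`. The import below and the `matches` hook are assumptions, so copy the real structure from `sites/__init__.py` and `sites/ao3.py` rather than from this sketch:

```python
# sites/example.py -- hypothetical skeleton only; mirror ao3.py for the real API.
from . import Site, SiteException  # assumed exports of the sites package


class Example(Site):
    """Handler for stories hosted on example.com (illustrative only)."""

    @staticmethod
    def matches(url):
        # assumption: handlers declare which URLs they can handle
        return url.startswith("https://example.com/")

    def extract(self, url):
        # leech.py builds this object as site(session, options=options) and
        # expects extract() to return a story exposing .title, .author,
        # .cover_url and its chapters, or to raise SiteException on failure.
        raise SiteException("example.com support is not implemented")
```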
173 |
174 | Images support
175 | ---
176 |
177 | Leech creates EPUB 2.0.1 files, which means that Leech can only save images in the following
178 | formats:
179 | - JPEG (JPG/JFIF)
180 | - PNG
181 | - GIF
182 |
183 | See the [Open Publication Structure (OPS) 2.0.1](https://idpf.org/epub/20/spec/OPS_2.0.1_draft.htm#TOC2.3.4) for more information.
184 |
185 | Leech cannot save SVG images, because SVG is not supported by Pillow.
186 |
187 | Leech uses [Pillow](https://pillow.readthedocs.io/en/stable/index.html) for image manipulation and conversion. If you want to use a different
188 | image format, you can install the extra dependencies Pillow needs, but you will probably also have to tinker with Leech itself. See the [Pillow documentation](https://pillow.readthedocs.io/en/stable/installation.html#external-libraries) for more information.
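
For reference, the conversion itself is ordinary Pillow usage, roughly what `_convert_to_new_format` in `ebook/image.py` does (the real function also catches errors and substitutes a fallback image):

```python
# Minimal sketch of converting image bytes to another format with Pillow.
from io import BytesIO
from PIL import Image

def convert_image(data: bytes, image_format: str = "PNG") -> bytes:
    out = BytesIO()
    Image.open(BytesIO(data)).save(out, format=image_format.upper())
    return out.getvalue()
```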
189 |
190 | To configure image support, you will need to create a file called `leech.json`. See the Configuration section above for more information.
191 |
192 | Docker
193 | ---
194 |
195 | You can build the project's Docker container like this:
196 |
197 | ```shell
198 | docker build . -t kemayo/leech:snapshot
199 | ```
200 |
201 | The container's entrypoint runs `leech` directly and sets the current working directory to `/work`, so you can mount any directory there:
202 |
203 | ```shell
204 | docker run -it --rm -v ${DIR}:/work kemayo/leech:snapshot download [[URL]]
205 | ```
206 |
207 | Contributing
208 | ---
209 |
210 | If you submit a pull request to add support for another reasonably-general-purpose site, I will nigh-certainly accept it.
211 |
212 | Run [EpubCheck](https://github.com/IDPF/epubcheck) on epubs you generate to make sure they're not breaking.
213 |
--------------------------------------------------------------------------------
/ebook/__init__.py:
--------------------------------------------------------------------------------
1 | from .epub import make_epub, EpubFile
2 | from .cover import make_cover, make_cover_from_url
3 | from .image import get_image_from_url
4 |
5 | import html
6 | import unicodedata
7 | import datetime
8 | from attrs import define, asdict
9 |
10 | html_template = '''
11 |
12 |
' for k, v in extra_metadata.items())
172 |
173 | valid_image_options = ('image_fetch', 'image_format', 'compress_images',
174 | 'max_image_size', 'always_convert_images')
175 | image_options = ImageOptions(
176 | **{k: v for k, v in image_options.items() if k in valid_image_options})
177 | image_options = asdict(image_options, filter=lambda k, v: v is not None)
178 |
179 | valid_cover_options = ('fontname', 'fontsize', 'width',
180 | 'height', 'wrapat', 'bgcolor', 'textcolor', 'cover_url')
181 | cover_options = CoverOptions(
182 | **{k: v for k, v in cover_options.items() if k in valid_cover_options})
183 | cover_options = asdict(cover_options, filter=lambda k, v: v is not None)
184 |
185 | if cover_options and "cover_url" in cover_options:
186 | image = make_cover_from_url(
187 | cover_options["cover_url"], story.title, story.author)
188 | elif story.cover_url:
189 | image = make_cover_from_url(story.cover_url, story.title, story.author)
190 | else:
191 | image = make_cover(story.title, story.author, **cover_options)
192 |
193 | return make_epub(
194 | output_filename or story.title + '.epub',
195 | [
196 | # The cover is static, and the only change comes from the image which we generate
197 | EpubFile(title='Cover', path='cover.html', contents=cover_template),
198 | EpubFile(title='Front Matter', path='frontmatter.html', contents=frontmatter_template.format(
199 | now=datetime.datetime.now(), **metadata)),
200 | *chapter_html(
201 | story,
202 | image_options=image_options,
203 | normalize=normalize,
204 | session=session
205 | ),
206 | EpubFile(
207 | path='Styles/base.css',
208 | contents=session.get(
209 | 'https://raw.githubusercontent.com/mattharrison/epub-css-starter-kit/master/css/base.css').text,
210 | filetype='text/css'
211 | ),
212 | EpubFile(path='images/cover.png',
213 | contents=image.read(), filetype='image/png'),
214 | ],
215 | metadata,
216 | output_dir=output_dir,
217 | allow_spaces=allow_spaces
218 | )
219 |
--------------------------------------------------------------------------------
/ebook/cover.py:
--------------------------------------------------------------------------------
1 |
2 | from PIL import Image, ImageDraw
3 | from io import BytesIO
4 | import textwrap
5 | import requests
6 | import logging
7 | from . import image
8 |
9 | logger = logging.getLogger(__name__)
10 |
11 |
12 | def make_cover(title, author, width=600, height=800, fontname="Helvetica", fontsize=40, bgcolor=(120, 20, 20), textcolor=(255, 255, 255), wrapat=30):
13 | img = Image.new("RGBA", (width, height), bgcolor)
14 | draw = ImageDraw.Draw(img)
15 |
16 | title = textwrap.fill(title, wrapat)
17 | author = textwrap.fill(author, wrapat)
18 |
19 | font = image._safe_font(fontname, size=fontsize)
20 | title_size = image.textsize(draw, title, font=font)
21 | image.draw_text_outlined(draw, ((width - title_size[0]) / 2, 100), title, textcolor, font=font)
22 | # draw.text(((width - title_size[0]) / 2, 100), title, textcolor, font=font)
23 |
24 | font = image._safe_font(fontname, size=fontsize - 2)
25 | author_size = image.textsize(draw, author, font=font)
26 | image.draw_text_outlined(draw, ((width - author_size[0]) / 2, 100 + title_size[1] + 70), author, textcolor, font=font)
27 |
28 | output = BytesIO()
29 | img.save(output, "PNG")
30 | output.name = 'cover.png'
31 | # writing left the cursor at the end of the file, so reset it
32 | output.seek(0)
33 | return output
34 |
35 |
36 | def make_cover_from_url(url, title, author):
37 | try:
38 | logger.info("Downloading cover from " + url)
39 | img = requests.Session().get(url)
40 | cover = BytesIO(img.content)
41 |
42 | imgformat = Image.open(cover).format
43 | # The `Image.open` read a few bytes from the stream to work out the
44 | # format, so reset it:
45 | cover.seek(0)
46 |
47 | if imgformat != "PNG":
48 | cover = image._convert_to_new_format(cover, "PNG")
49 | except Exception as e:
50 | logger.info("Encountered an error downloading cover: " + str(e))
51 | cover = make_cover(title, author)
52 |
53 | return cover
54 |
55 |
56 | if __name__ == '__main__':
57 | f = make_cover('Test of a Title which is quite long and will require multiple lines', 'Some Dude')
58 | with open('output.png', 'wb') as out:
59 | out.write(f.read())
60 |
--------------------------------------------------------------------------------
/ebook/epub.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | import os.path
4 | import zipfile
5 | import xml.etree.ElementTree as etree
6 | import uuid
7 | import string
8 | from collections import namedtuple
9 |
10 | """
11 | So, an epub is approximately a zipfile of HTML files, with
12 | a bit of metadata thrown in for good measure.
13 |
14 | This totally started from http://www.manuel-strehl.de/dev/simple_epub_ebooks_with_python.en.html
15 | """
16 |
17 |
18 | EpubFile = namedtuple('EbookFile', 'path, contents, title, filetype', defaults=(False, False, "application/xhtml+xml"))
19 |
20 |
21 | def sanitize_filename(s, allow_spaces=False):
22 | """Take a string and return a valid filename constructed from the string.
23 | Uses a whitelist approach: any characters not present in valid_chars are
24 | removed. Also spaces are replaced with underscores.
25 |
26 | Note: this method may produce invalid filenames such as ``, `.` or `..`
27 | When I use this method I prepend a date string like '2009_01_15_19_46_32_'
28 | and append a file extension like '.txt', so I avoid the potential of using
29 | an invalid filename.
30 |
31 | """
32 | valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits)
33 | filename = ''.join(c for c in s if c in valid_chars)
34 | if not allow_spaces:
35 | filename = filename.replace(' ', '_') # I don't like spaces in filenames.
36 | return filename
37 |
38 |
39 | def make_epub(filename, files, meta, compress=True, output_dir=False, allow_spaces=False):
40 | unique_id = meta.get('unique_id', False)
41 | if not unique_id:
42 | unique_id = 'leech_book_' + str(uuid.uuid4())
43 |
44 | filename = sanitize_filename(filename, allow_spaces)
45 | if output_dir:
46 | filename = os.path.join(output_dir, filename)
47 | epub = zipfile.ZipFile(filename, 'w', compression=compress and zipfile.ZIP_DEFLATED or zipfile.ZIP_STORED)
48 |
49 | # The first file must be named "mimetype", and shouldn't be compressed
50 | epub.writestr("mimetype", "application/epub+zip", compress_type=zipfile.ZIP_STORED)
51 |
52 | # We need an index file, that lists all other HTML files
53 | # This index file itself is referenced in the META_INF/container.xml
54 | # file
55 | container = etree.Element('container', version="1.0", xmlns="urn:oasis:names:tc:opendocument:xmlns:container")
56 | rootfiles = etree.SubElement(container, 'rootfiles')
57 | etree.SubElement(rootfiles, 'rootfile', {
58 | 'full-path': "OEBPS/Content.opf",
59 | 'media-type': "application/oebps-package+xml",
60 | })
61 | epub.writestr("META-INF/container.xml", etree.tostring(container))
62 |
63 | package = etree.Element('package', {
64 | 'version': "2.0",
65 | 'xmlns': "http://www.idpf.org/2007/opf",
66 | 'unique-identifier': 'book_identifier', # could plausibly be based on the name
67 | })
68 |
69 | # build the metadata
70 | metadata = etree.SubElement(package, 'metadata', {
71 | 'xmlns:dc': "http://purl.org/dc/elements/1.1/",
72 | 'xmlns:opf': "http://www.idpf.org/2007/opf",
73 | })
74 | identifier = etree.SubElement(metadata, 'dc:identifier', id='book_identifier')
75 | if unique_id.find('://') != -1:
76 | identifier.set('opf:scheme', "URI")
77 | identifier.text = unique_id
78 | etree.SubElement(metadata, 'dc:title').text = meta.get('title', 'Untitled')
79 | etree.SubElement(metadata, 'dc:language').text = meta.get('language', 'en')
80 | etree.SubElement(metadata, 'dc:creator', {'opf:role': 'aut'}).text = meta.get('author', 'Unknown')
81 | etree.SubElement(metadata, 'meta', {'name': 'generator', 'content': 'leech'})
82 |
83 | # we'll need a manifest and spine
84 | manifest = etree.SubElement(package, 'manifest')
85 | spine = etree.SubElement(package, 'spine', toc="ncx")
86 | guide = etree.SubElement(package, 'guide')
87 |
88 | # ...and the ncx index
89 | ncx = etree.Element('ncx', {
90 | 'xmlns': "http://www.daisy.org/z3986/2005/ncx/",
91 | 'version': "2005-1",
92 | 'xml:lang': "en-US",
93 | })
94 | etree.SubElement(etree.SubElement(ncx, 'head'), 'meta', name="dtb:uid", content=unique_id)
95 | etree.SubElement(etree.SubElement(ncx, 'docTitle'), 'text').text = meta.get('title', 'Untitled')
96 | etree.SubElement(etree.SubElement(ncx, 'docAuthor'), 'text').text = meta.get('author', 'Unknown')
97 | navmap = etree.SubElement(ncx, 'navMap')
98 |
99 | # Write each HTML file to the ebook, collect information for the index
100 | for i, file in enumerate(files):
101 | file_id = 'file_%d' % (i + 1)
102 | etree.SubElement(manifest, 'item', {
103 | 'id': file_id,
104 | 'href': file.path,
105 | 'media-type': file.filetype,
106 | })
107 | if file.filetype == "application/xhtml+xml":
108 | itemref = etree.SubElement(spine, 'itemref', idref=file_id)
109 | point = etree.SubElement(navmap, 'navPoint', {
110 | 'class': "h1",
111 | 'id': file_id,
112 | })
113 | etree.SubElement(etree.SubElement(point, 'navLabel'), 'text').text = file.title
114 | etree.SubElement(point, 'content', src=file.path)
115 |
116 | if 'cover.html' == os.path.basename(file.path):
117 | etree.SubElement(guide, 'reference', {
118 | 'type': 'cover',
119 | 'title': 'Cover',
120 | 'href': file.path,
121 | })
122 | itemref.set('linear', 'no')
123 | if 'images/cover.png' == file.path:
124 | etree.SubElement(metadata, 'meta', {
125 | 'name': 'cover',
126 | 'content': file_id,
127 | })
128 |
129 | # and add the actual html to the zip
130 | if file.contents:
131 | epub.writestr('OEBPS/' + file.path, file.contents)
132 | else:
133 | epub.write(file.path, 'OEBPS/' + file.path)
134 |
135 | # ...and add the ncx to the manifest
136 | etree.SubElement(manifest, 'item', {
137 | 'id': 'ncx',
138 | 'href': 'toc.ncx',
139 | 'media-type': "application/x-dtbncx+xml",
140 | })
141 | epub.writestr('OEBPS/toc.ncx', etree.tostring(ncx))
142 |
143 | # Finally, write the index
144 | epub.writestr('OEBPS/Content.opf', etree.tostring(package))
145 |
146 | epub.close()
147 |
148 | return filename
149 |
150 |
151 | if __name__ == '__main__':
152 | make_epub('test.epub', [EpubFile(title='Chapter 1', path='a.html', contents="Test"), EpubFile(title='Chapter 2', path='test/b.html', contents="Still a test")], {})
153 |
--------------------------------------------------------------------------------
/ebook/image.py:
--------------------------------------------------------------------------------
1 | # Basically the same as cover.py with some minor differences
2 | import PIL
3 | from PIL import Image, ImageDraw, ImageFont
4 | from io import BytesIO
5 | from base64 import b64decode
6 | import math
7 | import textwrap
8 | import requests
9 | import logging
10 |
11 | from typing import Tuple
12 |
13 | logger = logging.getLogger(__name__)
14 |
15 |
16 | def get_size_format(b, factor=1000, suffix="B"):
17 | """
18 | Scale bytes to its proper byte format
19 | e.g:
20 | 1253656 => '1.20MB'
21 | 1253656678 => '1.17GB'
22 | """
23 | for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]:
24 | if b < factor:
25 | return f"{b:.2f}{unit}{suffix}"
26 | b /= factor
27 | return f"{b:.2f}Y{suffix}"
28 |
29 |
30 | def compress_image(image: BytesIO, target_size: int, image_format: str) -> PIL.Image.Image:
31 | image_size = get_size_format(len(image.getvalue()))
32 | logger.info(f"Image size: {image_size}")
33 |
34 | big_photo = Image.open(image).convert("RGBA")
35 |
36 | target_pixel_count = 2.8114 * target_size
37 | if len(image.getvalue()) > target_size:
38 | logger.info(f"Image is greater than {get_size_format(target_size)}, compressing")
39 | scale_factor = target_pixel_count / math.prod(big_photo.size)
40 | if scale_factor < 1:
41 | x, y = tuple(int(scale_factor * dim) for dim in big_photo.size)
42 | logger.info(f"Resizing image dimensions from {big_photo.size} to ({x}, {y})")
43 | sml_photo = big_photo.resize((x, y), resample=Image.LANCZOS)
44 | else:
45 | sml_photo = big_photo
46 | compressed_image_size = get_size_format(len(PIL_Image_to_bytes(sml_photo, image_format)))
47 | logger.info(f"Compressed image size: {compressed_image_size}")
48 | return sml_photo
49 | else:
50 | logger.info(f"Image is less than {get_size_format(target_size)}, not compressing")
51 | return big_photo
52 |
53 |
54 | def PIL_Image_to_bytes(
55 | pil_image: PIL.Image.Image,
56 | image_format: str
57 | ) -> bytes:
58 | out_io = BytesIO()
59 | if image_format.lower().startswith("gif"):
60 | frames = []
61 | current = pil_image.convert('RGBA')
62 | while True:
63 | try:
64 | frames.append(current)
65 | pil_image.seek(pil_image.tell() + 1)
66 | current = Image.alpha_composite(current, pil_image.convert('RGBA'))
67 | except EOFError:
68 | break
69 | frames[0].save(out_io, format=image_format, save_all=True, append_images=frames[1:], optimize=True, loop=0)
70 | return out_io.getvalue()
71 |
72 | elif image_format.lower() in ["jpeg", "jpg"]:
73 | # Create a new image with a white background
74 | background_img = Image.new('RGBA', pil_image.size, "white")
75 |
76 | # Paste the image on top of the background
77 | background_img.paste(pil_image.convert("RGBA"), (0, 0), pil_image.convert("RGBA"))
78 | pil_image = background_img.convert('RGB')
79 |
80 | pil_image.save(out_io, format=image_format, optimize=True, quality=95)
81 | return out_io.getvalue()
82 |
83 |
84 | def get_image_from_url(
85 | url: str,
86 | image_format: str = "JPEG",
87 | compress_images: bool = False,
88 | max_image_size: int = 1_000_000,
89 | always_convert: bool = False,
90 | session: requests.Session = None
91 | ) -> Tuple[bytes, str, str]:
92 | """
93 | Based on make_cover_from_url(), this function takes in the image url usually gotten from the `src` attribute of
94 | an image tag and returns the image data, the image format and the image mime type
95 |
96 | @param url: The url of the image
97 | @param image_format: The format to convert the image to if it's not in the supported formats
98 | @param compress_images: Whether to compress the image or not
99 | @param max_image_size: The maximum size of the image in bytes
100 | @return: A tuple of the image data, the image format and the image mime type
101 | """
102 | logger.info("Downloading image: %s", url)
103 | session = session or requests.Session()
104 | try:
105 | if url.startswith("https://www.filepicker.io/api/"):
106 | logger.warning("Filepicker.io image detected, converting to Fiction.live image. This might fail.")
107 | url = f"https://cdn3.fiction.live/fp/{url.split('/')[-1]}?&quality=95"
108 | elif url.startswith("https://cdn3.fiction.live/images/") or url.startswith("https://ddx5i92cqts4o.cloudfront.net/images/"):
109 | logger.warning("Converting url to cdn6. This might fail.")
110 | url = f"https://cdn6.fiction.live/file/fictionlive/images/{url.split('/images/')[-1]}"
111 | elif url.startswith("data:image") and 'base64' in url:
112 | logger.info("Base64 image detected")
113 | head, base64data = url.split(',')
114 | file_ext = str(head.split(';')[0].split('/')[1])
115 | imgdata = b64decode(base64data)
116 | if compress_images:
117 | if file_ext.lower() == "gif":
118 | logger.info("GIF images should not be compressed, skipping compression")
119 | else:
120 | compressed_base64_image = compress_image(BytesIO(imgdata), max_image_size, file_ext)
121 | imgdata = PIL_Image_to_bytes(compressed_base64_image, file_ext)
122 |
123 | if file_ext.lower() not in ["jpg", "jpeg", "png", "gif"]:
124 | logger.info(f"Image format {file_ext} not supported by EPUB2.0.1, converting to {image_format}")
125 | return _convert_to_new_format(imgdata, image_format).read(), image_format.lower(), f"image/{image_format.lower()}"
126 | return imgdata, file_ext, f"image/{file_ext}"
127 |
128 | img = session.get(url, timeout=(6.01, 30))
129 | image = BytesIO(img.content)
130 | image.seek(0)
131 |
132 | PIL_image = Image.open(image)
133 |
134 | current_format = str(PIL_image.format)
135 |
136 | if current_format.lower() == "gif":
137 | PIL_image = Image.open(image)
138 | if PIL_image.info['version'] not in [b"GIF89a", "GIF89a"]:
139 | PIL_image.info['version'] = b"GIF89a"
140 | return PIL_Image_to_bytes(PIL_image, "GIF"), "gif", "image/gif"
141 |
142 | if compress_images:
143 | PIL_image = compress_image(image, max_image_size, current_format)
144 |
145 | if always_convert:
146 | current_format = image_format
147 |
148 | return PIL_Image_to_bytes(PIL_image, current_format), current_format, f"image/{current_format.lower()}"
149 |
150 | except Exception as e:
151 | logger.info("Encountered an error downloading image: " + str(e))
152 | image = make_fallback_image("There was a problem downloading this image.").read()
153 | return image, "jpeg", "image/jpeg"
154 |
155 |
156 | def make_fallback_image(
157 | message: str,
158 | width=600,
159 | height=300,
160 | fontname="Helvetica",
161 | font_size=40,
162 | bg_color=(0, 0, 0),
163 | textcolor=(255, 255, 255),
164 | wrap_at=30
165 | ):
166 | """
167 | This function should only be called if get_image_from_url() fails
168 | """
169 | img = Image.new("RGB", (width, height), bg_color)
170 | draw = ImageDraw.Draw(img)
171 |
172 | message = textwrap.fill(message, wrap_at)
173 |
174 | font = _safe_font(fontname, size=font_size)
175 | message_size = textsize(draw, message, font=font)
176 | draw_text_outlined(
177 | draw, ((width - message_size[0]) / 2, 100), message, textcolor, font=font)
178 | # draw.text(((width - title_size[0]) / 2, 100), title, textcolor, font=font)
179 |
180 | output = BytesIO()
181 | img.save(output, "JPEG")
182 | # writing left the cursor at the end of the file, so reset it
183 | output.seek(0)
184 | return output
185 |
186 |
187 | def _convert_to_new_format(image_bytestream, image_format: str):
188 | new_image = BytesIO()
189 | try:
190 | Image.open(image_bytestream).save(new_image, format=image_format.upper())
191 | new_image.seek(0)
192 | except Exception as e:
193 | logger.info(f"Encountered an error converting image to {image_format}\nError: {e}")
194 | new_image = make_fallback_image("There was a problem converting this image.")
195 | return new_image
196 |
197 |
198 | def _safe_font(preferred, *args, **kwargs):
199 | for font in (preferred, "Helvetica", "FreeSans", "Arial"):
200 | try:
201 | return ImageFont.truetype(*args, font=font, **kwargs)
202 | except IOError:
203 | pass
204 |
205 | # This is pretty terrible, but it'll work regardless of what fonts the
206 | # system has. Worst issue: can't set the size.
207 | return ImageFont.load_default()
208 |
209 |
210 | def textsize(draw, text, **kwargs):
211 | left, top, right, bottom = draw.multiline_textbbox((0, 0), text, **kwargs)
212 | width, height = right - left, bottom - top
213 | return width, height
214 |
215 |
216 | def draw_text_outlined(draw, xy, text, fill=None, font=None, anchor=None):
217 | x, y = xy
218 |
219 | # Outline
220 | draw.text((x - 1, y), text=text, fill=(0, 0, 0), font=font, anchor=anchor)
221 | draw.text((x + 1, y), text=text, fill=(0, 0, 0), font=font, anchor=anchor)
222 | draw.text((x, y - 1), text=text, fill=(0, 0, 0), font=font, anchor=anchor)
223 | draw.text((x, y + 1), text=text, fill=(0, 0, 0), font=font, anchor=anchor)
224 |
225 | # Fill
226 | draw.text(xy, text=text, fill=fill, font=font, anchor=anchor)
227 |
228 |
229 | if __name__ == '__main__':
230 | f = make_fallback_image(
231 |         'Test of a Title which is quite long and will require multiple lines'
232 |         # the remaining arguments (width, height, font, ...) have sensible defaults
233 |     )
234 | with open('output.png', 'wb') as out:
235 | out.write(f.read())
236 |
--------------------------------------------------------------------------------
/examples/cultivationchatgroup.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://novelfull.com/cultivation-chat-group/chapter-1-mt-yellows-true-monarch-and-nine-provinces-1-group.html",
3 | "title": "Cultivation Chat Group",
4 | "author": "Legend of the Paladin",
5 | "content_selector": "#chapter",
6 | "content_title_selector": "h2 .chapter-text",
7 | "content_text_selector": "#chapter-content",
8 | "filter_selector": "style, script, .adsbygoogle, .ads",
9 | "next_selector": "#next_chap[href]"
10 | }
11 |
--------------------------------------------------------------------------------
/examples/dungeonkeeperami.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://forums.sufficientvelocity.com/threads/dungeon-keeper-ami-sailor-moon-dungeon-keeper-story-only-thread.30066/",
3 | "title": "Dungeon Keeper Ami",
4 | "author": "Pusakuronu",
5 | "content_selector": "article.message-body .bbWrapper",
6 | "filter_selector": ".sharedaddy, .wpcnt, style",
7 | "next_selector": "link[rel=next]"
8 | }
9 |
--------------------------------------------------------------------------------
/examples/fifthdefiance.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://thefifthdefiance.com/chapters/",
3 | "title": "The Fifth Defiance",
4 | "author": "Walter",
5 | "chapter_selector": ".entry-content > p > a",
6 | "content_selector": ".entry-content",
7 | "content_title_selector": ".entry-title",
8 | "filter_selector": ".sharedaddy, .wpcnt, style"
9 | }
--------------------------------------------------------------------------------
/examples/heretical-edge-2.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://ceruleanscrawling.wordpress.com/heretical-edge-2-table-of-contents/",
3 | "title": "Heretical Edge 2",
4 | "author": "Ceruelean",
5 | "chapter_selector": "article .entry-content > p > a:not([href*=patreon])",
6 | "content_selector": "article .entry-content",
7 | "filter_selector": ".sharedaddy, .wpcnt, style"
8 | }
9 |
--------------------------------------------------------------------------------
/examples/heretical-edge.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://ceruleanscrawling.wordpress.com/table-of-contents/",
3 | "title": "Heretical Edge",
4 | "author": "Ceruelean",
5 | "chapter_selector": "article .entry-content > p > a",
6 | "content_selector": "article .entry-content",
7 | "filter_selector": ".sharedaddy, .wpcnt, style"
8 | }
9 |
--------------------------------------------------------------------------------
/examples/pact.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://pactwebserial.wordpress.com/2013/12/17/bonds-1-1/",
3 | "title": "Pact",
4 | "author": "Wildbow",
5 | "content_selector": "#main",
6 | "content_title_selector": "h1.entry-title",
7 | "content_text_selector": ".entry-content",
8 | "filter_selector": ".sharedaddy, style, a[href*='pactwebserial.wordpress.com']",
9 | "next_selector": "a[rel=\"next\"]",
10 | "cover_url": "https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/a456e440-ea22-45c0-8b39-dacf9bbddade/d7dxaz4-64cfabe8-f957-44af-aaea-82346c401b27.jpg?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJ1cm46YXBwOiIsImlzcyI6InVybjphcHA6Iiwib2JqIjpbW3sicGF0aCI6IlwvZlwvYTQ1NmU0NDAtZWEyMi00NWMwLThiMzktZGFjZjliYmRkYWRlXC9kN2R4YXo0LTY0Y2ZhYmU4LWY5NTctNDRhZi1hYWVhLTgyMzQ2YzQwMWIyNy5qcGcifV1dLCJhdWQiOlsidXJuOnNlcnZpY2U6ZmlsZS5kb3dubG9hZCJdfQ.J-Wn8bDrKmoKKZW8mkJdi3uRoDV2FDJQZ_TuTWvQazY"
11 | }
12 |
--------------------------------------------------------------------------------
/examples/paeantosmac.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://paeantosmac.wordpress.com/2015/02/17/introduction/",
3 | "title": "Paean to SMAC",
4 | "author": "Nick Stipanovich",
5 | "content_selector": "article.post",
6 | "content_title_selector": "header h1",
7 | "content_text_selector": "div.entry-content",
8 | "filter_selector": ".sharedaddy, .wpcnt, style",
9 | "next_selector": "link[rel=next]"
10 | }
11 |
--------------------------------------------------------------------------------
/examples/pale-lights.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://palelights.com/2022/08/17/chapter-1/",
3 | "title": "Pale Lights",
4 | "author": "erraticerrata",
5 | "content_selector": "#main",
6 | "content_title_selector": "h1.entry-title",
7 | "content_text_selector": ".entry-content",
8 | "filter_selector": ".sharedaddy, .wpcnt, style",
9 | "next_selector": "a[rel=\"next\"]",
10 | "cover_url": "https://www.royalroadcdn.com/public/covers-large/pale-lights-aaaay6-1-bi.jpg"
11 | }
12 |
--------------------------------------------------------------------------------
/examples/pale-withextras.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://palewebserial.wordpress.com/2020/05/05/blood-run-cold-0-0/",
3 | "title": "Pale",
4 | "author": "Wildbow",
5 | "content_selector": "#main",
6 | "content_title_selector": "h1.entry-title",
7 | "content_text_selector": ".entry-content",
8 | "filter_selector": ".sharedaddy, style, a[href*='palewebserial.wordpress.com']",
9 | "next_selector": "a[rel=\"next\"]"
10 | }
11 |
--------------------------------------------------------------------------------
/examples/pale.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://palewebserial.wordpress.com/table-of-contents/",
3 | "title": "Pale",
4 | "author": "Wildbow",
5 | "content_selector": "#main",
6 | "content_title_selector": "h1.entry-title",
7 | "content_text_selector": ".entry-content",
8 | "chapter_selector": "article .entry-content > p a",
9 | "filter_selector": ".sharedaddy, style, a[href*='palewebserial.wordpress.com']"
10 | }
11 |
--------------------------------------------------------------------------------
/examples/phoenixdestiny.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "http://gravitytales.com/novel/phoenix-destiny/pd-chapter-1",
3 | "title": "Phoenix Destiny",
4 | "author": "Yun Ji",
5 | "content_selector": "#contentElement",
6 | "content_title_selector": "h4",
7 | "content_text_selector": "#chapterContent",
8 | "filter_selector": ".sharedaddy, .wpcnt, style",
9 | "next_selector": ".chapter-navigation > a:last-child[href*=\"pd-chapter\"]"
10 | }
11 |
--------------------------------------------------------------------------------
/examples/practical1.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://practicalguidetoevil.wordpress.com/2015/03/25/prologue/",
3 | "title": "A Practical Guide To Evil: Book 1",
4 | "author": "erraticerrata",
5 | "content_selector": "#main",
6 | "content_title_selector": "h1.entry-title",
7 | "content_text_selector": ".entry-content",
8 | "filter_selector": ".sharedaddy, .wpcnt, style",
9 | "next_selector": "a[rel=\"next\"]:not([href*=\"prologue\"])",
10 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png"
11 | }
12 |
--------------------------------------------------------------------------------
/examples/practical2.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://practicalguidetoevil.wordpress.com/2015/11/04/prologue-2/",
3 | "title": "A Practical Guide To Evil: Book 2",
4 | "author": "erraticerrata",
5 | "content_selector": "#main",
6 | "content_title_selector": "h1.entry-title",
7 | "content_text_selector": ".entry-content",
8 | "filter_selector": ".sharedaddy, .wpcnt, style",
9 | "next_selector": "a[rel=\"next\"]:not([href*=\"prologue\"])",
10 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png"
11 | }
--------------------------------------------------------------------------------
/examples/practical3.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://practicalguidetoevil.wordpress.com/2017/02/08/prologue-3/",
3 | "title": "A Practical Guide To Evil: Book 3",
4 | "author": "erraticerrata",
5 | "content_selector": "#main",
6 | "content_title_selector": "h1.entry-title",
7 | "content_text_selector": ".entry-content",
8 | "filter_selector": ".sharedaddy, .wpcnt, style",
9 | "next_selector": "a[rel=\"next\"]:not([href*=\"prologue\"])",
10 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png"
11 | }
12 |
--------------------------------------------------------------------------------
/examples/practical4.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://practicalguidetoevil.wordpress.com/2018/04/09/prologue-4/",
3 | "title": "A Practical Guide To Evil: Book 4",
4 | "author": "erraticerrata",
5 | "content_selector": "#main",
6 | "content_title_selector": "h1.entry-title",
7 | "content_text_selector": ".entry-content",
8 | "filter_selector": ".sharedaddy, .wpcnt, style",
9 | "next_selector": "a[rel=\"next\"]:not([href*=\"prologue\"])",
10 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png"
11 | }
12 |
--------------------------------------------------------------------------------
/examples/practical5.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://practicalguidetoevil.wordpress.com/2019/01/14/prologue-5/",
3 | "title": "A Practical Guide To Evil: Book 5",
4 | "author": "erraticerrata",
5 | "content_selector": "#main",
6 | "content_title_selector": "h1.entry-title",
7 | "content_text_selector": ".entry-content",
8 | "filter_selector": ".sharedaddy, .wpcnt, style",
9 | "next_selector": "a[rel=\"next\"]:not([href*=\"prologue\"])",
10 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png"
11 | }
12 |
--------------------------------------------------------------------------------
/examples/practical6.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://practicalguidetoevil.wordpress.com/2020/01/06/prologue-6/",
3 | "title": "A Practical Guide To Evil: Book 6",
4 | "author": "erraticerrata",
5 | "content_selector": "#main",
6 | "content_title_selector": "h1.entry-title",
7 | "content_text_selector": ".entry-content",
8 | "filter_selector": ".sharedaddy, .wpcnt, style",
9 | "next_selector": "a[rel=\"next\"]:not([href*=\"prologue\"])",
10 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png"
11 | }
12 |
--------------------------------------------------------------------------------
/examples/practical7.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://practicalguidetoevil.wordpress.com/2021/03/02/prologue-7/",
3 | "title": "A Practical Guide To Evil: Book 7",
4 | "author": "erraticerrata",
5 | "content_selector": "#main",
6 | "content_title_selector": "h1.entry-title",
7 | "content_text_selector": ".entry-content",
8 | "filter_selector": ".sharedaddy, .wpcnt, style",
9 | "next_selector": "a[rel=\"next\"]:not([href*=\"prologue\"])",
10 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png"
11 | }
--------------------------------------------------------------------------------
/examples/practicalall.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://practicalguidetoevil.wordpress.com/2015/03/25/prologue/",
3 | "title": "A Practical Guide To Evil",
4 | "author": "erraticerrata",
5 | "content_selector": "#main",
6 | "content_title_selector": "h1.entry-title",
7 | "content_text_selector": ".entry-content",
8 | "filter_selector": ".sharedaddy, .wpcnt, style",
9 | "next_selector": "a[rel=\"next\"]",
10 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png"
11 | }
--------------------------------------------------------------------------------
/examples/practicalextra.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://practicalguidetoevil.wordpress.com/extra-chapters/",
3 | "title": "A Practical Guide To Evil: Extra Chapters",
4 | "author": "erraticerrata",
5 | "chapter_selector": "#main .entry-content > ul > li > a",
6 | "content_selector": "#main .entry-content",
7 | "filter_selector": ".sharedaddy, .wpcnt, style",
8 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png"
9 | }
10 |
--------------------------------------------------------------------------------
/examples/sagaofsoul.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "http://www.sagaofsoul.com/story.html",
3 | "title": "Saga of Soul",
4 | "author": "Ouri Maler",
5 | "chapter_selector": "#mainbody li a",
6 | "content_selector": "#mainbody",
7 | "filter_selector": "script, noscript"
8 | }
9 |
--------------------------------------------------------------------------------
/examples/shouldthesun.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://shouldthesun.wordpress.com/",
3 | "title": "Should The Sun Not Rise",
4 | "author": "Omicron",
5 | "chapter_selector": "#text-1 li a",
6 | "content_selector": ".entry-content",
7 | "filter_selector": ".sharedaddy, style, a[href*='shouldthesun.wordpress.com']",
8 | "cover_url": "https://shouldthesun.files.wordpress.com/2017/09/itzpapalotl.jpg"
9 | }
10 |
--------------------------------------------------------------------------------
/examples/thegodsarebastards.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://tiraas.wordpress.com/table-of-contents/",
3 | "title": "The Gods Are Bastards",
4 | "author": "D. D. Webb",
5 | "chapter_selector": "article .entry-content a[href*='20']",
6 | "content_selector": "article .entry-content",
7 | "filter_selector": ".sharedaddy, .wpcnt, style, a[href*='tiraas.wordpress.com']",
8 | "cover_url": "https://tiraas.files.wordpress.com/2016/02/classof1182byhoarous.png"
9 | }
10 |
--------------------------------------------------------------------------------
/examples/twig.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://twigserial.wordpress.com/2014/12/24/taking-root-1-1/",
3 | "title": "Twig",
4 | "author": "Wildbow",
5 | "content_selector": "#main",
6 | "content_title_selector": "h1.entry-title",
7 | "content_text_selector": ".entry-content",
8 | "filter_selector": ".sharedaddy, style, a[href*='twigserial.wordpress.com']",
9 | "next_selector": "a[rel=\"next\"]",
10 | "cover_url": "https://twigserial.files.wordpress.com/2015/03/cropped-twig-commission-titled1.png"
11 | }
12 |
--------------------------------------------------------------------------------
/examples/unsong.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://unsongbook.com/prologue-2/",
3 | "title": "Unsong",
4 | "author": "Scott Alexander",
5 | "content_selector": "#pjgm-content",
6 | "content_title_selector": "h1.pjgm-posttitle",
7 | "content_text_selector": ".pjgm-postcontent",
8 | "filter_selector": ".sharedaddy, style",
9 | "next_selector": "a[rel=\"next\"]",
10 | "cover_url": "https://i.imgur.com/d9LvKMc.png"
11 | }
12 |
--------------------------------------------------------------------------------
/examples/vacantthrone.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://tcthrone.wordpress.com/",
3 | "title": "Vacant Throne",
4 | "author": "TCurator",
5 | "chapter_selector": "#main .entry-content > p a[href*=\"vacant-throne-\"]",
6 | "content_selector": "#main .entry-content",
7 | "filter_selector": ".sharedaddy, style, p:nth-of-type(1), a[href*='tcthrone.wordpress.com']"
8 | }
9 |
--------------------------------------------------------------------------------
/examples/wanderinginn.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://wanderinginn.com/table-of-contents/",
3 | "title": "The Wandering Inn",
4 | "author": "pirate aba",
5 | "cover_url": "https://i0.wp.com/wanderinginn.com/wp-content/uploads/2023/03/Wandering_Inn-Vol1-eCover.jpg?ssl=1",
6 | "chapter_selector": "#table-of-contents .chapter-entry .body-web > a",
7 | "content_selector": ".entry-content",
8 | "filter_selector": "hr:last-of-type, hr:last-of-type ~ *"
9 | }
10 |
--------------------------------------------------------------------------------
/examples/ward.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://www.parahumans.net/table-of-contents/",
3 | "title": "Ward",
4 | "author": "Wildbow",
5 | "chapter_selector": "#main .entry-content a",
6 | "content_selector": "#main .entry-content",
7 | "filter_selector": ".sharedaddy, style, a[href*='parahumans.wordpress.com'], p:first-of-type, p:last-of-type"
8 | }
--------------------------------------------------------------------------------
/examples/worm.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://parahumans.wordpress.com/2011/06/11/1-1/",
3 | "title": "Worm",
4 | "author": "Wildbow",
5 | "content_selector": "#main",
6 | "content_title_selector": "h1.entry-title",
7 | "content_text_selector": ".entry-content",
8 | "filter_selector": ".sharedaddy, style, a[href*='parahumans.wordpress.com']",
9 | "next_selector": "a[rel=\"next\"]",
10 | "cover_url": "https://pre00.deviantart.net/969a/th/pre/i/2015/051/8/7/worm_cover_by_cactusfantastico-d8ivj4b.png"
11 | }
12 |
--------------------------------------------------------------------------------
/leech.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import click
4 | import http.cookiejar
5 | import json
6 | import logging
7 | import os
8 | import requests
9 | import requests_cache
10 | import sqlite3
11 | from click_default_group import DefaultGroup
12 | from functools import reduce
13 |
14 | import sites
15 | import ebook
16 |
17 | __version__ = 2
18 | USER_AGENT = 'Leech/%s +http://davidlynch.org' % __version__
19 |
20 | logger = logging.getLogger(__name__)
21 |
22 |
23 | def configure_logging(verbose):
24 | if verbose:
25 | logging.basicConfig(
26 | level=logging.DEBUG,
27 | format="[%(name)s @ %(levelname)s] %(message)s"
28 | )
29 | else:
30 | logging.basicConfig(
31 | level=logging.INFO,
32 | format="[%(name)s] %(message)s"
33 | )
34 |
35 |
36 | def create_session(cache):
37 | if cache:
38 | session = requests_cache.CachedSession('leech', expire_after=4 * 3600)
39 | else:
40 | session = requests.Session()
41 |
42 | lwp_cookiejar = http.cookiejar.LWPCookieJar()
43 | try:
44 | lwp_cookiejar.load('leech.cookies', ignore_discard=True)
45 | except Exception:
46 | # This file is very much optional, so this log isn't really necessary
47 | # logging.exception("Couldn't load cookies from leech.cookies")
48 | pass
49 | session.cookies.update(lwp_cookiejar)
50 | session.headers.update({
51 | 'User-Agent': USER_AGENT,
52 | 'Accept-Language': 'en-US,en;q=0.5',
53 | 'Accept-Encoding': 'gzip, deflate',
54 | 'Accept': '*/*', # this is essential for imgur
55 | })
56 | return session
57 |
58 |
59 | def load_on_disk_options(site):
60 | try:
61 | with open('leech.json') as store_file:
62 | store = json.load(store_file)
63 | login = store.get('logins', {}).get(site.site_key(), False)
64 | cover_options = store.get('cover', {})
65 | image_options = store.get('images', {})
66 | consolidated_options = {
67 | **{k: v for k, v in store.items() if k not in ('cover', 'images', 'logins')},
68 | **store.get('site_options', {}).get(site.site_key(), {})
69 | }
70 | except FileNotFoundError:
71 |         logger.info("Unable to locate leech.json; continuing as if it does not exist.")
72 | login = False
73 | image_options = {}
74 | cover_options = {}
75 | consolidated_options = {}
76 | return consolidated_options, login, cover_options, image_options
77 |
78 |
79 | def create_options(site, site_options, unused_flags):
80 | """Compiles options provided from multiple different sources
81 | (e.g. on disk, via flags, via defaults, via JSON provided as a flag value)
82 | into a single options object."""
83 | default_site_options = site.get_default_options()
84 |
85 | flag_specified_site_options = site.interpret_site_specific_options(**unused_flags)
86 |
87 | configured_site_options, login, cover_options, image_options = load_on_disk_options(site)
88 |
89 | overridden_site_options = json.loads(site_options)
90 |
91 |     # The final options dictionary is computed by layering the default, cover, image,
92 |     # configured, overridden, and flag-specified options together, in that order.
93 | options = dict(
94 | list(default_site_options.items()) +
95 | list(cover_options.items()) +
96 | list(image_options.items()) +
97 | list(configured_site_options.items()) +
98 | list(overridden_site_options.items()) +
99 | list(flag_specified_site_options.items())
100 | )
101 | return options, login
102 |
103 |
104 | def open_story(site, url, session, login, options):
105 | handler = site(
106 | session,
107 | options=options
108 | )
109 |
110 | if login:
111 | handler.login(login)
112 |
113 | try:
114 | story = handler.extract(url)
115 | except sites.SiteException as e:
116 | logger.error(e)
117 | return
118 | if not story:
119 | logger.error("Couldn't extract story")
120 | return
121 | return story
122 |
123 |
124 | def site_specific_options(f):
125 | option_list = sites.list_site_specific_options()
126 | return reduce(lambda cmd, decorator: decorator(cmd), [f] + option_list)
127 |
128 |
129 | @click.group(cls=DefaultGroup, default='download', default_if_no_args=True)
130 | def cli():
131 | """Top level click group. Uses click-default-group to preserve most behavior from leech v1."""
132 | pass
133 |
134 |
135 | @cli.command()
136 | @click.option('--verbose', '-v', is_flag=True, help="verbose output")
137 | def flush(verbose):
138 | """Flushes the contents of the cache."""
139 | configure_logging(verbose)
140 | requests_cache.install_cache('leech')
141 | requests_cache.clear()
142 |
143 | conn = sqlite3.connect('leech.sqlite')
144 | conn.execute("VACUUM")
145 | conn.close()
146 |
147 | logger.info("Flushed cache")
148 |
149 |
150 | @cli.command()
151 | @click.argument('urls', nargs=-1, required=True)
152 | @click.option(
153 | '--site-options',
154 | default='{}',
155 |     help='JSON object encoding site-specific options.'
156 | )
157 | @click.option(
158 | '--output-dir',
159 | default=None,
160 | help='Directory to save generated ebooks'
161 | )
162 | @click.option('--cache/--no-cache', default=True)
163 | @click.option('--normalize/--no-normalize', default=True, help="Whether to normalize strange unicode text")
164 | @click.option('--verbose', '-v', is_flag=True, help="Verbose debugging output")
165 | @site_specific_options # Includes other click.options specific to sites
166 | def download(urls, site_options, cache, verbose, normalize, output_dir, **other_flags):
167 | """Downloads a story and saves it on disk as an epub ebook."""
168 | configure_logging(verbose)
169 | session = create_session(cache)
170 |
171 | for url in urls:
172 | site, url = sites.get(url)
173 | options, login = create_options(site, site_options, other_flags)
174 | story = open_story(site, url, session, login, options)
175 | if story:
176 | filename = ebook.generate_epub(
177 | story, options,
178 | image_options={
179 | 'image_fetch': options.get('image_fetch', True),
180 | 'image_format': options.get('image_format', 'jpeg'),
181 | 'compress_images': options.get('compress_images', False),
182 | 'max_image_size': options.get('max_image_size', 1_000_000),
183 | 'always_convert_images': options.get('always_convert_images', False)
184 | },
185 | normalize=normalize,
186 | output_dir=output_dir or options.get('output_dir', os.getcwd()),
187 | allow_spaces=options.get('allow_spaces', False),
188 | session=session,
189 | parser=options.get('parser', 'lxml')
190 | )
191 | logger.info("File created: " + filename)
192 | else:
193 | logger.warning("No ebook created")
194 |
195 |
196 | if __name__ == '__main__':
197 | cli()
198 |
--------------------------------------------------------------------------------
/poetry.lock:
--------------------------------------------------------------------------------
1 | # This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand.
2 |
3 | [[package]]
4 | name = "attrs"
5 | version = "25.1.0"
6 | description = "Classes Without Boilerplate"
7 | optional = false
8 | python-versions = ">=3.8"
9 | groups = ["main"]
10 | files = [
11 | {file = "attrs-25.1.0-py3-none-any.whl", hash = "sha256:c75a69e28a550a7e93789579c22aa26b0f5b83b75dc4e08fe092980051e1090a"},
12 | {file = "attrs-25.1.0.tar.gz", hash = "sha256:1c97078a80c814273a76b2a298a932eb681c87415c11dee0a6921de7f1b02c3e"},
13 | ]
14 |
15 | [package.extras]
16 | benchmark = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"]
17 | cov = ["cloudpickle ; platform_python_implementation == \"CPython\"", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"]
18 | dev = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pre-commit-uv", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"]
19 | docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier (<24.7)"]
20 | tests = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"]
21 | tests-mypy = ["mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\""]
22 |
23 | [[package]]
24 | name = "beautifulsoup4"
25 | version = "4.13.3"
26 | description = "Screen-scraping library"
27 | optional = false
28 | python-versions = ">=3.7.0"
29 | groups = ["main"]
30 | files = [
31 | {file = "beautifulsoup4-4.13.3-py3-none-any.whl", hash = "sha256:99045d7d3f08f91f0d656bc9b7efbae189426cd913d830294a15eefa0ea4df16"},
32 | {file = "beautifulsoup4-4.13.3.tar.gz", hash = "sha256:1bd32405dacc920b42b83ba01644747ed77456a65760e285fbc47633ceddaf8b"},
33 | ]
34 |
35 | [package.dependencies]
36 | soupsieve = ">1.2"
37 | typing-extensions = ">=4.0.0"
38 |
39 | [package.extras]
40 | cchardet = ["cchardet"]
41 | chardet = ["chardet"]
42 | charset-normalizer = ["charset-normalizer"]
43 | html5lib = ["html5lib"]
44 | lxml = ["lxml"]
45 |
46 | [[package]]
47 | name = "cattrs"
48 | version = "24.1.2"
49 | description = "Composable complex class support for attrs and dataclasses."
50 | optional = false
51 | python-versions = ">=3.8"
52 | groups = ["main"]
53 | files = [
54 | {file = "cattrs-24.1.2-py3-none-any.whl", hash = "sha256:67c7495b760168d931a10233f979b28dc04daf853b30752246f4f8471c6d68d0"},
55 | {file = "cattrs-24.1.2.tar.gz", hash = "sha256:8028cfe1ff5382df59dd36474a86e02d817b06eaf8af84555441bac915d2ef85"},
56 | ]
57 |
58 | [package.dependencies]
59 | attrs = ">=23.1.0"
60 | exceptiongroup = {version = ">=1.1.1", markers = "python_version < \"3.11\""}
61 | typing-extensions = {version = ">=4.1.0,<4.6.3 || >4.6.3", markers = "python_version < \"3.11\""}
62 |
63 | [package.extras]
64 | bson = ["pymongo (>=4.4.0)"]
65 | cbor2 = ["cbor2 (>=5.4.6)"]
66 | msgpack = ["msgpack (>=1.0.5)"]
67 | msgspec = ["msgspec (>=0.18.5) ; implementation_name == \"cpython\""]
68 | orjson = ["orjson (>=3.9.2) ; implementation_name == \"cpython\""]
69 | pyyaml = ["pyyaml (>=6.0)"]
70 | tomlkit = ["tomlkit (>=0.11.8)"]
71 | ujson = ["ujson (>=5.7.0)"]
72 |
73 | [[package]]
74 | name = "certifi"
75 | version = "2024.8.30"
76 | description = "Python package for providing Mozilla's CA Bundle."
77 | optional = false
78 | python-versions = ">=3.6"
79 | groups = ["main"]
80 | files = [
81 | {file = "certifi-2024.8.30-py3-none-any.whl", hash = "sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8"},
82 | {file = "certifi-2024.8.30.tar.gz", hash = "sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9"},
83 | ]
84 |
85 | [[package]]
86 | name = "charset-normalizer"
87 | version = "3.4.0"
88 | description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
89 | optional = false
90 | python-versions = ">=3.7.0"
91 | groups = ["main"]
92 | files = [
93 | {file = "charset_normalizer-3.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:4f9fc98dad6c2eaa32fc3af1417d95b5e3d08aff968df0cd320066def971f9a6"},
94 | {file = "charset_normalizer-3.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0de7b687289d3c1b3e8660d0741874abe7888100efe14bd0f9fd7141bcbda92b"},
95 | {file = "charset_normalizer-3.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5ed2e36c3e9b4f21dd9422f6893dec0abf2cca553af509b10cd630f878d3eb99"},
96 | {file = "charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d3ff7fc90b98c637bda91c89d51264a3dcf210cade3a2c6f838c7268d7a4ca"},
97 | {file = "charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1110e22af8ca26b90bd6364fe4c763329b0ebf1ee213ba32b68c73de5752323d"},
98 | {file = "charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:86f4e8cca779080f66ff4f191a685ced73d2f72d50216f7112185dc02b90b9b7"},
99 | {file = "charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f683ddc7eedd742e2889d2bfb96d69573fde1d92fcb811979cdb7165bb9c7d3"},
100 | {file = "charset_normalizer-3.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:27623ba66c183eca01bf9ff833875b459cad267aeeb044477fedac35e19ba907"},
101 | {file = "charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f606a1881d2663630ea5b8ce2efe2111740df4b687bd78b34a8131baa007f79b"},
102 | {file = "charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:0b309d1747110feb25d7ed6b01afdec269c647d382c857ef4663bbe6ad95a912"},
103 | {file = "charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:136815f06a3ae311fae551c3df1f998a1ebd01ddd424aa5603a4336997629e95"},
104 | {file = "charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:14215b71a762336254351b00ec720a8e85cada43b987da5a042e4ce3e82bd68e"},
105 | {file = "charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:79983512b108e4a164b9c8d34de3992f76d48cadc9554c9e60b43f308988aabe"},
106 | {file = "charset_normalizer-3.4.0-cp310-cp310-win32.whl", hash = "sha256:c94057af19bc953643a33581844649a7fdab902624d2eb739738a30e2b3e60fc"},
107 | {file = "charset_normalizer-3.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:55f56e2ebd4e3bc50442fbc0888c9d8c94e4e06a933804e2af3e89e2f9c1c749"},
108 | {file = "charset_normalizer-3.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0d99dd8ff461990f12d6e42c7347fd9ab2532fb70e9621ba520f9e8637161d7c"},
109 | {file = "charset_normalizer-3.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c57516e58fd17d03ebe67e181a4e4e2ccab1168f8c2976c6a334d4f819fe5944"},
110 | {file = "charset_normalizer-3.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6dba5d19c4dfab08e58d5b36304b3f92f3bd5d42c1a3fa37b5ba5cdf6dfcbcee"},
111 | {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf4475b82be41b07cc5e5ff94810e6a01f276e37c2d55571e3fe175e467a1a1c"},
112 | {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ce031db0408e487fd2775d745ce30a7cd2923667cf3b69d48d219f1d8f5ddeb6"},
113 | {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8ff4e7cdfdb1ab5698e675ca622e72d58a6fa2a8aa58195de0c0061288e6e3ea"},
114 | {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3710a9751938947e6327ea9f3ea6332a09bf0ba0c09cae9cb1f250bd1f1549bc"},
115 | {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82357d85de703176b5587dbe6ade8ff67f9f69a41c0733cf2425378b49954de5"},
116 | {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:47334db71978b23ebcf3c0f9f5ee98b8d65992b65c9c4f2d34c2eaf5bcaf0594"},
117 | {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8ce7fd6767a1cc5a92a639b391891bf1c268b03ec7e021c7d6d902285259685c"},
118 | {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:f1a2f519ae173b5b6a2c9d5fa3116ce16e48b3462c8b96dfdded11055e3d6365"},
119 | {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:63bc5c4ae26e4bc6be6469943b8253c0fd4e4186c43ad46e713ea61a0ba49129"},
120 | {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bcb4f8ea87d03bc51ad04add8ceaf9b0f085ac045ab4d74e73bbc2dc033f0236"},
121 | {file = "charset_normalizer-3.4.0-cp311-cp311-win32.whl", hash = "sha256:9ae4ef0b3f6b41bad6366fb0ea4fc1d7ed051528e113a60fa2a65a9abb5b1d99"},
122 | {file = "charset_normalizer-3.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:cee4373f4d3ad28f1ab6290684d8e2ebdb9e7a1b74fdc39e4c211995f77bec27"},
123 | {file = "charset_normalizer-3.4.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0713f3adb9d03d49d365b70b84775d0a0d18e4ab08d12bc46baa6132ba78aaf6"},
124 | {file = "charset_normalizer-3.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:de7376c29d95d6719048c194a9cf1a1b0393fbe8488a22008610b0361d834ecf"},
125 | {file = "charset_normalizer-3.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4a51b48f42d9358460b78725283f04bddaf44a9358197b889657deba38f329db"},
126 | {file = "charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b295729485b06c1a0683af02a9e42d2caa9db04a373dc38a6a58cdd1e8abddf1"},
127 | {file = "charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ee803480535c44e7f5ad00788526da7d85525cfefaf8acf8ab9a310000be4b03"},
128 | {file = "charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3d59d125ffbd6d552765510e3f31ed75ebac2c7470c7274195b9161a32350284"},
129 | {file = "charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8cda06946eac330cbe6598f77bb54e690b4ca93f593dee1568ad22b04f347c15"},
130 | {file = "charset_normalizer-3.4.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07afec21bbbbf8a5cc3651aa96b980afe2526e7f048fdfb7f1014d84acc8b6d8"},
131 | {file = "charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6b40e8d38afe634559e398cc32b1472f376a4099c75fe6299ae607e404c033b2"},
132 | {file = "charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b8dcd239c743aa2f9c22ce674a145e0a25cb1566c495928440a181ca1ccf6719"},
133 | {file = "charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:84450ba661fb96e9fd67629b93d2941c871ca86fc38d835d19d4225ff946a631"},
134 | {file = "charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:44aeb140295a2f0659e113b31cfe92c9061622cadbc9e2a2f7b8ef6b1e29ef4b"},
135 | {file = "charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1db4e7fefefd0f548d73e2e2e041f9df5c59e178b4c72fbac4cc6f535cfb1565"},
136 | {file = "charset_normalizer-3.4.0-cp312-cp312-win32.whl", hash = "sha256:5726cf76c982532c1863fb64d8c6dd0e4c90b6ece9feb06c9f202417a31f7dd7"},
137 | {file = "charset_normalizer-3.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:b197e7094f232959f8f20541ead1d9862ac5ebea1d58e9849c1bf979255dfac9"},
138 | {file = "charset_normalizer-3.4.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:dd4eda173a9fcccb5f2e2bd2a9f423d180194b1bf17cf59e3269899235b2a114"},
139 | {file = "charset_normalizer-3.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9e3c4c9e1ed40ea53acf11e2a386383c3304212c965773704e4603d589343ed"},
140 | {file = "charset_normalizer-3.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:92a7e36b000bf022ef3dbb9c46bfe2d52c047d5e3f3343f43204263c5addc250"},
141 | {file = "charset_normalizer-3.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:54b6a92d009cbe2fb11054ba694bc9e284dad30a26757b1e372a1fdddaf21920"},
142 | {file = "charset_normalizer-3.4.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ffd9493de4c922f2a38c2bf62b831dcec90ac673ed1ca182fe11b4d8e9f2a64"},
143 | {file = "charset_normalizer-3.4.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:35c404d74c2926d0287fbd63ed5d27eb911eb9e4a3bb2c6d294f3cfd4a9e0c23"},
144 | {file = "charset_normalizer-3.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4796efc4faf6b53a18e3d46343535caed491776a22af773f366534056c4e1fbc"},
145 | {file = "charset_normalizer-3.4.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e7fdd52961feb4c96507aa649550ec2a0d527c086d284749b2f582f2d40a2e0d"},
146 | {file = "charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:92db3c28b5b2a273346bebb24857fda45601aef6ae1c011c0a997106581e8a88"},
147 | {file = "charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ab973df98fc99ab39080bfb0eb3a925181454d7c3ac8a1e695fddfae696d9e90"},
148 | {file = "charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4b67fdab07fdd3c10bb21edab3cbfe8cf5696f453afce75d815d9d7223fbe88b"},
149 | {file = "charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:aa41e526a5d4a9dfcfbab0716c7e8a1b215abd3f3df5a45cf18a12721d31cb5d"},
150 | {file = "charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ffc519621dce0c767e96b9c53f09c5d215578e10b02c285809f76509a3931482"},
151 | {file = "charset_normalizer-3.4.0-cp313-cp313-win32.whl", hash = "sha256:f19c1585933c82098c2a520f8ec1227f20e339e33aca8fa6f956f6691b784e67"},
152 | {file = "charset_normalizer-3.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:707b82d19e65c9bd28b81dde95249b07bf9f5b90ebe1ef17d9b57473f8a64b7b"},
153 | {file = "charset_normalizer-3.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:dbe03226baf438ac4fda9e2d0715022fd579cb641c4cf639fa40d53b2fe6f3e2"},
154 | {file = "charset_normalizer-3.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd9a8bd8900e65504a305bf8ae6fa9fbc66de94178c420791d0293702fce2df7"},
155 | {file = "charset_normalizer-3.4.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b8831399554b92b72af5932cdbbd4ddc55c55f631bb13ff8fe4e6536a06c5c51"},
156 | {file = "charset_normalizer-3.4.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a14969b8691f7998e74663b77b4c36c0337cb1df552da83d5c9004a93afdb574"},
157 | {file = "charset_normalizer-3.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dcaf7c1524c0542ee2fc82cc8ec337f7a9f7edee2532421ab200d2b920fc97cf"},
158 | {file = "charset_normalizer-3.4.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:425c5f215d0eecee9a56cdb703203dda90423247421bf0d67125add85d0c4455"},
159 | {file = "charset_normalizer-3.4.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:d5b054862739d276e09928de37c79ddeec42a6e1bfc55863be96a36ba22926f6"},
160 | {file = "charset_normalizer-3.4.0-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:f3e73a4255342d4eb26ef6df01e3962e73aa29baa3124a8e824c5d3364a65748"},
161 | {file = "charset_normalizer-3.4.0-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:2f6c34da58ea9c1a9515621f4d9ac379871a8f21168ba1b5e09d74250de5ad62"},
162 | {file = "charset_normalizer-3.4.0-cp37-cp37m-musllinux_1_2_s390x.whl", hash = "sha256:f09cb5a7bbe1ecae6e87901a2eb23e0256bb524a79ccc53eb0b7629fbe7677c4"},
163 | {file = "charset_normalizer-3.4.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:0099d79bdfcf5c1f0c2c72f91516702ebf8b0b8ddd8905f97a8aecf49712c621"},
164 | {file = "charset_normalizer-3.4.0-cp37-cp37m-win32.whl", hash = "sha256:9c98230f5042f4945f957d006edccc2af1e03ed5e37ce7c373f00a5a4daa6149"},
165 | {file = "charset_normalizer-3.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:62f60aebecfc7f4b82e3f639a7d1433a20ec32824db2199a11ad4f5e146ef5ee"},
166 | {file = "charset_normalizer-3.4.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:af73657b7a68211996527dbfeffbb0864e043d270580c5aef06dc4b659a4b578"},
167 | {file = "charset_normalizer-3.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cab5d0b79d987c67f3b9e9c53f54a61360422a5a0bc075f43cab5621d530c3b6"},
168 | {file = "charset_normalizer-3.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9289fd5dddcf57bab41d044f1756550f9e7cf0c8e373b8cdf0ce8773dc4bd417"},
169 | {file = "charset_normalizer-3.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b493a043635eb376e50eedf7818f2f322eabbaa974e948bd8bdd29eb7ef2a51"},
170 | {file = "charset_normalizer-3.4.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9fa2566ca27d67c86569e8c85297aaf413ffab85a8960500f12ea34ff98e4c41"},
171 | {file = "charset_normalizer-3.4.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a8e538f46104c815be19c975572d74afb53f29650ea2025bbfaef359d2de2f7f"},
172 | {file = "charset_normalizer-3.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6fd30dc99682dc2c603c2b315bded2799019cea829f8bf57dc6b61efde6611c8"},
173 | {file = "charset_normalizer-3.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2006769bd1640bdf4d5641c69a3d63b71b81445473cac5ded39740a226fa88ab"},
174 | {file = "charset_normalizer-3.4.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:dc15e99b2d8a656f8e666854404f1ba54765871104e50c8e9813af8a7db07f12"},
175 | {file = "charset_normalizer-3.4.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:ab2e5bef076f5a235c3774b4f4028a680432cded7cad37bba0fd90d64b187d19"},
176 | {file = "charset_normalizer-3.4.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:4ec9dd88a5b71abfc74e9df5ebe7921c35cbb3b641181a531ca65cdb5e8e4dea"},
177 | {file = "charset_normalizer-3.4.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:43193c5cda5d612f247172016c4bb71251c784d7a4d9314677186a838ad34858"},
178 | {file = "charset_normalizer-3.4.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:aa693779a8b50cd97570e5a0f343538a8dbd3e496fa5dcb87e29406ad0299654"},
179 | {file = "charset_normalizer-3.4.0-cp38-cp38-win32.whl", hash = "sha256:7706f5850360ac01d80c89bcef1640683cc12ed87f42579dab6c5d3ed6888613"},
180 | {file = "charset_normalizer-3.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:c3e446d253bd88f6377260d07c895816ebf33ffffd56c1c792b13bff9c3e1ade"},
181 | {file = "charset_normalizer-3.4.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:980b4f289d1d90ca5efcf07958d3eb38ed9c0b7676bf2831a54d4f66f9c27dfa"},
182 | {file = "charset_normalizer-3.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f28f891ccd15c514a0981f3b9db9aa23d62fe1a99997512b0491d2ed323d229a"},
183 | {file = "charset_normalizer-3.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8aacce6e2e1edcb6ac625fb0f8c3a9570ccc7bfba1f63419b3769ccf6a00ed0"},
184 | {file = "charset_normalizer-3.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd7af3717683bea4c87acd8c0d3d5b44d56120b26fd3f8a692bdd2d5260c620a"},
185 | {file = "charset_normalizer-3.4.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5ff2ed8194587faf56555927b3aa10e6fb69d931e33953943bc4f837dfee2242"},
186 | {file = "charset_normalizer-3.4.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e91f541a85298cf35433bf66f3fab2a4a2cff05c127eeca4af174f6d497f0d4b"},
187 | {file = "charset_normalizer-3.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:309a7de0a0ff3040acaebb35ec45d18db4b28232f21998851cfa709eeff49d62"},
188 | {file = "charset_normalizer-3.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:285e96d9d53422efc0d7a17c60e59f37fbf3dfa942073f666db4ac71e8d726d0"},
189 | {file = "charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:5d447056e2ca60382d460a604b6302d8db69476fd2015c81e7c35417cfabe4cd"},
190 | {file = "charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:20587d20f557fe189b7947d8e7ec5afa110ccf72a3128d61a2a387c3313f46be"},
191 | {file = "charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:130272c698667a982a5d0e626851ceff662565379baf0ff2cc58067b81d4f11d"},
192 | {file = "charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:ab22fbd9765e6954bc0bcff24c25ff71dcbfdb185fcdaca49e81bac68fe724d3"},
193 | {file = "charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7782afc9b6b42200f7362858f9e73b1f8316afb276d316336c0ec3bd73312742"},
194 | {file = "charset_normalizer-3.4.0-cp39-cp39-win32.whl", hash = "sha256:2de62e8801ddfff069cd5c504ce3bc9672b23266597d4e4f50eda28846c322f2"},
195 | {file = "charset_normalizer-3.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:95c3c157765b031331dd4db3c775e58deaee050a3042fcad72cbc4189d7c8dca"},
196 | {file = "charset_normalizer-3.4.0-py3-none-any.whl", hash = "sha256:fe9f97feb71aa9896b81973a7bbada8c49501dc73e58a10fcef6663af95e5079"},
197 | {file = "charset_normalizer-3.4.0.tar.gz", hash = "sha256:223217c3d4f82c3ac5e29032b3f1c2eb0fb591b72161f86d93f5719079dae93e"},
198 | ]
199 |
200 | [[package]]
201 | name = "click"
202 | version = "8.1.8"
203 | description = "Composable command line interface toolkit"
204 | optional = false
205 | python-versions = ">=3.7"
206 | groups = ["main"]
207 | files = [
208 | {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"},
209 | {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"},
210 | ]
211 |
212 | [package.dependencies]
213 | colorama = {version = "*", markers = "platform_system == \"Windows\""}
214 |
215 | [[package]]
216 | name = "click-default-group"
217 | version = "1.2.4"
218 | description = "click_default_group"
219 | optional = false
220 | python-versions = ">=2.7"
221 | groups = ["main"]
222 | files = [
223 | {file = "click_default_group-1.2.4-py2.py3-none-any.whl", hash = "sha256:9b60486923720e7fc61731bdb32b617039aba820e22e1c88766b1125592eaa5f"},
224 | {file = "click_default_group-1.2.4.tar.gz", hash = "sha256:eb3f3c99ec0d456ca6cd2a7f08f7d4e91771bef51b01bdd9580cc6450fe1251e"},
225 | ]
226 |
227 | [package.dependencies]
228 | click = "*"
229 |
230 | [package.extras]
231 | test = ["pytest"]
232 |
233 | [[package]]
234 | name = "colorama"
235 | version = "0.4.6"
236 | description = "Cross-platform colored terminal text."
237 | optional = false
238 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
239 | groups = ["main"]
240 | markers = "platform_system == \"Windows\""
241 | files = [
242 | {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
243 | {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
244 | ]
245 |
246 | [[package]]
247 | name = "exceptiongroup"
248 | version = "1.2.2"
249 | description = "Backport of PEP 654 (exception groups)"
250 | optional = false
251 | python-versions = ">=3.7"
252 | groups = ["main"]
253 | markers = "python_version < \"3.11\""
254 | files = [
255 | {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"},
256 | {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"},
257 | ]
258 |
259 | [package.extras]
260 | test = ["pytest (>=6)"]
261 |
262 | [[package]]
263 | name = "flake8"
264 | version = "6.1.0"
265 | description = "the modular source code checker: pep8 pyflakes and co"
266 | optional = false
267 | python-versions = ">=3.8.1"
268 | groups = ["dev"]
269 | files = [
270 | {file = "flake8-6.1.0-py2.py3-none-any.whl", hash = "sha256:ffdfce58ea94c6580c77888a86506937f9a1a227dfcd15f245d694ae20a6b6e5"},
271 | {file = "flake8-6.1.0.tar.gz", hash = "sha256:d5b3857f07c030bdb5bf41c7f53799571d75c4491748a3adcd47de929e34cd23"},
272 | ]
273 |
274 | [package.dependencies]
275 | mccabe = ">=0.7.0,<0.8.0"
276 | pycodestyle = ">=2.11.0,<2.12.0"
277 | pyflakes = ">=3.1.0,<3.2.0"
278 |
279 | [[package]]
280 | name = "idna"
281 | version = "3.10"
282 | description = "Internationalized Domain Names in Applications (IDNA)"
283 | optional = false
284 | python-versions = ">=3.6"
285 | groups = ["main"]
286 | files = [
287 | {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"},
288 | {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"},
289 | ]
290 |
291 | [package.extras]
292 | all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"]
293 |
294 | [[package]]
295 | name = "lxml"
296 | version = "5.3.1"
297 | description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API."
298 | optional = false
299 | python-versions = ">=3.6"
300 | groups = ["main"]
301 | files = [
302 | {file = "lxml-5.3.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a4058f16cee694577f7e4dd410263cd0ef75644b43802a689c2b3c2a7e69453b"},
303 | {file = "lxml-5.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:364de8f57d6eda0c16dcfb999af902da31396949efa0e583e12675d09709881b"},
304 | {file = "lxml-5.3.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:528f3a0498a8edc69af0559bdcf8a9f5a8bf7c00051a6ef3141fdcf27017bbf5"},
305 | {file = "lxml-5.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db4743e30d6f5f92b6d2b7c86b3ad250e0bad8dee4b7ad8a0c44bfb276af89a3"},
306 | {file = "lxml-5.3.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:17b5d7f8acf809465086d498d62a981fa6a56d2718135bb0e4aa48c502055f5c"},
307 | {file = "lxml-5.3.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:928e75a7200a4c09e6efc7482a1337919cc61fe1ba289f297827a5b76d8969c2"},
308 | {file = "lxml-5.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a997b784a639e05b9d4053ef3b20c7e447ea80814a762f25b8ed5a89d261eac"},
309 | {file = "lxml-5.3.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:7b82e67c5feb682dbb559c3e6b78355f234943053af61606af126df2183b9ef9"},
310 | {file = "lxml-5.3.1-cp310-cp310-manylinux_2_28_ppc64le.whl", hash = "sha256:f1de541a9893cf8a1b1db9bf0bf670a2decab42e3e82233d36a74eda7822b4c9"},
311 | {file = "lxml-5.3.1-cp310-cp310-manylinux_2_28_s390x.whl", hash = "sha256:de1fc314c3ad6bc2f6bd5b5a5b9357b8c6896333d27fdbb7049aea8bd5af2d79"},
312 | {file = "lxml-5.3.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:7c0536bd9178f754b277a3e53f90f9c9454a3bd108b1531ffff720e082d824f2"},
313 | {file = "lxml-5.3.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:68018c4c67d7e89951a91fbd371e2e34cd8cfc71f0bb43b5332db38497025d51"},
314 | {file = "lxml-5.3.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:aa826340a609d0c954ba52fd831f0fba2a4165659ab0ee1a15e4aac21f302406"},
315 | {file = "lxml-5.3.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:796520afa499732191e39fc95b56a3b07f95256f2d22b1c26e217fb69a9db5b5"},
316 | {file = "lxml-5.3.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3effe081b3135237da6e4c4530ff2a868d3f80be0bda027e118a5971285d42d0"},
317 | {file = "lxml-5.3.1-cp310-cp310-win32.whl", hash = "sha256:a22f66270bd6d0804b02cd49dae2b33d4341015545d17f8426f2c4e22f557a23"},
318 | {file = "lxml-5.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:0bcfadea3cdc68e678d2b20cb16a16716887dd00a881e16f7d806c2138b8ff0c"},
319 | {file = "lxml-5.3.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e220f7b3e8656ab063d2eb0cd536fafef396829cafe04cb314e734f87649058f"},
320 | {file = "lxml-5.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0f2cfae0688fd01f7056a17367e3b84f37c545fb447d7282cf2c242b16262607"},
321 | {file = "lxml-5.3.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:67d2f8ad9dcc3a9e826bdc7802ed541a44e124c29b7d95a679eeb58c1c14ade8"},
322 | {file = "lxml-5.3.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db0c742aad702fd5d0c6611a73f9602f20aec2007c102630c06d7633d9c8f09a"},
323 | {file = "lxml-5.3.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:198bb4b4dd888e8390afa4f170d4fa28467a7eaf857f1952589f16cfbb67af27"},
324 | {file = "lxml-5.3.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d2a3e412ce1849be34b45922bfef03df32d1410a06d1cdeb793a343c2f1fd666"},
325 | {file = "lxml-5.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2b8969dbc8d09d9cd2ae06362c3bad27d03f433252601ef658a49bd9f2b22d79"},
326 | {file = "lxml-5.3.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:5be8f5e4044146a69c96077c7e08f0709c13a314aa5315981185c1f00235fe65"},
327 | {file = "lxml-5.3.1-cp311-cp311-manylinux_2_28_ppc64le.whl", hash = "sha256:133f3493253a00db2c870d3740bc458ebb7d937bd0a6a4f9328373e0db305709"},
328 | {file = "lxml-5.3.1-cp311-cp311-manylinux_2_28_s390x.whl", hash = "sha256:52d82b0d436edd6a1d22d94a344b9a58abd6c68c357ed44f22d4ba8179b37629"},
329 | {file = "lxml-5.3.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:1b6f92e35e2658a5ed51c6634ceb5ddae32053182851d8cad2a5bc102a359b33"},
330 | {file = "lxml-5.3.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:203b1d3eaebd34277be06a3eb880050f18a4e4d60861efba4fb946e31071a295"},
331 | {file = "lxml-5.3.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:155e1a5693cf4b55af652f5c0f78ef36596c7f680ff3ec6eb4d7d85367259b2c"},
332 | {file = "lxml-5.3.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:22ec2b3c191f43ed21f9545e9df94c37c6b49a5af0a874008ddc9132d49a2d9c"},
333 | {file = "lxml-5.3.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:7eda194dd46e40ec745bf76795a7cccb02a6a41f445ad49d3cf66518b0bd9cff"},
334 | {file = "lxml-5.3.1-cp311-cp311-win32.whl", hash = "sha256:fb7c61d4be18e930f75948705e9718618862e6fc2ed0d7159b2262be73f167a2"},
335 | {file = "lxml-5.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:c809eef167bf4a57af4b03007004896f5c60bd38dc3852fcd97a26eae3d4c9e6"},
336 | {file = "lxml-5.3.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:e69add9b6b7b08c60d7ff0152c7c9a6c45b4a71a919be5abde6f98f1ea16421c"},
337 | {file = "lxml-5.3.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:4e52e1b148867b01c05e21837586ee307a01e793b94072d7c7b91d2c2da02ffe"},
338 | {file = "lxml-5.3.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a4b382e0e636ed54cd278791d93fe2c4f370772743f02bcbe431a160089025c9"},
339 | {file = "lxml-5.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c2e49dc23a10a1296b04ca9db200c44d3eb32c8d8ec532e8c1fd24792276522a"},
340 | {file = "lxml-5.3.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4399b4226c4785575fb20998dc571bc48125dc92c367ce2602d0d70e0c455eb0"},
341 | {file = "lxml-5.3.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5412500e0dc5481b1ee9cf6b38bb3b473f6e411eb62b83dc9b62699c3b7b79f7"},
342 | {file = "lxml-5.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c93ed3c998ea8472be98fb55aed65b5198740bfceaec07b2eba551e55b7b9ae"},
343 | {file = "lxml-5.3.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:63d57fc94eb0bbb4735e45517afc21ef262991d8758a8f2f05dd6e4174944519"},
344 | {file = "lxml-5.3.1-cp312-cp312-manylinux_2_28_ppc64le.whl", hash = "sha256:b450d7cabcd49aa7ab46a3c6aa3ac7e1593600a1a0605ba536ec0f1b99a04322"},
345 | {file = "lxml-5.3.1-cp312-cp312-manylinux_2_28_s390x.whl", hash = "sha256:4df0ec814b50275ad6a99bc82a38b59f90e10e47714ac9871e1b223895825468"},
346 | {file = "lxml-5.3.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:d184f85ad2bb1f261eac55cddfcf62a70dee89982c978e92b9a74a1bfef2e367"},
347 | {file = "lxml-5.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b725e70d15906d24615201e650d5b0388b08a5187a55f119f25874d0103f90dd"},
348 | {file = "lxml-5.3.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:a31fa7536ec1fb7155a0cd3a4e3d956c835ad0a43e3610ca32384d01f079ea1c"},
349 | {file = "lxml-5.3.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:3c3c8b55c7fc7b7e8877b9366568cc73d68b82da7fe33d8b98527b73857a225f"},
350 | {file = "lxml-5.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d61ec60945d694df806a9aec88e8f29a27293c6e424f8ff91c80416e3c617645"},
351 | {file = "lxml-5.3.1-cp312-cp312-win32.whl", hash = "sha256:f4eac0584cdc3285ef2e74eee1513a6001681fd9753b259e8159421ed28a72e5"},
352 | {file = "lxml-5.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:29bfc8d3d88e56ea0a27e7c4897b642706840247f59f4377d81be8f32aa0cfbf"},
353 | {file = "lxml-5.3.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:c093c7088b40d8266f57ed71d93112bd64c6724d31f0794c1e52cc4857c28e0e"},
354 | {file = "lxml-5.3.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b0884e3f22d87c30694e625b1e62e6f30d39782c806287450d9dc2fdf07692fd"},
355 | {file = "lxml-5.3.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1637fa31ec682cd5760092adfabe86d9b718a75d43e65e211d5931809bc111e7"},
356 | {file = "lxml-5.3.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a364e8e944d92dcbf33b6b494d4e0fb3499dcc3bd9485beb701aa4b4201fa414"},
357 | {file = "lxml-5.3.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:779e851fd0e19795ccc8a9bb4d705d6baa0ef475329fe44a13cf1e962f18ff1e"},
358 | {file = "lxml-5.3.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c4393600915c308e546dc7003d74371744234e8444a28622d76fe19b98fa59d1"},
359 | {file = "lxml-5.3.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:673b9d8e780f455091200bba8534d5f4f465944cbdd61f31dc832d70e29064a5"},
360 | {file = "lxml-5.3.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:2e4a570f6a99e96c457f7bec5ad459c9c420ee80b99eb04cbfcfe3fc18ec6423"},
361 | {file = "lxml-5.3.1-cp313-cp313-manylinux_2_28_ppc64le.whl", hash = "sha256:71f31eda4e370f46af42fc9f264fafa1b09f46ba07bdbee98f25689a04b81c20"},
362 | {file = "lxml-5.3.1-cp313-cp313-manylinux_2_28_s390x.whl", hash = "sha256:42978a68d3825eaac55399eb37a4d52012a205c0c6262199b8b44fcc6fd686e8"},
363 | {file = "lxml-5.3.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:8b1942b3e4ed9ed551ed3083a2e6e0772de1e5e3aca872d955e2e86385fb7ff9"},
364 | {file = "lxml-5.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:85c4f11be9cf08917ac2a5a8b6e1ef63b2f8e3799cec194417e76826e5f1de9c"},
365 | {file = "lxml-5.3.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:231cf4d140b22a923b1d0a0a4e0b4f972e5893efcdec188934cc65888fd0227b"},
366 | {file = "lxml-5.3.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:5865b270b420eda7b68928d70bb517ccbe045e53b1a428129bb44372bf3d7dd5"},
367 | {file = "lxml-5.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dbf7bebc2275016cddf3c997bf8a0f7044160714c64a9b83975670a04e6d2252"},
368 | {file = "lxml-5.3.1-cp313-cp313-win32.whl", hash = "sha256:d0751528b97d2b19a388b302be2a0ee05817097bab46ff0ed76feeec24951f78"},
369 | {file = "lxml-5.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:91fb6a43d72b4f8863d21f347a9163eecbf36e76e2f51068d59cd004c506f332"},
370 | {file = "lxml-5.3.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:016b96c58e9a4528219bb563acf1aaaa8bc5452e7651004894a973f03b84ba81"},
371 | {file = "lxml-5.3.1-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82a4bb10b0beef1434fb23a09f001ab5ca87895596b4581fd53f1e5145a8934a"},
372 | {file = "lxml-5.3.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d68eeef7b4d08a25e51897dac29bcb62aba830e9ac6c4e3297ee7c6a0cf6439"},
373 | {file = "lxml-5.3.1-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:f12582b8d3b4c6be1d298c49cb7ae64a3a73efaf4c2ab4e37db182e3545815ac"},
374 | {file = "lxml-5.3.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:2df7ed5edeb6bd5590914cd61df76eb6cce9d590ed04ec7c183cf5509f73530d"},
375 | {file = "lxml-5.3.1-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:585c4dc429deebc4307187d2b71ebe914843185ae16a4d582ee030e6cfbb4d8a"},
376 | {file = "lxml-5.3.1-cp36-cp36m-win32.whl", hash = "sha256:06a20d607a86fccab2fc15a77aa445f2bdef7b49ec0520a842c5c5afd8381576"},
377 | {file = "lxml-5.3.1-cp36-cp36m-win_amd64.whl", hash = "sha256:057e30d0012439bc54ca427a83d458752ccda725c1c161cc283db07bcad43cf9"},
378 | {file = "lxml-5.3.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4867361c049761a56bd21de507cab2c2a608c55102311d142ade7dab67b34f32"},
379 | {file = "lxml-5.3.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3dddf0fb832486cc1ea71d189cb92eb887826e8deebe128884e15020bb6e3f61"},
380 | {file = "lxml-5.3.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1bcc211542f7af6f2dfb705f5f8b74e865592778e6cafdfd19c792c244ccce19"},
381 | {file = "lxml-5.3.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aaca5a812f050ab55426c32177091130b1e49329b3f002a32934cd0245571307"},
382 | {file = "lxml-5.3.1-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:236610b77589faf462337b3305a1be91756c8abc5a45ff7ca8f245a71c5dab70"},
383 | {file = "lxml-5.3.1-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:aed57b541b589fa05ac248f4cb1c46cbb432ab82cbd467d1c4f6a2bdc18aecf9"},
384 | {file = "lxml-5.3.1-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:75fa3d6946d317ffc7016a6fcc44f42db6d514b7fdb8b4b28cbe058303cb6e53"},
385 | {file = "lxml-5.3.1-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:96eef5b9f336f623ffc555ab47a775495e7e8846dde88de5f941e2906453a1ce"},
386 | {file = "lxml-5.3.1-cp37-cp37m-win32.whl", hash = "sha256:ef45f31aec9be01379fc6c10f1d9c677f032f2bac9383c827d44f620e8a88407"},
387 | {file = "lxml-5.3.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a0611da6b07dd3720f492db1b463a4d1175b096b49438761cc9f35f0d9eaaef5"},
388 | {file = "lxml-5.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b2aca14c235c7a08558fe0a4786a1a05873a01e86b474dfa8f6df49101853a4e"},
389 | {file = "lxml-5.3.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae82fce1d964f065c32c9517309f0c7be588772352d2f40b1574a214bd6e6098"},
390 | {file = "lxml-5.3.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7aae7a3d63b935babfdc6864b31196afd5145878ddd22f5200729006366bc4d5"},
391 | {file = "lxml-5.3.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8e0d177b1fe251c3b1b914ab64135475c5273c8cfd2857964b2e3bb0fe196a7"},
392 | {file = "lxml-5.3.1-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:6c4dd3bfd0c82400060896717dd261137398edb7e524527438c54a8c34f736bf"},
393 | {file = "lxml-5.3.1-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:f1208c1c67ec9e151d78aa3435aa9b08a488b53d9cfac9b699f15255a3461ef2"},
394 | {file = "lxml-5.3.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:c6aacf00d05b38a5069826e50ae72751cb5bc27bdc4d5746203988e429b385bb"},
395 | {file = "lxml-5.3.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:5881aaa4bf3a2d086c5f20371d3a5856199a0d8ac72dd8d0dbd7a2ecfc26ab73"},
396 | {file = "lxml-5.3.1-cp38-cp38-win32.whl", hash = "sha256:45fbb70ccbc8683f2fb58bea89498a7274af1d9ec7995e9f4af5604e028233fc"},
397 | {file = "lxml-5.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:7512b4d0fc5339d5abbb14d1843f70499cab90d0b864f790e73f780f041615d7"},
398 | {file = "lxml-5.3.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5885bc586f1edb48e5d68e7a4b4757b5feb2a496b64f462b4d65950f5af3364f"},
399 | {file = "lxml-5.3.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1b92fe86e04f680b848fff594a908edfa72b31bfc3499ef7433790c11d4c8cd8"},
400 | {file = "lxml-5.3.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a091026c3bf7519ab1e64655a3f52a59ad4a4e019a6f830c24d6430695b1cf6a"},
401 | {file = "lxml-5.3.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8ffb141361108e864ab5f1813f66e4e1164181227f9b1f105b042729b6c15125"},
402 | {file = "lxml-5.3.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3715cdf0dd31b836433af9ee9197af10e3df41d273c19bb249230043667a5dfd"},
403 | {file = "lxml-5.3.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:88b72eb7222d918c967202024812c2bfb4048deeb69ca328363fb8e15254c549"},
404 | {file = "lxml-5.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa59974880ab5ad8ef3afaa26f9bda148c5f39e06b11a8ada4660ecc9fb2feb3"},
405 | {file = "lxml-5.3.1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:3bb8149840daf2c3f97cebf00e4ed4a65a0baff888bf2605a8d0135ff5cf764e"},
406 | {file = "lxml-5.3.1-cp39-cp39-manylinux_2_28_ppc64le.whl", hash = "sha256:0d6b2fa86becfa81f0a0271ccb9eb127ad45fb597733a77b92e8a35e53414914"},
407 | {file = "lxml-5.3.1-cp39-cp39-manylinux_2_28_s390x.whl", hash = "sha256:136bf638d92848a939fd8f0e06fcf92d9f2e4b57969d94faae27c55f3d85c05b"},
408 | {file = "lxml-5.3.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:89934f9f791566e54c1d92cdc8f8fd0009447a5ecdb1ec6b810d5f8c4955f6be"},
409 | {file = "lxml-5.3.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a8ade0363f776f87f982572c2860cc43c65ace208db49c76df0a21dde4ddd16e"},
410 | {file = "lxml-5.3.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:bfbbab9316330cf81656fed435311386610f78b6c93cc5db4bebbce8dd146675"},
411 | {file = "lxml-5.3.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:172d65f7c72a35a6879217bcdb4bb11bc88d55fb4879e7569f55616062d387c2"},
412 | {file = "lxml-5.3.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:e3c623923967f3e5961d272718655946e5322b8d058e094764180cdee7bab1af"},
413 | {file = "lxml-5.3.1-cp39-cp39-win32.whl", hash = "sha256:ce0930a963ff593e8bb6fda49a503911accc67dee7e5445eec972668e672a0f0"},
414 | {file = "lxml-5.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:f7b64fcd670bca8800bc10ced36620c6bbb321e7bc1214b9c0c0df269c1dddc2"},
415 | {file = "lxml-5.3.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:afa578b6524ff85fb365f454cf61683771d0170470c48ad9d170c48075f86725"},
416 | {file = "lxml-5.3.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67f5e80adf0aafc7b5454f2c1cb0cde920c9b1f2cbd0485f07cc1d0497c35c5d"},
417 | {file = "lxml-5.3.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2dd0b80ac2d8f13ffc906123a6f20b459cb50a99222d0da492360512f3e50f84"},
418 | {file = "lxml-5.3.1-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:422c179022ecdedbe58b0e242607198580804253da220e9454ffe848daa1cfd2"},
419 | {file = "lxml-5.3.1-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:524ccfded8989a6595dbdda80d779fb977dbc9a7bc458864fc9a0c2fc15dc877"},
420 | {file = "lxml-5.3.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:48fd46bf7155def2e15287c6f2b133a2f78e2d22cdf55647269977b873c65499"},
421 | {file = "lxml-5.3.1-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:05123fad495a429f123307ac6d8fd6f977b71e9a0b6d9aeeb8f80c017cb17131"},
422 | {file = "lxml-5.3.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a243132767150a44e6a93cd1dde41010036e1cbc63cc3e9fe1712b277d926ce3"},
423 | {file = "lxml-5.3.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c92ea6d9dd84a750b2bae72ff5e8cf5fdd13e58dda79c33e057862c29a8d5b50"},
424 | {file = "lxml-5.3.1-pp37-pypy37_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:2f1be45d4c15f237209bbf123a0e05b5d630c8717c42f59f31ea9eae2ad89394"},
425 | {file = "lxml-5.3.1-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:a83d3adea1e0ee36dac34627f78ddd7f093bb9cfc0a8e97f1572a949b695cb98"},
426 | {file = "lxml-5.3.1-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:3edbb9c9130bac05d8c3fe150c51c337a471cc7fdb6d2a0a7d3a88e88a829314"},
427 | {file = "lxml-5.3.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2f23cf50eccb3255b6e913188291af0150d89dab44137a69e14e4dcb7be981f1"},
428 | {file = "lxml-5.3.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df7e5edac4778127f2bf452e0721a58a1cfa4d1d9eac63bdd650535eb8543615"},
429 | {file = "lxml-5.3.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:094b28ed8a8a072b9e9e2113a81fda668d2053f2ca9f2d202c2c8c7c2d6516b1"},
430 | {file = "lxml-5.3.1-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:514fe78fc4b87e7a7601c92492210b20a1b0c6ab20e71e81307d9c2e377c64de"},
431 | {file = "lxml-5.3.1-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:8fffc08de02071c37865a155e5ea5fce0282e1546fd5bde7f6149fcaa32558ac"},
432 | {file = "lxml-5.3.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:4b0d5cdba1b655d5b18042ac9c9ff50bda33568eb80feaaca4fc237b9c4fbfde"},
433 | {file = "lxml-5.3.1-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:3031e4c16b59424e8d78522c69b062d301d951dc55ad8685736c3335a97fc270"},
434 | {file = "lxml-5.3.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb659702a45136c743bc130760c6f137870d4df3a9e14386478b8a0511abcfca"},
435 | {file = "lxml-5.3.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a11b16a33656ffc43c92a5343a28dc71eefe460bcc2a4923a96f292692709f6"},
436 | {file = "lxml-5.3.1-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c5ae125276f254b01daa73e2c103363d3e99e3e10505686ac7d9d2442dd4627a"},
437 | {file = "lxml-5.3.1-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c76722b5ed4a31ba103e0dc77ab869222ec36efe1a614e42e9bcea88a36186fe"},
438 | {file = "lxml-5.3.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:33e06717c00c788ab4e79bc4726ecc50c54b9bfb55355eae21473c145d83c2d2"},
439 | {file = "lxml-5.3.1.tar.gz", hash = "sha256:106b7b5d2977b339f1e97efe2778e2ab20e99994cbb0ec5e55771ed0795920c8"},
440 | ]
441 |
442 | [package.extras]
443 | cssselect = ["cssselect (>=0.7)"]
444 | html-clean = ["lxml_html_clean"]
445 | html5 = ["html5lib"]
446 | htmlsoup = ["BeautifulSoup4"]
447 | source = ["Cython (>=3.0.11,<3.1.0)"]
448 |
449 | [[package]]
450 | name = "mccabe"
451 | version = "0.7.0"
452 | description = "McCabe checker, plugin for flake8"
453 | optional = false
454 | python-versions = ">=3.6"
455 | groups = ["dev"]
456 | files = [
457 | {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"},
458 | {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"},
459 | ]
460 |
461 | [[package]]
462 | name = "mintotp"
463 | version = "0.3.0"
464 | description = "MinTOTP - Minimal TOTP Generator"
465 | optional = false
466 | python-versions = "*"
467 | groups = ["main"]
468 | files = [
469 | {file = "mintotp-0.3.0-py3-none-any.whl", hash = "sha256:eadee8531d9ee95eda92fd17949137454acd1d2a001dcf68f99bb8de56f06468"},
470 | {file = "mintotp-0.3.0.tar.gz", hash = "sha256:d0f4db5edb38a7481120176a526e8c29539b9e80581dd2dcc1811557d77cfad5"},
471 | ]
472 |
473 | [[package]]
474 | name = "pillow"
475 | version = "11.1.0"
476 | description = "Python Imaging Library (Fork)"
477 | optional = false
478 | python-versions = ">=3.9"
479 | groups = ["main"]
480 | files = [
481 | {file = "pillow-11.1.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:e1abe69aca89514737465752b4bcaf8016de61b3be1397a8fc260ba33321b3a8"},
482 | {file = "pillow-11.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c640e5a06869c75994624551f45e5506e4256562ead981cce820d5ab39ae2192"},
483 | {file = "pillow-11.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a07dba04c5e22824816b2615ad7a7484432d7f540e6fa86af60d2de57b0fcee2"},
484 | {file = "pillow-11.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e267b0ed063341f3e60acd25c05200df4193e15a4a5807075cd71225a2386e26"},
485 | {file = "pillow-11.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:bd165131fd51697e22421d0e467997ad31621b74bfc0b75956608cb2906dda07"},
486 | {file = "pillow-11.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:abc56501c3fd148d60659aae0af6ddc149660469082859fa7b066a298bde9482"},
487 | {file = "pillow-11.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:54ce1c9a16a9561b6d6d8cb30089ab1e5eb66918cb47d457bd996ef34182922e"},
488 | {file = "pillow-11.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:73ddde795ee9b06257dac5ad42fcb07f3b9b813f8c1f7f870f402f4dc54b5269"},
489 | {file = "pillow-11.1.0-cp310-cp310-win32.whl", hash = "sha256:3a5fe20a7b66e8135d7fd617b13272626a28278d0e578c98720d9ba4b2439d49"},
490 | {file = "pillow-11.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:b6123aa4a59d75f06e9dd3dac5bf8bc9aa383121bb3dd9a7a612e05eabc9961a"},
491 | {file = "pillow-11.1.0-cp310-cp310-win_arm64.whl", hash = "sha256:a76da0a31da6fcae4210aa94fd779c65c75786bc9af06289cd1c184451ef7a65"},
492 | {file = "pillow-11.1.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:e06695e0326d05b06833b40b7ef477e475d0b1ba3a6d27da1bb48c23209bf457"},
493 | {file = "pillow-11.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96f82000e12f23e4f29346e42702b6ed9a2f2fea34a740dd5ffffcc8c539eb35"},
494 | {file = "pillow-11.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3cd561ded2cf2bbae44d4605837221b987c216cff94f49dfeed63488bb228d2"},
495 | {file = "pillow-11.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f189805c8be5ca5add39e6f899e6ce2ed824e65fb45f3c28cb2841911da19070"},
496 | {file = "pillow-11.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:dd0052e9db3474df30433f83a71b9b23bd9e4ef1de13d92df21a52c0303b8ab6"},
497 | {file = "pillow-11.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:837060a8599b8f5d402e97197d4924f05a2e0d68756998345c829c33186217b1"},
498 | {file = "pillow-11.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:aa8dd43daa836b9a8128dbe7d923423e5ad86f50a7a14dc688194b7be5c0dea2"},
499 | {file = "pillow-11.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0a2f91f8a8b367e7a57c6e91cd25af510168091fb89ec5146003e424e1558a96"},
500 | {file = "pillow-11.1.0-cp311-cp311-win32.whl", hash = "sha256:c12fc111ef090845de2bb15009372175d76ac99969bdf31e2ce9b42e4b8cd88f"},
501 | {file = "pillow-11.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:fbd43429d0d7ed6533b25fc993861b8fd512c42d04514a0dd6337fb3ccf22761"},
502 | {file = "pillow-11.1.0-cp311-cp311-win_arm64.whl", hash = "sha256:f7955ecf5609dee9442cbface754f2c6e541d9e6eda87fad7f7a989b0bdb9d71"},
503 | {file = "pillow-11.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2062ffb1d36544d42fcaa277b069c88b01bb7298f4efa06731a7fd6cc290b81a"},
504 | {file = "pillow-11.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a85b653980faad27e88b141348707ceeef8a1186f75ecc600c395dcac19f385b"},
505 | {file = "pillow-11.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9409c080586d1f683df3f184f20e36fb647f2e0bc3988094d4fd8c9f4eb1b3b3"},
506 | {file = "pillow-11.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7fdadc077553621911f27ce206ffcbec7d3f8d7b50e0da39f10997e8e2bb7f6a"},
507 | {file = "pillow-11.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:93a18841d09bcdd774dcdc308e4537e1f867b3dec059c131fde0327899734aa1"},
508 | {file = "pillow-11.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:9aa9aeddeed452b2f616ff5507459e7bab436916ccb10961c4a382cd3e03f47f"},
509 | {file = "pillow-11.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3cdcdb0b896e981678eee140d882b70092dac83ac1cdf6b3a60e2216a73f2b91"},
510 | {file = "pillow-11.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:36ba10b9cb413e7c7dfa3e189aba252deee0602c86c309799da5a74009ac7a1c"},
511 | {file = "pillow-11.1.0-cp312-cp312-win32.whl", hash = "sha256:cfd5cd998c2e36a862d0e27b2df63237e67273f2fc78f47445b14e73a810e7e6"},
512 | {file = "pillow-11.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:a697cd8ba0383bba3d2d3ada02b34ed268cb548b369943cd349007730c92bddf"},
513 | {file = "pillow-11.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:4dd43a78897793f60766563969442020e90eb7847463eca901e41ba186a7d4a5"},
514 | {file = "pillow-11.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ae98e14432d458fc3de11a77ccb3ae65ddce70f730e7c76140653048c71bfcbc"},
515 | {file = "pillow-11.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cc1331b6d5a6e144aeb5e626f4375f5b7ae9934ba620c0ac6b3e43d5e683a0f0"},
516 | {file = "pillow-11.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:758e9d4ef15d3560214cddbc97b8ef3ef86ce04d62ddac17ad39ba87e89bd3b1"},
517 | {file = "pillow-11.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b523466b1a31d0dcef7c5be1f20b942919b62fd6e9a9be199d035509cbefc0ec"},
518 | {file = "pillow-11.1.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:9044b5e4f7083f209c4e35aa5dd54b1dd5b112b108648f5c902ad586d4f945c5"},
519 | {file = "pillow-11.1.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:3764d53e09cdedd91bee65c2527815d315c6b90d7b8b79759cc48d7bf5d4f114"},
520 | {file = "pillow-11.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:31eba6bbdd27dde97b0174ddf0297d7a9c3a507a8a1480e1e60ef914fe23d352"},
521 | {file = "pillow-11.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b5d658fbd9f0d6eea113aea286b21d3cd4d3fd978157cbf2447a6035916506d3"},
522 | {file = "pillow-11.1.0-cp313-cp313-win32.whl", hash = "sha256:f86d3a7a9af5d826744fabf4afd15b9dfef44fe69a98541f666f66fbb8d3fef9"},
523 | {file = "pillow-11.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:593c5fd6be85da83656b93ffcccc2312d2d149d251e98588b14fbc288fd8909c"},
524 | {file = "pillow-11.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:11633d58b6ee5733bde153a8dafd25e505ea3d32e261accd388827ee987baf65"},
525 | {file = "pillow-11.1.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:70ca5ef3b3b1c4a0812b5c63c57c23b63e53bc38e758b37a951e5bc466449861"},
526 | {file = "pillow-11.1.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:8000376f139d4d38d6851eb149b321a52bb8893a88dae8ee7d95840431977081"},
527 | {file = "pillow-11.1.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ee85f0696a17dd28fbcfceb59f9510aa71934b483d1f5601d1030c3c8304f3c"},
528 | {file = "pillow-11.1.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:dd0e081319328928531df7a0e63621caf67652c8464303fd102141b785ef9547"},
529 | {file = "pillow-11.1.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e63e4e5081de46517099dc30abe418122f54531a6ae2ebc8680bcd7096860eab"},
530 | {file = "pillow-11.1.0-cp313-cp313t-win32.whl", hash = "sha256:dda60aa465b861324e65a78c9f5cf0f4bc713e4309f83bc387be158b077963d9"},
531 | {file = "pillow-11.1.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ad5db5781c774ab9a9b2c4302bbf0c1014960a0a7be63278d13ae6fdf88126fe"},
532 | {file = "pillow-11.1.0-cp313-cp313t-win_arm64.whl", hash = "sha256:67cd427c68926108778a9005f2a04adbd5e67c442ed21d95389fe1d595458756"},
533 | {file = "pillow-11.1.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:bf902d7413c82a1bfa08b06a070876132a5ae6b2388e2712aab3a7cbc02205c6"},
534 | {file = "pillow-11.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c1eec9d950b6fe688edee07138993e54ee4ae634c51443cfb7c1e7613322718e"},
535 | {file = "pillow-11.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e275ee4cb11c262bd108ab2081f750db2a1c0b8c12c1897f27b160c8bd57bbc"},
536 | {file = "pillow-11.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4db853948ce4e718f2fc775b75c37ba2efb6aaea41a1a5fc57f0af59eee774b2"},
537 | {file = "pillow-11.1.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:ab8a209b8485d3db694fa97a896d96dd6533d63c22829043fd9de627060beade"},
538 | {file = "pillow-11.1.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:54251ef02a2309b5eec99d151ebf5c9904b77976c8abdcbce7891ed22df53884"},
539 | {file = "pillow-11.1.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:5bb94705aea800051a743aa4874bb1397d4695fb0583ba5e425ee0328757f196"},
540 | {file = "pillow-11.1.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:89dbdb3e6e9594d512780a5a1c42801879628b38e3efc7038094430844e271d8"},
541 | {file = "pillow-11.1.0-cp39-cp39-win32.whl", hash = "sha256:e5449ca63da169a2e6068dd0e2fcc8d91f9558aba89ff6d02121ca8ab11e79e5"},
542 | {file = "pillow-11.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:3362c6ca227e65c54bf71a5f88b3d4565ff1bcbc63ae72c34b07bbb1cc59a43f"},
543 | {file = "pillow-11.1.0-cp39-cp39-win_arm64.whl", hash = "sha256:b20be51b37a75cc54c2c55def3fa2c65bb94ba859dde241cd0a4fd302de5ae0a"},
544 | {file = "pillow-11.1.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:8c730dc3a83e5ac137fbc92dfcfe1511ce3b2b5d7578315b63dbbb76f7f51d90"},
545 | {file = "pillow-11.1.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:7d33d2fae0e8b170b6a6c57400e077412240f6f5bb2a342cf1ee512a787942bb"},
546 | {file = "pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8d65b38173085f24bc07f8b6c505cbb7418009fa1a1fcb111b1f4961814a442"},
547 | {file = "pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:015c6e863faa4779251436db398ae75051469f7c903b043a48f078e437656f83"},
548 | {file = "pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:d44ff19eea13ae4acdaaab0179fa68c0c6f2f45d66a4d8ec1eda7d6cecbcc15f"},
549 | {file = "pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d3d8da4a631471dfaf94c10c85f5277b1f8e42ac42bade1ac67da4b4a7359b73"},
550 | {file = "pillow-11.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:4637b88343166249fe8aa94e7c4a62a180c4b3898283bb5d3d2fd5fe10d8e4e0"},
551 | {file = "pillow-11.1.0.tar.gz", hash = "sha256:368da70808b36d73b4b390a8ffac11069f8a5c85f29eff1f1b01bcf3ef5b2a20"},
552 | ]
553 |
554 | [package.extras]
555 | docs = ["furo", "olefile", "sphinx (>=8.1)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinxext-opengraph"]
556 | fpx = ["olefile"]
557 | mic = ["olefile"]
558 | tests = ["check-manifest", "coverage (>=7.4.2)", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout", "trove-classifiers (>=2024.10.12)"]
559 | typing = ["typing-extensions ; python_version < \"3.10\""]
560 | xmp = ["defusedxml"]
561 |
562 | [[package]]
563 | name = "platformdirs"
564 | version = "4.3.6"
565 | description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`."
566 | optional = false
567 | python-versions = ">=3.8"
568 | groups = ["main"]
569 | files = [
570 | {file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"},
571 | {file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"},
572 | ]
573 |
574 | [package.extras]
575 | docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4)"]
576 | test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.2)", "pytest-cov (>=5)", "pytest-mock (>=3.14)"]
577 | type = ["mypy (>=1.11.2)"]
578 |
579 | [[package]]
580 | name = "pycodestyle"
581 | version = "2.11.1"
582 | description = "Python style guide checker"
583 | optional = false
584 | python-versions = ">=3.8"
585 | groups = ["dev"]
586 | files = [
587 | {file = "pycodestyle-2.11.1-py2.py3-none-any.whl", hash = "sha256:44fe31000b2d866f2e41841b18528a505fbd7fef9017b04eff4e2648a0fadc67"},
588 | {file = "pycodestyle-2.11.1.tar.gz", hash = "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f"},
589 | ]
590 |
591 | [[package]]
592 | name = "pyflakes"
593 | version = "3.1.0"
594 | description = "passive checker of Python programs"
595 | optional = false
596 | python-versions = ">=3.8"
597 | groups = ["dev"]
598 | files = [
599 | {file = "pyflakes-3.1.0-py2.py3-none-any.whl", hash = "sha256:4132f6d49cb4dae6819e5379898f2b8cce3c5f23994194c24b77d5da2e36f774"},
600 | {file = "pyflakes-3.1.0.tar.gz", hash = "sha256:a0aae034c444db0071aa077972ba4768d40c830d9539fd45bf4cd3f8f6992efc"},
601 | ]
602 |
603 | [[package]]
604 | name = "requests"
605 | version = "2.32.3"
606 | description = "Python HTTP for Humans."
607 | optional = false
608 | python-versions = ">=3.8"
609 | groups = ["main"]
610 | files = [
611 | {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"},
612 | {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"},
613 | ]
614 |
615 | [package.dependencies]
616 | certifi = ">=2017.4.17"
617 | charset-normalizer = ">=2,<4"
618 | idna = ">=2.5,<4"
619 | urllib3 = ">=1.21.1,<3"
620 |
621 | [package.extras]
622 | socks = ["PySocks (>=1.5.6,!=1.5.7)"]
623 | use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
624 |
625 | [[package]]
626 | name = "requests-cache"
627 | version = "1.2.1"
628 | description = "A persistent cache for python requests"
629 | optional = false
630 | python-versions = ">=3.8"
631 | groups = ["main"]
632 | files = [
633 | {file = "requests_cache-1.2.1-py3-none-any.whl", hash = "sha256:1285151cddf5331067baa82598afe2d47c7495a1334bfe7a7d329b43e9fd3603"},
634 | {file = "requests_cache-1.2.1.tar.gz", hash = "sha256:68abc986fdc5b8d0911318fbb5f7c80eebcd4d01bfacc6685ecf8876052511d1"},
635 | ]
636 |
637 | [package.dependencies]
638 | attrs = ">=21.2"
639 | cattrs = ">=22.2"
640 | platformdirs = ">=2.5"
641 | requests = ">=2.22"
642 | url-normalize = ">=1.4"
643 | urllib3 = ">=1.25.5"
644 |
645 | [package.extras]
646 | all = ["boto3 (>=1.15)", "botocore (>=1.18)", "itsdangerous (>=2.0)", "pymongo (>=3)", "pyyaml (>=6.0.1)", "redis (>=3)", "ujson (>=5.4)"]
647 | bson = ["bson (>=0.5)"]
648 | docs = ["furo (>=2023.3,<2024.0)", "linkify-it-py (>=2.0,<3.0)", "myst-parser (>=1.0,<2.0)", "sphinx (>=5.0.2,<6.0.0)", "sphinx-autodoc-typehints (>=1.19)", "sphinx-automodapi (>=0.14)", "sphinx-copybutton (>=0.5)", "sphinx-design (>=0.2)", "sphinx-notfound-page (>=0.8)", "sphinxcontrib-apidoc (>=0.3)", "sphinxext-opengraph (>=0.9)"]
649 | dynamodb = ["boto3 (>=1.15)", "botocore (>=1.18)"]
650 | json = ["ujson (>=5.4)"]
651 | mongodb = ["pymongo (>=3)"]
652 | redis = ["redis (>=3)"]
653 | security = ["itsdangerous (>=2.0)"]
654 | yaml = ["pyyaml (>=6.0.1)"]
655 |
656 | [[package]]
657 | name = "six"
658 | version = "1.16.0"
659 | description = "Python 2 and 3 compatibility utilities"
660 | optional = false
661 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
662 | groups = ["main"]
663 | files = [
664 | {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
665 | {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
666 | ]
667 |
668 | [[package]]
669 | name = "soupsieve"
670 | version = "2.6"
671 | description = "A modern CSS selector implementation for Beautiful Soup."
672 | optional = false
673 | python-versions = ">=3.8"
674 | groups = ["main"]
675 | files = [
676 | {file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"},
677 | {file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"},
678 | ]
679 |
680 | [[package]]
681 | name = "typing-extensions"
682 | version = "4.12.2"
683 | description = "Backported and Experimental Type Hints for Python 3.8+"
684 | optional = false
685 | python-versions = ">=3.8"
686 | groups = ["main"]
687 | files = [
688 | {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
689 | {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
690 | ]
691 |
692 | [[package]]
693 | name = "url-normalize"
694 | version = "1.4.3"
695 | description = "URL normalization for Python"
696 | optional = false
697 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
698 | groups = ["main"]
699 | files = [
700 | {file = "url-normalize-1.4.3.tar.gz", hash = "sha256:d23d3a070ac52a67b83a1c59a0e68f8608d1cd538783b401bc9de2c0fac999b2"},
701 | {file = "url_normalize-1.4.3-py2.py3-none-any.whl", hash = "sha256:ec3c301f04e5bb676d333a7fa162fa977ad2ca04b7e652bfc9fac4e405728eed"},
702 | ]
703 |
704 | [package.dependencies]
705 | six = "*"
706 |
707 | [[package]]
708 | name = "urllib3"
709 | version = "2.2.3"
710 | description = "HTTP library with thread-safe connection pooling, file post, and more."
711 | optional = false
712 | python-versions = ">=3.8"
713 | groups = ["main"]
714 | files = [
715 | {file = "urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac"},
716 | {file = "urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9"},
717 | ]
718 |
719 | [package.extras]
720 | brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""]
721 | h2 = ["h2 (>=4,<5)"]
722 | socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
723 | zstd = ["zstandard (>=0.18.0)"]
724 |
725 | [metadata]
726 | lock-version = "2.1"
727 | python-versions = "^3.9"
728 | content-hash = "92cfb836603d3fa5af84e8b5de458c70cfa66ef8878a7125424609fa22921343"
729 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "leech"
3 | version = "1.0.0"
4 | description = "Turn a story on certain websites into an ebook for convenient reading"
5 | authors = ["David Lynch "]
6 | license = "MIT License"
7 | include = ["ebook/*", "sites/*"]
8 |
9 | [tool.poetry.scripts]
10 | leech = "leech:cli"
11 |
12 | [tool.poetry.dependencies]
13 | python = "^3.9"
14 | attrs = "^25.1.0"
15 | beautifulsoup4 = "^4.13.3"
16 | click-default-group = "^1.2.4"
17 | click = "^8.1.8"
18 | requests = "^2.32.3"
19 | requests-cache = "^1.2.1"
20 | Pillow = "^11.1.0"
21 | mintotp = "^0.3.0"
22 | lxml = "^5.3.1"
23 |
24 | [tool.poetry.group.dev.dependencies]
25 | flake8 = "^6.1.0"
26 |
27 | [build-system]
28 | requires = ["poetry-core>=1.0.0"]
29 | build-backend = "poetry.core.masonry.api"
--------------------------------------------------------------------------------
/sites/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | import click
3 | import glob
4 | import os
5 | import random
6 | import uuid
7 | import datetime
8 | import time
9 | import logging
10 | import urllib
11 | import re
12 | import hashlib
13 | from attrs import define, field, Factory
14 | from bs4 import BeautifulSoup
15 |
16 | logger = logging.getLogger(__name__)
17 | logger.addHandler(logging.NullHandler())
18 | _sites = []
19 |
20 |
21 | def _default_uuid_string(self):
22 | rd = random.Random(x=self.url)
23 | return str(uuid.UUID(int=rd.getrandbits(8*16), version=4))
24 |
25 |
26 | @define
27 | class Image:
28 | url: str
29 |
30 | def path(self):
31 | return f"images/{hashlib.sha1(self.url.encode()).hexdigest()}.{self.ext()}"
32 |
33 | def ext(self):
34 | if self.url.startswith("data:image") and 'base64' in self.url:
35 | head, base64data = self.url.split(',')
36 | return str(head.split(';')[0].split('/')[1])
37 | path = urllib.parse.urlparse(self.url).path
38 | return os.path.splitext(path)[1]
39 |
40 |
41 | @define
42 | class Chapter:
43 | title: str
44 | contents: str
45 | date: datetime.datetime = False
46 | images: dict = Factory(dict)
47 |
48 |
49 | @define
50 | class Section:
51 | title: str
52 | author: str
53 | url: str
54 | cover_url: str = ''
55 | id: str = Factory(_default_uuid_string, takes_self=True)
56 | contents: list = Factory(list)
57 | footnotes: list = Factory(list)
58 | tags: list = Factory(list)
59 | summary: str = ''
60 |
61 | def __iter__(self):
62 | return self.contents.__iter__()
63 |
64 | def __getitem__(self, index):
65 | return self.contents.__getitem__(index)
66 |
67 | def __setitem__(self, index, value):
68 | return self.contents.__setitem__(index, value)
69 |
70 | def __len__(self):
71 | return len(self.contents)
72 |
73 | def everychapter(self):
74 | for chapter in self.contents:
75 | if hasattr(chapter, '__iter__'):
76 | yield from chapter
77 | else:
78 | yield chapter
79 |
80 | def add(self, value, index=None):
81 | if index is not None:
82 | self.contents.insert(index, value)
83 | else:
84 | self.contents.append(value)
85 |
86 | def dates(self):
87 | for chapter in self.everychapter():
88 | yield chapter.date
89 |
90 |
91 | @define
92 | class Site:
93 | """A Site handles checking whether a URL might represent a site, and then
94 | extracting the content of a story from said site.
95 | """
96 | session: object = field()
97 | footnotes: list = field(factory=list, init=False)
98 | options: dict = Factory(
99 | lambda site: site.get_default_options(),
100 | takes_self=True
101 | )
102 |
103 | @classmethod
104 | def site_key(cls):
105 | if hasattr(cls, '_key'):
106 | return cls._key
107 | return cls.__name__
108 |
109 | @staticmethod
110 | def get_site_specific_option_defs():
111 | """Returns a list of click.option objects to add to CLI commands.
112 |
113 | It is best practice to ensure that these names are reasonably unique
114 | to ensure that they do not conflict with the core options, or other
115 | sites' options. It is OK for different sites' options to have the
116 | same name, but pains should be taken to ensure they remain semantically
117 | similar in meaning.
118 | """
119 | return [
120 | SiteSpecificOption(
121 | 'strip_colors',
122 | '--strip-colors/--no-strip-colors',
123 | default=True,
124 | help="If true, colors will be stripped from the text."
125 | ),
126 | SiteSpecificOption(
127 | 'image_fetch',
128 | '--fetch-images/--no-fetch-images',
129 | default=True,
130 | help="If true, images embedded in the story will be downloaded"
131 | ),
132 | SiteSpecificOption(
133 | 'spoilers',
134 | '--spoilers',
135 | choices=('include', 'inline', 'skip'),
136 | default='include',
137 | help="Whether to include spoilers"
138 | ),
139 | SiteSpecificOption(
140 | 'deprecated_skip_spoilers',
141 | '--skip-spoilers/--include-spoilers',
142 | help="If true, do not transcribe any tags that are marked as a spoiler. (DEPRECATED)",
143 | exposed=False,
144 | click_kwargs={
145 | "callback": lambda ctx, param, value: ctx.params.update({"spoilers": value and "skip" or "include"}),
146 | },
147 | ),
148 | SiteSpecificOption(
149 | 'parser',
150 | '--parser',
151 | help="Which HTML parser to use",
152 | choices=('lxml', 'html5lib', 'html.parser', 'lxml-xml'),
153 | default='lxml',
154 | ),
155 | ]
156 |
157 | @classmethod
158 | def get_default_options(cls):
159 | options = {}
160 | for option in cls.get_site_specific_option_defs():
161 | if option.exposed:
162 | options[option.name] = option.default
163 | return options
164 |
165 | @classmethod
166 | def interpret_site_specific_options(cls, **kwargs):
167 | """Returns options summarizing CLI flags provided.
168 |
169 | Only includes entries the user has explicitly provided as flags
170 | and will not contain default values. For that, use get_default_options().
171 | """
172 | options = {}
173 | for option in cls.get_site_specific_option_defs():
174 | option_value = kwargs.get(option.name)
175 | if option.exposed and option_value is not None:
176 | options[option.name] = option_value
177 | return options
178 |
179 | @staticmethod
180 | def matches(url):
181 | raise NotImplementedError()
182 |
183 | def extract(self, url):
184 | """Download a story from a given URL
185 |
186 | Args:
187 | url (string): A valid URL for this Site
188 | Returns:
189 | story (Section, defined above) with attributes:
190 | title (string)
191 | author (string)
192 | contents (list): list of Chapters (attrs class, defined above)
193 | """
194 | raise NotImplementedError()
195 |
196 | def login(self, login_details):
197 | raise NotImplementedError()
198 |
199 | def _soup(self, url, method=False, delay=0, retry=3, retry_delay=10, **kw):
200 | if not method:
201 | method = self.options.get('parser', 'lxml')
202 | if url.startswith('http://') or url.startswith('https://'):
203 | page = self.session.get(url, **kw)
204 | if not page:
205 | if page.status_code == 403 and page.headers.get('Server', False) == 'cloudflare' and "captcha-bypass" in page.text:
206 | raise CloudflareException("Couldn't fetch, probably because of Cloudflare protection", url)
207 | if retry and retry > 0:
208 | real_delay = retry_delay
209 | if 'Retry-After' in page.headers:
210 | real_delay = int(page.headers['Retry-After'])
211 | logger.warning("Load failed: waiting %s to retry (%s: %s)", real_delay, page.status_code, page.url)
212 | time.sleep(real_delay)
213 | return self._soup(url, method=method, retry=retry - 1, retry_delay=retry_delay, **kw)
214 | raise SiteException("Couldn't fetch", url)
215 | if delay and delay > 0 and not page.from_cache:
216 | time.sleep(delay)
217 | text = page.text
218 | fallback_base = url
219 | else:
220 | text = url
221 | fallback_base = ''
222 | soup = BeautifulSoup(text, method)
223 | return soup, (soup.head and soup.head.base) and soup.head.base.get('href') or fallback_base
224 |
225 | def _form_in_soup(self, soup):
226 | if soup.name == 'form':
227 | return soup
228 | return soup.find('form')
229 |
230 | def _form_data(self, soup):
231 | data = {}
232 | form = self._form_in_soup(soup)
233 | if not form:
234 | return data, '', ''
235 | for tag in form.find_all('input'):
236 | itype = tag.attrs.get('type', 'text')
237 | name = tag.attrs.get('name')
238 | if not name:
239 | continue
240 | value = tag.attrs.get('value', '')
241 | if itype in ('checkbox', 'radio') and not tag.attrs.get('checked', False):
242 | continue
243 | data[name] = value
244 | for select in form.find_all('select'):
245 | # todo: multiple
246 | name = select.attrs.get('name')
247 | if not name:
248 | continue
249 | data[name] = ''
250 | for option in select.find_all('option'):
251 | value = option.attrs.get('value', '')
252 | if value and option.attrs.get('selected'):
253 | data[name] = value
254 | for textarea in form.find_all('textarea'):
255 | name = textarea.attrs.get('name')
256 | if not name:
257 | continue
258 | data[name] = textarea.attrs.get('value', '')
259 |
260 | return data, form.attrs.get('action'), form.attrs.get('method', 'get').lower()
261 |
262 | def _new_tag(self, *args, **kw):
263 | soup = BeautifulSoup("", self.options.get('parser'))
264 | return soup.new_tag(*args, **kw)
265 |
266 | def _join_url(self, *args, **kwargs):
267 | return urllib.parse.urljoin(*args, **kwargs)
268 |
269 | def _footnote(self, contents, chapterid):
270 | """Register a footnote and return a link to that footnote"""
271 |
272 | # TODO: This embeds knowledge of what the generated filenames will be. Work out a better way.
273 |
274 | idx = len(self.footnotes) + 1
275 |
276 | # epub spec footnotes are all about epub:type on the footnote and the link
277 | # http://www.idpf.org/accessibility/guidelines/content/semantics/epub-type.php
278 | contents.name = 'div'
279 | contents.attrs['id'] = f'footnote{idx}'
280 | contents.attrs['epub:type'] = 'rearnote'
281 |
282 | # a backlink is essential for Kindle to think of this as a footnote
283 | # otherwise it doesn't get the inline-popup treatment
284 | # http://kindlegen.s3.amazonaws.com/AmazonKindlePublishingGuidelines.pdf
285 | # section 3.9.10
286 | backlink = self._new_tag('a', href=f'chapter{chapterid}.html#noteback{idx}')
287 | backlink.string = '^'
288 | contents.insert(0, backlink)
289 |
290 | self.footnotes.append(contents.prettify())
291 |
292 | # now build the link to the footnote to return, with appropriate
293 | # epub annotations.
294 | spoiler_link = self._new_tag('a')
295 | spoiler_link.attrs = {
296 | 'id': f'noteback{idx}',
297 | 'href': f'footnotes.html#footnote{idx}',
298 | 'epub:type': 'noteref',
299 | }
300 | spoiler_link.string = str(idx)
301 |
302 | return spoiler_link
303 |
304 | def _clean(self, contents, base=False):
305 | """Clean up story content to be more ebook-friendly
306 |
307 | TODO: this expects a soup as its argument, so the couple of API-driven sites can't use it as-is
308 | """
309 | # Cloudflare is used on many sites, and mangles things that look like email addresses
310 | # e.g. Point_Me_@_The_Sky becomes
311 | # Point_Me_<span class="__cf_email__" data-cfemail="...">[email protected]</span>_The_Sky
312 | # or the same span wrapped in an <a href="/cdn-cgi/l/email-protection"> link,
313 | # as handled below.
314 | for tag in contents.find_all(class_='__cf_email__'):
315 | # See: https://usamaejaz.com/cloudflare-email-decoding/
316 | enc = bytes.fromhex(tag['data-cfemail'])
317 | email = bytes([c ^ enc[0] for c in enc[1:]]).decode('utf8')
318 | if tag.parent.name == 'a' and tag.parent['href'].startswith('/cdn-cgi/l/email-protection'):
319 | tag = tag.parent
320 | tag.insert_before(email)
321 | tag.decompose()
322 | # strip colors
323 | if self.options['strip_colors']:
324 | for tag in contents.find_all(style=re.compile(r'(?:color|background)\s*:')):
325 | tag['style'] = re.sub(r'(?:color|background)\s*:[^;]+;?', '', tag['style'])
326 |
327 | if base:
328 | for img in contents.find_all('img', src=True):
329 | # Later epub processing needs absolute image URLs
330 | # print("fixing img src", img['src'], self._join_url(base, img['src']))
331 | img['src'] = self._join_url(base, img['src'])
332 | del img['srcset']
333 | del img['sizes']
334 |
335 | return contents
336 |
337 | def _finalize(self, story):
338 | # Call this on a story after it's fully extracted to clean up things
339 | for chapter in story:
340 | if hasattr(chapter, '__iter__'):
341 | self._finalize(chapter)
342 | else:
343 | self._process_images(chapter)
344 |
345 | if self.footnotes:
346 | story.footnotes = Chapter('Footnotes', '\n\n'.join(self.footnotes))
347 | self.footnotes = []
348 | self._process_images(story.footnotes)
349 |
350 | def _process_images(self, chapter):
351 | soup, base = self._soup(chapter.contents)
352 |
353 | if self.options.get('image_fetch'):
354 | for count, img in enumerate(soup.find_all('img', src=True)):
355 | # logger.info(f"Image in {chapter.title}: {img['src']}")
356 | if img['src'] not in chapter.images:
357 | chapter.images[img['src']] = Image(img['src'])
358 |
359 | img['src'] = chapter.images.get(img['src']).path()
360 | else:
361 | # Remove all images from the chapter so you don't get that annoying grey background.
362 | for img in soup.find_all('img'):
363 | # Note: alt="" will be completely removed here, which is consistent with the semantics
364 | if img.parent.name.lower() == "figure":
365 | # TODO: figcaption?
366 | img.parent.replace_with(img.get('alt', '🖼'))
367 | else:
368 | img.replace_with(img.get('alt', '🖼'))
369 |
370 | chapter.contents = str(soup)
371 |
372 |
373 | @define
374 | class SiteSpecificOption:
375 | """Represents a site-specific option that can be configured.
376 |
377 | Will be added to the CLI as a click.option -- many of these
378 | fields correspond to click.option arguments."""
379 | name: str
380 | flag_pattern: str
381 | type: object = None
382 | default: bool = False
383 | help: str = None
384 | choices: tuple = None
385 | exposed: bool = True
386 | click_kwargs: frozenset = field(converter=lambda kwargs: frozenset(kwargs.items()), default={})
387 |
388 | def __eq__(self, other):
389 | return self.name == other.name
390 |
391 | def __hash__(self):
392 | return hash(self.name)
393 |
394 | def as_click_option(self):
395 | return click.option(
396 | str(self.name),
397 | str(self.flag_pattern),
398 | type=self.choices and click.Choice(self.choices) or self.type,
399 | # Note: This default not matching self.default is intentional.
400 | # It ensures that we know if a flag was explicitly provided,
401 | # which keeps it from overriding options set in leech.json etc.
402 | # Instead, default is used in site_cls.get_default_options()
403 | default=None,
404 | help=self.help if self.help is not None else "",
405 | expose_value=self.exposed,
406 | **dict(self.click_kwargs)
407 | )
408 |
409 |
410 | class SiteException(Exception):
411 | pass
412 |
413 |
414 | class CloudflareException(SiteException):
415 | pass
416 |
417 |
418 | def register(site_class):
419 | _sites.append(site_class)
420 | return site_class
421 |
422 |
423 | def get(url):
424 | for site_class in _sites:
425 | match = site_class.matches(url)
426 | if match:
427 | logger.info("Handler: %s (%s)", site_class, match)
428 | return site_class, match
429 | raise NotImplementedError("Could not find a handler for " + url)
430 |
431 |
432 | def list_site_specific_options():
433 | """Returns a list of all site's click options, which will be presented to the user."""
434 |
435 | # Ensures that duplicate options are not added twice.
436 | # Especially important for subclassed sites (e.g. Xenforo sites)
437 | options = set()
438 |
439 | for site_class in _sites:
440 | options.update(site_class.get_site_specific_option_defs())
441 | return [option.as_click_option() for option in options]
442 |
443 |
444 | # And now, a particularly hacky take on a plugin system:
445 | # Make an __all__ out of all the python files in this directory that don't start
446 | # with __. Then import * them.
447 |
448 | modules = glob.glob(os.path.join(os.path.dirname(__file__), "*.py"))
449 | __all__ = [os.path.basename(f)[:-3] for f in modules if not os.path.basename(f).startswith("__")]
450 |
451 | from . import * # noqa
452 |
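A minimal sketch, for orientation, of what a new module dropped into this directory would need in order to be picked up by the plugin loop above. Everything here is invented for illustration (the ExampleSite name, the example.com URL pattern, the selectors); the real modules alongside this file (royalroad.py, ao3.py, ...) are the authoritative references.

    import datetime
    import re

    from . import register, Site, Section, Chapter


    @register
    class ExampleSite(Site):
        """Hypothetical single-page story host, for illustration only."""

        @staticmethod
        def matches(url):
            # Return a normalized URL when this handler applies, None otherwise;
            # get() above dispatches to the first registered class that matches.
            match = re.match(r'^(https?://example\.com/story/\d+)/?.*', url)
            if match:
                return match.group(1) + '/'

        def extract(self, url):
            soup, base = self._soup(url)
            story = Section(
                title=soup.find('h1').get_text().strip(),
                author='unknown',
                url=url,
            )
            content = soup.find('div', class_='story-text')
            self._clean(content, base)
            story.add(Chapter(
                title=story.title,
                contents=content.prettify(),
                date=datetime.datetime.now(),
            ))
            self._finalize(story)
            return story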
--------------------------------------------------------------------------------
/sites/ao3.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | import logging
4 | import datetime
5 | import re
6 | import requests_cache
7 | from . import register, Site, Section, Chapter, SiteException
8 |
9 | logger = logging.getLogger(__name__)
10 |
11 |
12 | @register
13 | class ArchiveOfOurOwn(Site):
14 | """Archive of Our Own: it has its own epub export, but the formatting is awful"""
15 | @staticmethod
16 | def matches(url):
17 | # e.g. http://archiveofourown.org/works/5683105/chapters/13092007
18 | match = re.match(r'^(https?://(?:www\.)?archiveofourown\.org/works/\d+)/?.*', url)
19 | if match:
20 | return match.group(1) + '/'
21 |
22 | def login(self, login_details):
23 | with requests_cache.disabled():
24 | # Can't just pass this url to _soup because I need the cookies later
25 | login = self.session.get('https://archiveofourown.org/users/login')
26 | soup, nobase = self._soup(login.text)
27 | post, action, method = self._form_data(soup.find(id='new_user'))
28 | post['user[login]'] = login_details[0]
29 | post['user[password]'] = login_details[1]
30 | # I feel the session *should* handle this cookies bit for me. But
31 | # it doesn't. And I don't know why.
32 | result = self.session.post(
33 | self._join_url(login.url, action),
34 | data=post, cookies=login.cookies
35 | )
36 | if result.ok:
37 | logger.info("Logged in as %s", login_details[0])
38 | else:
39 | logger.error("Failed to log in as %s", login_details[0])
40 |
41 | def extract(self, url):
42 | workid = re.match(r'^https?://(?:www\.)?archiveofourown\.org/works/(\d+)/?.*', url).group(1)
43 | return self._extract_work(workid)
44 |
45 | def _extract_work(self, workid):
46 | # Fetch the full work
47 | url = f'http://archiveofourown.org/works/{workid}?view_adult=true&view_full_work=true'
48 | logger.info("Extracting full work @ %s", url)
49 | soup, base = self._soup(url)
50 |
51 | if not soup.find(id='workskin'):
52 | raise SiteException("Can't find the story text; you may need to log in or flush the cache")
53 |
54 | story = Section(
55 | title=soup.select('#workskin > .preface .title')[0].text.strip(),
56 | author=soup.select('#workskin .preface .byline a')[0].text.strip(),
57 | summary=soup.select('#workskin .preface .summary blockquote')[0].prettify(),
58 | url=f'http://archiveofourown.org/works/{workid}',
59 | tags=[tag.get_text().strip() for tag in soup.select('.work.meta .tags a.tag')]
60 | )
61 |
62 | # Fetch the chapter list as well because it contains info that's not in the full work
63 | nav_soup, nav_base = self._soup(f'https://archiveofourown.org/works/{workid}/navigate')
64 | chapters = soup.select('#chapters > div')
65 | if len(chapters) == 1:
66 | # in a single-chapter story the #chapters div is actually the chapter
67 | chapters = [soup.find(id='chapters').parent]
68 |
69 | for index, chapter in enumerate(nav_soup.select('#main ol[role="navigation"] li')):
70 | link = chapter.find('a')
71 | logger.info("Extracting chapter %s", link.string)
72 |
73 | updated = datetime.datetime.strptime(
74 | chapter.find('span', class_='datetime').string,
75 | "(%Y-%m-%d)"
76 | )
77 |
78 | chapter_soup = chapters[index]
79 | if not chapter_soup:
80 | logger.warning("Couldn't find chapter %s in full work", index + 1)
81 | continue
82 |
83 | story.add(Chapter(
84 | title=link.string,
85 | # chapter_soup comes from the full-work page; single-chapter works were normalized above
86 | contents=self._chapter(chapter_soup, base),
87 | date=updated
88 | ))
89 |
90 | self._finalize(story)
91 |
92 | return story
93 |
94 | def _chapter(self, soup, base):
95 | content = soup.find('div', role='article')
96 |
97 | for landmark in content.find_all(class_='landmark'):
98 | landmark.decompose()
99 |
100 | # TODO: Maybe these should be footnotes instead?
101 | notes = soup.select('#chapters .end.notes')
102 | if notes:
103 | notes = notes[0]
104 | for landmark in notes.find_all(class_='landmark'):
105 | landmark.decompose()
106 |
107 | self._clean(content, base)
108 |
109 | return content.prettify() + (notes and notes.prettify() or '')
110 |
111 |
112 | @register
113 | class ArchiveOfOurOwnSeries(ArchiveOfOurOwn):
114 | _key = "ArchiveOfOurOwn"
115 |
116 | @staticmethod
117 | def matches(url):
118 | # e.g. http://archiveofourown.org/series/5683105/
119 | match = re.match(r'^(https?://archiveofourown\.org/series/\d+)/?.*', url)
120 | if match:
121 | return match.group(1) + '/'
122 |
123 | def extract(self, url):
124 | seriesid = re.match(r'^https?://archiveofourown\.org/series/(\d+)/?.*', url).group(1)
125 |
126 | soup, base = self._soup(f'http://archiveofourown.org/series/{seriesid}?view_adult=true')
127 |
128 | story = Section(
129 | title=soup.select('#main h2.heading')[0].text.strip(),
130 | author=soup.select('#main dl.series.meta a[rel="author"]')[0].string,
131 | url=f'http://archiveofourown.org/series/{seriesid}'
132 | )
133 |
134 | for work in soup.select('#main ul.series li.work'):
135 | workid = work.get('id').replace('work_', '')
136 | substory = self._extract_work(workid)
137 |
138 | # TODO: improve epub-writer to be able to generate a toc.ncx with nested headings
139 | story.add(substory)
140 |
141 | return story
142 |
--------------------------------------------------------------------------------
/sites/arbitrary.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | import logging
4 | from attrs import define
5 | import datetime
6 | import json
7 | import re
8 | import os.path
9 | from . import register, Site, Section, Chapter, SiteException
10 |
11 | logger = logging.getLogger(__name__)
12 |
13 | """
14 | Example JSON:
15 | {
16 | "url": "https://practicalguidetoevil.wordpress.com/table-of-contents/",
17 | "title": "A Practical Guide To Evil: Book 1",
18 | "author": "erraticerrata",
19 | "chapter_selector": "#main .entry-content > ul > li > a",
20 | "content_selector": "#main .entry-content",
21 | "filter_selector": ".sharedaddy, .wpcnt, style",
22 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png"
23 | }
24 | """
25 |
26 |
27 | @define
28 | class SiteDefinition:
29 | url: str
30 | title: str
31 | author: str
32 | content_selector: str
33 | # If present, find something within `content` to use as the chapter title; if not found, the text of the link to the chapter will be used
34 | content_title_selector: str = False
35 | # If present, find a specific element in the `content` to be the chapter text
36 | content_text_selector: str = False
37 | # If present, it looks for chapters linked from `url`. If not, it assumes `url` points to a chapter.
38 | chapter_selector: str = False
39 | # If present, use to find a link to the next content page (only used if not using chapter_selector)
40 | next_selector: str = False
41 | # If present, use to filter out content that matches the selector
42 | filter_selector: str = False
43 | cover_url: str = ''
44 |
45 |
46 | @register
47 | class Arbitrary(Site):
48 | """A way to describe an arbitrary side for a one-off fetch
49 | """
50 | @staticmethod
51 | def matches(url):
52 | # e.g. practical1.json
53 | if url.endswith('.json') and os.path.isfile(url):
54 | return url
55 |
56 | def extract(self, url):
57 | with open(url) as definition_file:
58 | definition = SiteDefinition(**json.load(definition_file))
59 |
60 | story = Section(
61 | title=definition.title,
62 | author=definition.author,
63 | url=url,
64 | cover_url=definition.cover_url
65 | )
66 |
67 | if definition.chapter_selector:
68 | soup, base = self._soup(definition.url)
69 | for chapter_link in soup.select(definition.chapter_selector):
70 | chapter_url = str(chapter_link.get('href'))
71 | if base:
72 | chapter_url = self._join_url(base, chapter_url)
73 | chapter_url = self._join_url(definition.url, chapter_url)
74 | for chapter in self._chapter(chapter_url, definition, title=chapter_link.string):
75 | story.add(chapter)
76 | else:
77 | # set of already processed urls. Stored to detect loops.
78 | found_content_urls = set()
79 | content_urls = [definition.url]
80 |
81 | def process_content_url(content_url):
82 | if content_url in found_content_urls:
83 | return None
84 | found_content_urls.add(content_url)
85 | for chapter in self._chapter(content_url, definition):
86 | story.add(chapter)
87 | return content_url
88 |
89 | while content_urls:
90 | for temp_url in content_urls:
91 | # stop inner loop once a new link is found
92 | if content_url := process_content_url(temp_url):
93 | break
94 | # reset url list
95 | content_urls = []
96 | if content_url and definition.next_selector:
97 | soup, base = self._soup(content_url)
98 | next_link = soup.select(definition.next_selector)
99 | if next_link:
100 | for next_link_item in next_link:
101 | next_link_url = str(next_link_item.get('href'))
102 | if base:
103 | next_link_url = self._join_url(base, next_link_url)
104 | content_urls.append(self._join_url(content_url, next_link_url))
105 |
106 | if not story:
107 | raise SiteException("No story content found; check the content selectors")
108 |
109 | self._finalize(story)
110 |
111 | return story
112 |
113 | def _chapter(self, url, definition, title=False):
114 | logger.info("Extracting chapter @ %s", url)
115 | soup, base = self._soup(url)
116 |
117 | chapters = []
118 |
119 | if not soup.select(definition.content_selector):
120 | return chapters
121 |
122 | # clean up a few things which will definitely break epubs:
123 | # TODO: expand this greatly, or make it configurable
124 | for namespaced in soup.find_all(re.compile(r'[a-z]+:[a-z]+')):
125 | # Namespaced elements are going to cause validation errors
126 | namespaced.decompose()
127 |
128 | for content in soup.select(definition.content_selector):
129 | if definition.filter_selector:
130 | for filtered in content.select(definition.filter_selector):
131 | filtered.decompose()
132 |
133 | if definition.content_title_selector:
134 | title_element = content.select(definition.content_title_selector)
135 | if title_element:
136 | title = title_element[0].get_text().strip()
137 |
138 | if definition.content_text_selector:
139 | # TODO: multiple text elements?
140 | content = content.select(definition.content_text_selector)[0]
141 |
142 | # TODO: consider `'\n'.join(map(str, content.contents))`
143 | content.name = 'div'
144 |
145 | self._clean(content, base)
146 |
147 | chapters.append(Chapter(
148 | title=title,
149 | contents=content.prettify(),
150 | # TODO: better date detection
151 | date=datetime.datetime.now()
152 | ))
153 |
154 | return chapters
155 |
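A rough sketch of the round trip this module expects, using the bundled practical1.json example; the "URL" handed to leech is just a local path to a definition file like the one in the docstring at the top of this file.

    import json

    definition = {
        "url": "https://practicalguidetoevil.wordpress.com/table-of-contents/",
        "title": "A Practical Guide To Evil: Book 1",
        "author": "erraticerrata",
        "chapter_selector": "#main .entry-content > ul > li > a",
        "content_selector": "#main .entry-content",
        "filter_selector": ".sharedaddy, .wpcnt, style",
    }

    with open("practical1.json", "w") as f:
        json.dump(definition, f, indent=2)

    # Arbitrary.matches("practical1.json") now returns the path itself, since it
    # ends in .json and exists on disk.  With chapter_selector present, extract()
    # treats definition["url"] as a table of contents; without it, next_selector
    # (if any) is followed from page to page starting at definition["url"].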
--------------------------------------------------------------------------------
/sites/deviantart.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | import logging
4 | import re
5 |
6 | from . import register, Section
7 | from .stash import Stash
8 |
9 | logger = logging.getLogger(__name__)
10 |
11 |
12 | @register
13 | class DeviantArt(Stash):
14 | @staticmethod
15 | def matches(url):
16 | # Need a collection page
17 | match = re.match(r'^https?://[^.]+\.deviantart\.com/(?:gallery|favourites)/\d+/?', url)
18 | if match:
19 | return match.group(0) + '/'
20 |
21 | def extract(self, url):
22 | soup, base = self._soup(url)
23 | content = soup.find(id="output")
24 | if not content:
25 | return
26 |
27 | if "gallery" in url:
28 | author = str(content.select('h1 a.u')[0].string)
29 | else:
30 | authors = set(str(author.string) for author in content.select('.stream .details a.u'))
31 | author = ', '.join(authors)
32 |
33 | story = Section(
34 | title=str(content.find(class_="folder-title").string),
35 | author=author,
36 | url=url
37 | )
38 |
39 | thumbs = content.select(".stream a.thumb")
40 | if not thumbs:
41 | return
42 | for thumb in thumbs:
43 | try:
44 | if thumb['href'] != '#':
45 | story.add(self._chapter(thumb['href']))
46 | except Exception:
47 | logger.exception("Couldn't extract chapters from thumbs")
48 |
49 | self._finalize(story)
50 |
51 | return story
52 |
--------------------------------------------------------------------------------
/sites/fanfictionnet.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | import logging
4 | import datetime
5 | import re
6 | import urllib.parse
7 | import attr
8 | from . import register, Site, SiteException, CloudflareException, Section, Chapter
9 |
10 | logger = logging.getLogger(__name__)
11 |
12 |
13 | @register
14 | class FanFictionNet(Site):
15 | _cloudflared = attr.ib(init=False, default=False)
16 |
17 | """FFN: it has a lot of stuff"""
18 | @staticmethod
19 | def matches(url):
20 | # e.g. https://www.fanfiction.net/s/4109686/3/Taking-Sights
21 | match = re.match(r'^https?://(?:www|m)\.fanfiction\.net/s/(\d+)/?.*', url)
22 | if match:
23 | return 'https://www.fanfiction.net/s/' + match.group(1) + '/'
24 |
25 | def extract(self, url):
26 | soup, base = self._soup(url)
27 |
28 | content = soup.find(id="content_wrapper_inner")
29 | if not content:
30 | raise SiteException("No content")
31 |
32 | metadata = content.find(id='profile_top')
33 |
34 | story = Section(
35 | title=str(metadata.find('b', class_="xcontrast_txt").string),
36 | author=str(metadata.find('a', class_="xcontrast_txt").string),
37 | url=url
38 | )
39 |
40 | dates = content.find_all('span', attrs={'data-xutime': True})
41 | published = False
42 | updated = False
43 | if len(dates) == 1:
44 | published = datetime.datetime.fromtimestamp(int(dates[0]['data-xutime']))
45 | elif len(dates) == 2:
46 | updated = datetime.datetime.fromtimestamp(int(dates[0]['data-xutime']))
47 | published = datetime.datetime.fromtimestamp(int(dates[1]['data-xutime']))
48 |
49 | chapter_select = content.find(id="chap_select")
50 | if chapter_select:
51 | base_url = re.search(r'(https?://[^/]+/s/\d+/?)', url)
52 | if not base_url:
53 | raise SiteException("Can't find base URL for chapters")
54 | base_url = base_url.group(0)
55 |
56 | suffix = re.search(r"'(/[^']+)';", chapter_select.attrs['onchange'])
57 | if not suffix:
58 | raise SiteException("Can't find URL suffix for chapters")
59 | suffix = suffix.group(1)
60 |
61 | # beautiful soup doesn't handle ffn's unclosed option tags at all well here
62 | options = re.findall(r'<option[^>]+value="?(\d+)"?[^>]*>([^<]+)', str(chapter_select))
63 | for option in options:
64 | story.add(Chapter(title=option[1], contents=self._chapter(base_url + option[0] + suffix), date=False))
65 |
66 | # fix up the dates
67 | story[-1].date = updated
68 | story[0].date = published
69 | else:
70 | story.add(Chapter(title=story.title, contents=self._chapter(url), date=published))
71 |
72 | self._finalize(story)
73 |
74 | return story
75 |
76 | def _chapter(self, url):
77 | logger.info("Fetching chapter @ %s", url)
78 | soup, base = self._soup(url)
79 |
80 | content = soup.find(id="content_wrapper_inner")
81 | if not content:
82 | raise SiteException("No chapter content")
83 |
84 | text = content.find(id="storytext")
85 | if not text:
86 | raise SiteException("No chapter content")
87 |
88 | # clean up some invalid xhtml attributes
89 | # TODO: be more selective about this somehow
90 | try:
91 | for tag in text.find_all(True):
92 | tag.attrs.clear()
93 | except Exception:
94 | logger.exception("Trouble cleaning attributes")
95 |
96 | self._clean(text, base)
97 |
98 | return text.prettify()
99 |
100 | def _soup(self, url, *args, **kwargs):
101 | if self._cloudflared:
102 | fallback = f"https://archive.org/wayback/available?url={urllib.parse.quote(url)}"
103 | try:
104 | response = self.session.get(fallback)
105 | wayback = response.json()
106 | closest = wayback['archived_snapshots']['closest']['url']
107 | return super()._soup(closest, *args, delay=1, **kwargs)
108 | except Exception:
109 | self.session.cache.delete_url(fallback)
110 | raise CloudflareException("Couldn't fetch, presumably because of Cloudflare protection, and falling back to archive.org failed; if some chapters were succeeding, try again?", url, fallback)
111 | try:
112 | return super()._soup(url, *args, **kwargs)
113 | except CloudflareException:
114 | self._cloudflared = True
115 | return self._soup(url, *args, **kwargs)
116 |
117 |
118 | @register
119 | class FictionPress(FanFictionNet):
120 | @staticmethod
121 | def matches(url):
122 | # e.g. https://www.fictionpress.com/s/2961893/1/Mother-of-Learning
123 | match = re.match(r'^https?://(?:www|m)\.fictionpress\.com/s/(\d+)/?.*', url)
124 | if match:
125 | return 'https://www.fictionpress.com/s/' + match.group(1) + '/'
126 |
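For reference, the Cloudflare fallback in FanFictionNet._soup above parses archive.org's Wayback availability API; a rough, illustrative shape of that response is sketched below (values invented; only archived_snapshots.closest.url is actually read, and an unarchived page comes back with an empty archived_snapshots, which is what trips the except branch).

    wayback = {
        "url": "https://www.fanfiction.net/s/4109686/3/",
        "archived_snapshots": {
            "closest": {
                "available": True,
                "url": "http://web.archive.org/web/20200101000000/https://www.fanfiction.net/s/4109686/3/",
                "timestamp": "20200101000000",
                "status": "200",
            }
        },
    }
    closest = wayback["archived_snapshots"]["closest"]["url"]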
--------------------------------------------------------------------------------
/sites/fictionlive.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | import logging
4 | import itertools
5 | import datetime
6 | import re
7 | from . import register, Site, Section, Chapter
8 |
9 | logger = logging.getLogger(__name__)
10 |
11 |
12 | @register
13 | class FictionLive(Site):
14 | """fiction.live: it's... mostly smut, I think? Terrible smut. But, hey, I had a rec to follow."""
15 | @staticmethod
16 | def matches(url):
17 | # e.g. https://fiction.live/stories/Descendant-of-a-Demon-Lord/SBBA49fQavNQMWxFT
18 | match = re.match(r'^(https?://fiction\.live/(?:stories|Sci-fi)/[^\/]+/[0-9a-zA-Z\-]+)/?.*', url)
19 | if match:
20 | return match.group(1)
21 |
22 | def extract(self, url):
23 | workid = re.match(r'^https?://fiction\.live/(?:stories|Sci-fi)/[^\/]+/([0-9a-zA-Z\-]+)/?.*', url).group(1)
24 |
25 | response = self.session.get(f'https://fiction.live/api/node/{workid}').json()
26 |
27 | story = Section(
28 | title=response['t'],
29 | author=response['u'][0]['n'],
30 | # Could normalize the URL here from the returns, but I'd have to
31 | # go look up how they handle special characters in titles...
32 | url=url
33 | )
34 | # There's a summary (or similar) in `d` and `b`, if I want to use that later.
35 |
36 | # TODO: extract these #special ones and send them off to an endnotes section?
37 | chapters = ({'ct': 0},) + tuple(c for c in response['bm'] if not c['title'].startswith('#special')) + ({'ct': 9999999999999999},)
38 |
39 | for prevc, currc, nextc in contextiterate(chapters):
40 | # `id`, `title`, `ct`, `isFirst`
41 | # https://fiction.live/api/anonkun/chapters/SBBA49fQavNQMWxFT/0/1448245168594
42 | # https://fiction.live/api/anonkun/chapters/SBBA49fQavNQMWxFT/1449266444062/1449615394752
43 | # https://fiction.live/api/anonkun/chapters/SBBA49fQavNQMWxFT/1502823848216/9999999999999998
44 | # i.e. format is [current timestamp] / [next timestamp - 1]
45 | chapter_url = f'https://fiction.live/api/anonkun/chapters/{workid}/{currc["ct"]}/{nextc["ct"] - 1}'
46 | logger.info("Extracting chapter \"%s\" @ %s", currc['title'], chapter_url)
47 | data = self.session.get(chapter_url).json()
48 | html = []
49 |
50 | updated = currc['ct']
51 | for segment in (d for d in data if not d.get('t', '').startswith('#special')):
52 | updated = max(updated, segment['ct'])
53 | # TODO: work out if this is actually enough types handled
54 | # There's at least also a reader post type, which mostly seems to be used for die rolls.
55 | try:
56 | if segment['nt'] == 'chapter':
57 | html.extend(('<div>', segment['b'].replace('<br>', '<br/>'), '</div>'))
58 | elif segment['nt'] == 'choice':
59 | if 'votes' not in segment:
60 | # Somehow, sometime, we end up with a choice without votes (or choices)
61 | continue
62 | votes = {}
63 | for vote in segment['votes']:
64 | votechoices = segment['votes'][vote]
65 | if isinstance(votechoices, str):
66 | # This caused issue #30, where for some reason one
67 | # choice on a story was a string rather than an
68 | # index into the choices array.
69 | continue
70 | if isinstance(votechoices, int):
71 | votechoices = (votechoices,)
72 | for choice in votechoices:
73 | if int(choice) < len(segment['choices']):
74 | # sometimes someone has voted for a presumably-deleted choice
75 | choice = segment['choices'][int(choice)]
76 | votes[choice] = votes.get(choice, 0) + 1
77 | choices = [(votes[v], v) for v in votes]
78 | choices.sort(reverse=True)
79 | html.append('<ul>')
80 | for votecount, choice in choices:
81 | html.append(f'<li>{choice}: {votecount}</li>')
82 | html.append('</ul>')
83 | elif segment['nt'] == 'readerPost':
84 | pass
85 | else:
86 | logger.info("Skipped chapter-segment of unhandled type: %s", segment['nt'])
87 | except Exception as e:
88 | logger.error("Skipped chapter-segment due to parsing error", exc_info=e)
89 |
90 | story.add(Chapter(
91 | title=currc['title'],
92 | contents='\n'.join(html),
93 | date=datetime.datetime.fromtimestamp(updated / 1000.0)
94 | ))
95 |
96 | self._finalize(story)
97 |
98 | return story
99 |
100 |
101 | # Stolen from the itertools docs
102 | def contextiterate(iterable):
103 | "s -> (s0,s1), (s1,s2), (s2, s3), ..."
104 | a, b, c = itertools.tee(iterable, 3)
105 | next(b, None)
106 | next(c, None)
107 | next(c, None)
108 | return zip(a, b, c)
109 |
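A quick illustrative check of the windowing contextiterate() provides (plain integers standing in for the chapter-mark dicts):

    if __name__ == '__main__':
        # Every element is visited once in the middle slot, which is why extract()
        # pads the chapter-mark list with sentinel {'ct': ...} entries at both ends.
        assert list(contextiterate([1, 2, 3, 4, 5])) == [(1, 2, 3), (2, 3, 4), (3, 4, 5)]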
--------------------------------------------------------------------------------
/sites/royalroad.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | import http.client
4 | import logging
5 | import datetime
6 | import re
7 | from . import register, Site, Section, Chapter, SiteSpecificOption
8 |
9 | logger = logging.getLogger(__name__)
10 |
11 |
12 | @register
13 | class RoyalRoad(Site):
14 | domain = r'royalroad'
15 |
16 | @staticmethod
17 | def get_site_specific_option_defs():
18 | return Site.get_site_specific_option_defs() + [
19 | SiteSpecificOption(
20 | 'offset',
21 | '--offset',
22 | type=int,
23 | help="The chapter index to start in the chapter marks."
24 | ),
25 | SiteSpecificOption(
26 | 'limit',
27 | '--limit',
28 | type=int,
29 | help="The chapter to end at at in the chapter marks."
30 | ),
31 | ]
32 |
33 | """Royal Road: a place where people write novels, mostly seeming to be light-novel in tone."""
34 | @classmethod
35 | def matches(cls, url):
36 | # e.g. https://royalroad.com/fiction/6752/lament-of-the-fallen
37 | match = re.match(r'^(https?://(?:www\.)?%s\.com/fiction/\d+)/?.*' % cls.domain, url)
38 | if match:
39 | return match.group(1) + '/'
40 |
41 | def extract(self, url):
42 | workid = re.match(r'^https?://(?:www\.)?%s\.com/fiction/(\d+)/?.*' % self.domain, url).group(1)
43 | soup, base = self._soup(f'https://www.{self.domain}.com/fiction/{workid}')
44 | # should have gotten redirected, for a valid title
45 |
46 | original_maxheaders = http.client._MAXHEADERS
47 | http.client._MAXHEADERS = 1000
48 |
49 | story = Section(
50 | title=soup.find('h1').string.strip(),
51 | author=soup.find('meta', property='books:author').get('content').strip(),
52 | url=soup.find('meta', property='og:url').get('content').strip(),
53 | cover_url=self._join_url(base, soup.find('img', class_='thumbnail')['src']),
54 | summary=str(soup.find('div', class_='description')).strip(),
55 | tags=[tag.get_text().strip() for tag in soup.select('span.tags a.fiction-tag')]
56 | )
57 |
58 | for index, chapter in enumerate(soup.select('#chapters tbody tr[data-url]')):
59 | if self.options['offset'] and index < self.options['offset']:
60 | continue
61 | if self.options['limit'] and index >= self.options['limit']:
62 | continue
63 | chapter_url = str(self._join_url(story.url, str(chapter.get('data-url'))))
64 |
65 | contents, updated = self._chapter(chapter_url, len(story) + 1)
66 |
67 | story.add(Chapter(title=chapter.find('a', href=True).string.strip(), contents=contents, date=updated))
68 |
69 | http.client._MAXHEADERS = original_maxheaders
70 |
71 | self._finalize(story)
72 |
73 | return story
74 |
75 | def _chapter(self, url, chapterid):
76 | logger.info("Extracting chapter @ %s", url)
77 | soup, base = self._soup(url)
78 | content = soup.find('div', class_='chapter-content')
79 |
80 | self._clean(content, full_page=soup, base=base)
81 | self._clean_spoilers(content, chapterid)
82 |
83 | content = str(content)
84 |
85 | author_note = soup.find_all('div', class_='author-note-portlet')
86 |
87 | if len(author_note) == 1:
88 | # Find the parent of chapter-content and check if the author's note is the first child div
89 | if 'author-note-portlet' in soup.find('div', class_='chapter-content').parent.find('div')['class']:
90 | content = str(author_note[0]) + '<hr/>' + content
91 | else: # The author note must be after the chapter content
92 | content = content + '<hr/>' + str(author_note[0])
93 | elif len(author_note) == 2:
94 | content = str(author_note[0]) + '<hr/>' + content + '<hr/>' + str(author_note[1])
95 |
96 | updated = datetime.datetime.fromtimestamp(
97 | int(soup.find(class_="profile-info").find('time').get('unixtime'))
98 | )
99 |
100 | return content, updated
101 |
102 | def _clean(self, contents, full_page, base=False):
103 | contents = super()._clean(contents, base=base)
104 |
105 | # Royalroad has started inserting "this was stolen" notices into its
106 | # HTML, and hiding them with CSS. Currently the CSS is very easy to
107 | # find, so do so and filter them out.
108 | for style in full_page.find_all('style'):
109 | if m := re.match(r'\s*\.(\w+)\s*{[^}]*display:\s*none;[^}]*}', style.string):
110 | for warning in contents.find_all(class_=m.group(1)):
111 | warning.decompose()
112 |
113 | return contents
114 |
115 | def _clean_spoilers(self, content, chapterid):
116 | # Spoilers to footnotes
117 | for spoiler in content.find_all(class_=('spoiler-new')):
118 | spoiler_title = spoiler.get('data-caption')
119 | new_spoiler = self._new_tag('div', class_="leech-spoiler")
120 | if self.options['spoilers'] == 'skip':
121 | new_spoiler.append(spoiler_title and f'[SPOILER: {spoiler_title}]' or '[SPOILER]')
122 | elif self.options['spoilers'] == 'inline':
123 | if spoiler_title:
124 | new_spoiler.append(f"{spoiler_title}: ")
125 | new_spoiler.append(spoiler)
126 | else:
127 | link = self._footnote(spoiler, chapterid)
128 | if spoiler_title:
129 | link.string = spoiler_title
130 | new_spoiler.append(link)
131 | spoiler.replace_with(new_spoiler)
132 |
133 |
134 | @register
135 | class RoyalRoadL(RoyalRoad):
136 | domain = 'royalroadl'
137 |
--------------------------------------------------------------------------------
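A note on the matcher above: RoyalRoad.matches normalizes any fiction, chapter, or table-of-contents URL down to the canonical /fiction/<id>/ root before extract runs. A minimal standalone sketch of that normalization, reusing the example URL from the comment in matches (illustration only, nothing is fetched):

    import re

    url = "https://royalroad.com/fiction/6752/lament-of-the-fallen"
    match = re.match(r'^(https?://(?:www\.)?royalroad\.com/fiction/\d+)/?.*', url)
    if match:
        print(match.group(1) + '/')  # https://royalroad.com/fiction/6752/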
/sites/stash.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | import logging
4 | import datetime
5 | import re
6 | from . import register, Site, SiteException, Section, Chapter
7 |
8 | logger = logging.getLogger(__name__)
9 |
10 |
11 | @register
12 | class Stash(Site):
13 | @staticmethod
14 | def matches(url):
15 | # Need a stack page
16 | match = re.match(r'^(https?://sta\.sh/2.+)/?.*', url)
17 | if match:
18 | return match.group(1) + '/'
19 |
20 | def extract(self, url):
21 | soup, base = self._soup(url)
22 | content = soup.find(id="stash-body")
23 | if not content:
24 | return
25 |
26 | # metadata = content.find(id='profile_top')
27 | story = Section(
28 | title=str(soup.find(class_="stash-folder-name").h2.string),
29 |             author=str(soup.find('span', class_="oh-stashlogo-name").string).removesuffix("'s"),
30 | url=url
31 | )
32 |
33 | thumbs = content.select(".stash-folder-stream .thumb")
34 | if not thumbs:
35 | return
36 | for thumb in thumbs:
37 | try:
38 | if thumb['href'] != '#':
39 | story.add(self._chapter(thumb['href']))
40 | except Exception:
41 | logger.exception("Couldn't extract chapters from thumbs")
42 |
43 | self._finalize(story)
44 |
45 | return story
46 |
47 | def _chapter(self, url):
48 | logger.info("Fetching chapter @ %s", url)
49 | soup, base = self._soup(url)
50 |
51 | content = soup.find(class_="journal-wrapper")
52 | if not content:
53 | raise SiteException("No content")
54 |
55 | title = str(content.find(class_="gr-top").find(class_='metadata').h2.a.string)
56 |
57 | text = content.find(class_="text")
58 |
59 | # clean up some invalid xhtml attributes
60 | # TODO: be more selective about this somehow
61 | try:
62 | for tag in text.find_all(True):
63 |                 tag.attrs = {}
64 | except Exception as e:
65 | raise SiteException("Trouble cleaning attributes", e)
66 |
67 | self._clean(text, base)
68 |
69 | return Chapter(title=title, contents=text.prettify(), date=self._date(soup))
70 |
71 | def _date(self, soup):
72 | maybe_date = soup.find('div', class_="dev-metainfo-details").find('span', ts=True)
73 | return datetime.datetime.fromtimestamp(int(maybe_date['ts']))
74 |
--------------------------------------------------------------------------------
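Stash._chapter strips every attribute from the chapter body because, per the comment above, the markup carries invalid xhtml attributes. BeautifulSoup lets that cleanup be done by reassigning tag.attrs; a minimal illustration on an invented snippet of markup:

    from bs4 import BeautifulSoup

    soup = BeautifulSoup('<p align="center"><b data-x="1">hi</b></p>', 'html.parser')
    for tag in soup.find_all(True):
        tag.attrs = {}  # drop every attribute, keep the tag itself
    print(soup)  # <p><b>hi</b></p>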
/sites/wattpad.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | import logging
4 | import datetime
5 | import re
6 | from . import register, Site, Section, Chapter
7 |
8 | logger = logging.getLogger(__name__)
9 |
10 |
11 | @register
12 | class Wattpad(Site):
13 | """Wattpad"""
14 | @classmethod
15 | def matches(cls, url):
16 | # e.g. https://www.wattpad.com/story/208753031-summoned-to-have-tea-with-the-demon-lord-i-guess
17 | # chapter URLs are e.g. https://www.wattpad.com/818687865-summoned-to-have-tea-with-the-demon-lord-i-guess
18 |         match = re.match(r'^(https?://(?:www\.)?wattpad\.com/story/\d+)/?.*', url)
19 | if match:
20 | # the story-title part is unnecessary
21 | return match.group(1)
22 |
23 | def extract(self, url):
24 |         workid = re.match(r'^https?://(?:www\.)?wattpad\.com/story/(\d+)/?.*', url).group(1)
25 | info = self.session.get(f"https://www.wattpad.com/api/v3/stories/{workid}").json()
26 |
27 | story = Section(
28 | title=info['title'],
29 | author=info['user']['name'],
30 | url=url,
31 | cover_url=info['cover']
32 | )
33 |
34 | for chapter in info['parts']:
35 | story.add(Chapter(
36 | title=chapter['title'],
37 | contents=self._chapter(chapter['id']),
38 | # "2020-05-03T22:14:29Z"
39 | date=datetime.datetime.fromisoformat(chapter['createDate'].rstrip('Z')) # modifyDate also?
40 | ))
41 |
42 | self._finalize(story)
43 |
44 | return story
45 |
46 | def _chapter(self, chapterid):
47 | logger.info(f"Extracting chapter @ {chapterid}")
48 | api = self.session.get(f"https://www.wattpad.com/apiv2/storytext?id={chapterid}")
49 |         return '<div>' + api.text + '</div>'
50 |
--------------------------------------------------------------------------------
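The Wattpad scraper works purely against the two endpoints used above: api/v3/stories/<id> for metadata and apiv2/storytext?id=<part> for chapter HTML. A minimal sketch of the metadata call outside the Site machinery (the story id is the example from the comment in matches; only fields that extract already reads are accessed):

    import requests

    workid = "208753031"  # example id from the comment in matches()
    info = requests.get(f"https://www.wattpad.com/api/v3/stories/{workid}").json()
    print(info['title'], info['user']['name'], len(info['parts']))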
/sites/xenforo.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | import datetime
4 | import re
5 | import logging
6 | import requests_cache
7 |
8 | from . import Site, SiteException, SiteSpecificOption, Section, Chapter
9 | import mintotp
10 |
11 | logger = logging.getLogger(__name__)
12 |
13 |
14 | class XenForo(Site):
15 | """XenForo is forum software that powers a number of fiction-related forums."""
16 |
17 | domain = False
18 | index_urls = False
19 |
20 | @staticmethod
21 | def get_site_specific_option_defs():
22 | return Site.get_site_specific_option_defs() + [
23 | SiteSpecificOption(
24 | 'include_index',
25 | '--include-index/--no-include-index',
26 | default=False,
27 | help="If true, the post marked as an index will be included as a chapter."
28 | ),
29 | SiteSpecificOption(
30 | 'offset',
31 | '--offset',
32 | type=int,
33 |                 help="The chapter index to start at in the chapter marks."
34 | ),
35 | SiteSpecificOption(
36 | 'limit',
37 | '--limit',
38 | type=int,
39 |                 help="The chapter index to end at in the chapter marks."
40 | ),
41 | ]
42 |
43 | @classmethod
44 | def matches(cls, url):
45 | match = re.match(r'^(https?://%s/(?:index\.php\?)?threads/[^/]*\d+/(?:\d+/)?reader)/?.*' % cls.domain, url)
46 | if match:
47 | return match.group(1)
48 | match = re.match(r'^(https?://%s/(?:index\.php\?)?threads/[^/]*\d+)/?.*' % cls.domain, url)
49 | if match:
50 | return match.group(1) + '/'
51 |
52 | def siteurl(self, path):
53 | if self.index_urls:
54 | return f'https://{self.domain}/index.php?{path}'
55 | return f'https://{self.domain}/{path}'
56 |
57 | def login(self, login_details):
58 | with requests_cache.disabled():
59 | # Can't just pass this url to _soup because I need the cookies later
60 | login = self.session.get(self.siteurl('login/'))
61 | soup, nobase = self._soup(login.text)
62 | post, action, method = self._form_data(soup.find(class_='p-body-content'))
63 | post['login'] = login_details[0]
64 | post['password'] = login_details[1]
65 | # I feel the session *should* handle this cookies bit for me. But
66 | # it doesn't. And I don't know why.
67 | result = self.session.post(
68 | self._join_url(login.url, action),
69 | data=post, cookies=login.cookies
70 | )
71 | if not result.ok:
72 | return logger.error("Failed to log in as %s", login_details[0])
73 | soup, nobase = self._soup(result.text)
74 | if twofactor := soup.find('form', action="/login/two-step"):
75 | if len(login_details) < 3:
76 | return logger.error("Failed to log in as %s; login requires 2FA secret", login_details[0])
77 | post, action, method = self._form_data(twofactor)
78 | post['code'] = mintotp.totp(login_details[2])
79 | result = self.session.post(
80 | self._join_url(login.url, action),
81 | data=post, cookies=login.cookies
82 | )
83 | if not result.ok:
84 | return logger.error("Failed to log in as %s; 2FA failed", login_details[0])
85 | logger.info("Logged in as %s", login_details[0])
86 |
87 | def extract(self, url):
88 | soup, base = self._soup(url)
89 |
90 | story = self._base_story(soup)
91 |
92 | threadmark_categories = {}
93 | # Note to self: in the source this is data-categoryId, but the parser
94 | # in bs4 lowercases tags and attributes...
95 | for cat in soup.find_all('a', attrs={'data-categoryid': True}):
96 | threadmark_categories[int(cat['data-categoryid'])] = cat['title']
97 |
98 | if url.endswith('/reader'):
99 | reader_url = url
100 | elif soup.find('a', class_='readerToggle'):
101 | reader_url = soup.find('a', class_='readerToggle').get('href')
102 | elif soup.find('div', class_='threadmarks-reader'):
103 | # Technically this is the xenforo2 bit, but :shrug:
104 | reader_url = soup.find('div', class_='threadmarks-reader').find('a').get('href')
105 | else:
106 | reader_url = False
107 |
108 | if reader_url:
109 | match = re.search(r'\d+/(\d+)/reader', reader_url)
110 | if match:
111 | cat = int(match.group(1))
112 | if cat != 1 and cat in threadmark_categories:
113 | story.title = f'{story.title} ({threadmark_categories[cat]})'
114 | idx = 0
115 | while reader_url:
116 | reader_url = self._join_url(base, reader_url)
117 | logger.info("Fetching chapters @ %s", reader_url)
118 | reader_soup, reader_base = self._soup(reader_url)
119 | posts = self._posts_from_page(reader_soup)
120 |
121 | for post in posts:
122 | idx = idx + 1
123 | if self.options['offset'] and idx < self.options['offset']:
124 | continue
125 | if self.options['limit'] and idx >= self.options['limit']:
126 | continue
127 | title = self._threadmark_title(post)
128 | logger.info("Extracting chapter \"%s\"", title)
129 |
130 | story.add(Chapter(
131 | title=title,
132 | contents=self._clean_chapter(post, len(story) + 1, base),
133 | date=self._post_date(post)
134 | ))
135 |
136 | reader_url = False
137 | if reader_soup.find('link', rel='next'):
138 | reader_url = reader_soup.find('link', rel='next').get('href')
139 | else:
140 | # TODO: Research whether reader mode is guaranteed to be enabled
141 | # when threadmarks are; if so, can delete this branch.
142 | marks = [
143 | mark for mark in self._chapter_list(url)
144 | if '/members' not in mark.get('href') and '/threadmarks' not in mark.get('href')
145 | ]
146 | marks = marks[self.options['offset']:self.options['limit']]
147 |
148 | for idx, mark in enumerate(marks, 1):
149 | href = self._join_url(base, mark.get('href'))
150 | title = str(mark.string).strip()
151 | logger.info("Fetching chapter \"%s\" @ %s", title, href)
152 | contents, post_date = self._chapter(href, idx)
153 | chapter = Chapter(title=title, contents=contents, date=post_date)
154 | story.add(chapter)
155 |
156 | self._finalize(story)
157 |
158 | return story
159 |
160 | def _base_story(self, soup):
161 | url = soup.find('meta', property='og:url').get('content')
162 | title = soup.select('div.titleBar > h1')[0]
163 | # clean out informational bits from the title
164 | for tag in title.find_all(class_='prefix'):
165 | tag.decompose()
166 | tags = [tag.get_text().strip() for tag in soup.select('div.tagBlock a.tag')]
167 | return Section(
168 | title=title.get_text().strip(),
169 | author=soup.find('p', id='pageDescription').find('a', class_='username').get_text(),
170 | url=url,
171 | tags=tags
172 | )
173 |
174 | def _posts_from_page(self, soup, postid=False):
175 | if postid:
176 | return soup.find('li', id='post-' + postid)
177 | return soup.select('#messageList > li.hasThreadmark')
178 |
179 | def _threadmark_title(self, post):
180 | # Get the title, removing "Threadmark:" which precedes it
181 | return ''.join(post.select('div.threadmarker > span.label')[0].findAll(text=True, recursive=False)).strip()
182 |
183 | def _chapter_list(self, url):
184 | try:
185 | return self._chapter_list_threadmarks(url)
186 | except SiteException as e:
187 | logger.debug("Tried threadmarks (%r)", e.args)
188 | return self._chapter_list_index(url)
189 |
190 | def _chapter_list_threadmarks(self, url):
191 | soup, base = self._soup(url)
192 |
193 | threadmarks_link = soup.find(class_="threadmarksTrigger", href=True)
194 | if not threadmarks_link:
195 | try:
196 | threadmarks_link = soup.select('.threadmarkMenus a.OverlayTrigger')[0]
197 | except IndexError:
198 | pass
199 |
200 | if not threadmarks_link:
201 | raise SiteException("No threadmarks")
202 |
203 | href = threadmarks_link.get('href')
204 | soup, base = self._soup(self._join_url(base, href))
205 |
206 | fetcher = soup.find(class_='ThreadmarkFetcher')
207 | while fetcher:
208 | # ThreadmarksPro, hiding some threadmarks. Means the API is available to do this.
209 | # Note: the fetched threadmarks can contain more placeholder elements to fetch. Ergo, loop.
210 | # Good test case: https://forums.sufficientvelocity.com/threads/ignition-mtg-multicross-planeswalker-pc.26099/threadmarks
211 | # e.g.:
212 | response = self.session.post(self.siteurl('threads/threadmarks/load-range'), data={
213 | # I did try a fetch on min/data-min+data-max, but there seems
214 | # to be an absolute limit which the API fetch won't override
215 | 'min': fetcher.get('data-range-min'),
216 | 'max': fetcher.get('data-range-max'),
217 | 'thread_id': fetcher.get('data-thread-id'),
218 | 'category_id': fetcher.get('data-category-id'),
219 | '_xfResponseType': 'json',
220 | }).json()
221 | responseSoup, nobase = self._soup(response['templateHtml'])
222 | fetcher.replace_with(responseSoup)
223 | fetcher = soup.find(class_='ThreadmarkFetcher')
224 |
225 | marks = soup.find(class_='threadmarks').select('li.primaryContent.threadmarkListItem a, li.primaryContent.threadmarkItem a')
226 | if not marks:
227 | raise SiteException("No marks on threadmarks page")
228 |
229 | return marks
230 |
231 | def _chapter_list_index(self, url):
232 | post = self._post_from_url(url)
233 | if not post:
234 | raise SiteException("Unparseable post URL", url)
235 |
236 | links = post.find('blockquote', class_='messageText').find_all('a', class_='internalLink')
237 | if not links:
238 | raise SiteException("No links in index?")
239 |
240 | if self.options['include_index']:
241 | fake_link = self._new_tag('a', href=url)
242 | fake_link.string = "Index"
243 | links.insert(0, fake_link)
244 |
245 | return links
246 |
247 | def _chapter(self, url, chapterid):
248 | post, base = self._post_from_url(url)
249 |
250 | return self._clean_chapter(post, chapterid, base), self._post_date(post)
251 |
252 | def _post_from_url(self, url):
253 | # URLs refer to specific posts, so get just that one
254 | # if no specific post referred to, get the first one
255 | match = re.search(r'posts/(\d+)/?', url)
256 | if not match:
257 | match = re.match(r'.+#post-(\d+)$', url)
258 | # could still be nothing here
259 | postid = match and match.group(1)
260 | if postid:
261 | # create a proper post-url, because threadmarks can sometimes
262 | # mess up page-wise with anchors
263 | url = self.siteurl(f'posts/{postid}/')
264 | soup, base = self._soup(url, 'lxml')
265 |
266 | if postid:
267 | return self._posts_from_page(soup, postid), base
268 |
269 | # just the first one in the thread, then
270 | return soup.find('li', class_='message'), base
271 |
272 | def _chapter_contents(self, post):
273 | return post.find('blockquote', class_='messageText')
274 |
275 | def _clean_chapter(self, post, chapterid, base):
276 | post = self._chapter_contents(post)
277 | post.name = 'div'
278 | # mostly, we want to remove colors because the Kindle is terrible at them
279 | # TODO: find a way to denote colors, because it can be relevant
280 | # TODO: at least invisitext, because outside of silly DC Lantern stuff, it's the most common
281 | for tag in post.find_all(style=True):
282 | if tag['style'] == 'color: transparent' and tag.text == 'TAB':
283 | # Some stories fake paragraph indents like this. The output
284 | # stylesheet will handle this just fine.
285 | tag.decompose()
286 | else:
287 | # There's a few things which xenforo does as styles, despite there being perfectly good tags
288 | # TODO: more robust CSS parsing? This is very whitespace dependent, if nothing else.
289 | if "font-family: 'Courier New'" in tag['style']:
290 | tag.wrap(self._new_tag('code'))
291 | if "text-decoration: strikethrough" in tag['style']:
292 | tag.wrap(self._new_tag('strike'))
293 | if "margin-left" in tag['style']:
294 | continue
295 | del tag['style']
296 | for tag in post.select('.quoteExpand, .bbCodeBlock-expandLink, .bbCodeBlock-shrinkLink'):
297 | tag.decompose()
298 | for tag in post.find_all('noscript'):
299 | # TODO: strip the noscript from these?
300 | # mostly this will be the lazyload images
301 | tag.decompose()
302 | for tag in post.select('img.lazyload[data-src]'):
303 |             tag['src'] = tag['data-src']
304 | if tag['src'].startswith('proxy.php'):
305 |                 tag['src'] = f"https://{self.domain}/{tag['src']}"
306 | self._clean(post, base)
307 | self._clean_spoilers(post, chapterid)
308 | return post.prettify()
309 |
310 | def _clean_spoilers(self, post, chapterid):
311 | # spoilers don't work well, so turn them into epub footnotes
312 | for spoiler in post.find_all(class_='ToggleTriggerAnchor'):
313 | spoiler_title = spoiler.find(class_='SpoilerTitle')
314 | if self.options['skip_spoilers']:
315 | link = self._footnote(spoiler.find(class_='SpoilerTarget').extract(), chapterid)
316 | if spoiler_title:
317 | link.string = spoiler_title.get_text()
318 | else:
319 | if spoiler_title:
320 | link = f'[SPOILER: {spoiler_title.get_text()}]'
321 | else:
322 | link = '[SPOILER]'
323 | new_spoiler = self._new_tag('div', class_="leech-spoiler")
324 | new_spoiler.append(link)
325 | spoiler.replace_with(new_spoiler)
326 |
327 | def _post_date(self, post):
328 | maybe_date = post.find(class_='DateTime')
329 | if 'data-time' in maybe_date.attrs:
330 | return datetime.datetime.fromtimestamp(int(maybe_date['data-time']))
331 | if 'title' in maybe_date.attrs:
332 | # title="Feb 24, 2015 at 1:17 PM"
333 | return datetime.datetime.strptime(maybe_date['title'], "%b %d, %Y at %I:%M %p")
334 | raise SiteException("No date", maybe_date)
335 |
336 |
337 | class XenForoIndex(XenForo):
338 | @classmethod
339 | def matches(cls, url):
340 | match = re.match(r'^(https?://%s/posts/\d+)/?.*' % cls.domain, url)
341 | if match:
342 | return match.group(1) + '/'
343 |
344 | def _chapter_list(self, url):
345 | return self._chapter_list_index(url)
346 |
--------------------------------------------------------------------------------
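XenForo._post_date prefers the machine-readable data-time attribute and only falls back to parsing the human-readable title. A quick check of that fallback format string against the example date quoted in the comment:

    import datetime

    title = "Feb 24, 2015 at 1:17 PM"  # example from the comment in _post_date
    print(datetime.datetime.strptime(title, "%b %d, %Y at %I:%M %p"))
    # 2015-02-24 13:17:00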
/sites/xenforo2.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | import datetime
4 | import logging
5 |
6 | from . import register, Section, SiteException
7 | from .xenforo import XenForo, XenForoIndex
8 |
9 | logger = logging.getLogger(__name__)
10 |
11 |
12 | class XenForo2(XenForo):
13 | def _base_story(self, soup):
14 | url = soup.find('meta', property='og:url').get('content')
15 | title = soup.select('h1.p-title-value')[0]
16 | # clean out informational bits from the title
17 | for tag in title.select('.labelLink,.label-append'):
18 | tag.decompose()
19 | tags = [tag.get_text().strip() for tag in soup.select('.tagList a.tagItem')]
20 | return Section(
21 | title=title.get_text().strip(),
22 | author=soup.find('div', class_='p-description').find('a', class_='username').get_text(),
23 | url=url,
24 | tags=tags
25 | )
26 |
27 | def _posts_from_page(self, soup, postid=False):
28 | if postid:
29 | return soup.find('article', id='js-post-' + postid)
30 | return soup.select('article.message--post')
31 |
32 | def _threadmark_title(self, post):
33 | # Get the title, removing "Threadmark:" which precedes it
34 | return post.find('span', class_='threadmarkLabel').get_text()
35 |
36 | def _chapter_contents(self, post):
37 | return post.find('div', class_='message-userContent')
38 |
39 | def _clean_spoilers(self, post, chapterid):
40 | # spoilers don't work well, so turn them into epub footnotes
41 | for spoiler in post.find_all(class_='bbCodeSpoiler'):
42 | spoiler_title = spoiler.find(class_='bbCodeSpoiler-button-title')
43 | spoiler_contents = spoiler.find(class_='bbCodeBlock-content').extract()
44 | new_spoiler = self._new_tag('div', class_="leech-spoiler")
45 | if self.options['spoilers'] == 'skip':
46 | new_spoiler.append(spoiler_title and f'[SPOILER: {spoiler_title.get_text()}]' or '[SPOILER]')
47 | elif self.options['spoilers'] == 'inline':
48 | if spoiler_title:
49 | new_spoiler.append(f"{spoiler_title.get_text()}: ")
50 | new_spoiler.append(spoiler_contents)
51 | else:
52 | link = self._footnote(spoiler_contents, chapterid)
53 | if spoiler_title:
54 | link.string = spoiler_title.get_text()
55 | new_spoiler.append(link)
56 | spoiler.replace_with(new_spoiler)
57 |
58 | def _post_date(self, post):
59 | if post.find('time'):
60 | return datetime.datetime.fromtimestamp(int(post.find('time').get('data-time')))
61 | raise SiteException("No date")
62 |
63 |
64 | @register
65 | class SpaceBattles(XenForo2):
66 | domain = 'forums.spacebattles.com'
67 |
68 |
69 | @register
70 | class SpaceBattlesIndex(SpaceBattles, XenForoIndex):
71 | _key = "SpaceBattles"
72 |
73 |
74 | @register
75 | class SufficientVelocity(XenForo2):
76 | domain = 'forums.sufficientvelocity.com'
77 |
78 |
79 | @register
80 | class TheSietch(XenForo2):
81 | domain = 'www.the-sietch.com'
82 | index_urls = True
83 |
84 |
85 | @register
86 | class QuestionableQuesting(XenForo2):
87 | domain = 'forum.questionablequesting.com'
88 |
89 |
90 | @register
91 | class QuestionableQuestingIndex(QuestionableQuesting, XenForoIndex):
92 | _key = "QuestionableQuesting"
93 |
94 |
95 | @register
96 | class AlternateHistory(XenForo2):
97 | domain = 'www.alternatehistory.com/forum'
98 |
99 |
100 | @register
101 | class AlternateHistoryIndex(AlternateHistory, XenForoIndex):
102 | _key = "AlternateHistory"
103 |
--------------------------------------------------------------------------------
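Each concrete site at the bottom of xenforo2.py is just a registered subclass that sets domain (plus index_urls for forums routed through index.php, and _key for the index variants). A hypothetical example of wiring up another XenForo 2 board, with an invented domain:

    from sites import register
    from sites.xenforo2 import XenForo2

    @register
    class ExampleForum(XenForo2):  # hypothetical forum, for illustration only
        domain = 'forums.example.com'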