├── .editorconfig ├── .flake8 ├── .github └── workflows │ └── python-package-poetry.yml ├── .gitignore ├── Dockerfile ├── LICENSE.txt ├── README.markdown ├── ebook ├── __init__.py ├── cover.py ├── epub.py └── image.py ├── examples ├── cultivationchatgroup.json ├── dungeonkeeperami.json ├── fifthdefiance.json ├── heretical-edge-2.json ├── heretical-edge.json ├── pact.json ├── paeantosmac.json ├── pale-lights.json ├── pale-withextras.json ├── pale.json ├── phoenixdestiny.json ├── practical1.json ├── practical2.json ├── practical3.json ├── practical4.json ├── practical5.json ├── practical6.json ├── practical7.json ├── practicalall.json ├── practicalextra.json ├── sagaofsoul.json ├── shouldthesun.json ├── thegodsarebastards.json ├── twig.json ├── unsong.json ├── vacantthrone.json ├── wanderinginn.json ├── ward.json └── worm.json ├── leech.py ├── poetry.lock ├── pyproject.toml └── sites ├── __init__.py ├── ao3.py ├── arbitrary.py ├── deviantart.py ├── fanfictionnet.py ├── fictionlive.py ├── royalroad.py ├── stash.py ├── wattpad.py ├── xenforo.py └── xenforo2.py /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | end_of_line = lf 5 | insert_final_newline = true 6 | trim_trailing_whitespace = true 7 | 8 | [*.py] 9 | indent_style = space 10 | indent_size = 4 11 | charset = utf-8 12 | 13 | [{package.json,.travis.yml}] 14 | indent_style = space 15 | indent_size = 2 16 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | extend_ignore = 3 | # E128 continuation line under-indented for visual indent 4 | # E128, 5 | # E501 line too long 6 | E501 7 | exclude = .git,__pycache__,venv 8 | -------------------------------------------------------------------------------- /.github/workflows/python-package-poetry.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python package 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | matrix: 18 | python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] 19 | 20 | steps: 21 | - uses: actions/checkout@v2 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v4 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install tooling 27 | run: | 28 | python -m ensurepip 29 | python -m pip install --upgrade pip 30 | python -m pip install flake8 poetry 31 | - name: Install dependencies 32 | run: | 33 | poetry install 34 | - name: Lint with flake8 35 | run: | 36 | flake8 . 
37 | - name: Make sure help runs 38 | run: | 39 | poetry run leech --help 40 | - name: Build a cover 41 | run: | 42 | poetry run python -m 'ebook.cover' && file -E output.png && rm output.png 43 | - name: Verify poetry build 44 | run: | 45 | poetry build && ls -og dist/* 46 | - name: eclint 47 | uses: snow-actions/eclint@v1.0.1 48 | with: 49 | args: 'check *.py sites/*.py' 50 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.epub 2 | *.mobi 3 | ./*.json 4 | leech.db 5 | leech.sqlite 6 | leech.cookies 7 | leech.json 8 | venv/ 9 | .venv 10 | 11 | # Byte-compiled / optimized / DLL files 12 | __pycache__/ 13 | *.py[cod] 14 | 15 | # C extensions 16 | *.so 17 | 18 | # Distribution / packaging 19 | bin/ 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | .tox/ 39 | .coverage 40 | .cache 41 | nosetests.xml 42 | coverage.xml 43 | 44 | # Translations 45 | *.mo 46 | 47 | # Mr Developer 48 | .mr.developer.cfg 49 | .project 50 | .pydevproject 51 | 52 | # Rope 53 | .ropeproject 54 | 55 | # Django stuff: 56 | *.log 57 | *.pot 58 | 59 | # Sphinx documentation 60 | docs/_build/ 61 | 62 | # Pycharm 63 | .idea/ 64 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine:latest 2 | 3 | # Package list taken from Pillow documentation: 4 | # https://pillow.readthedocs.io/en/stable/installation.html#building-on-linux 5 | RUN apk add tiff-dev jpeg-dev openjpeg-dev zlib-dev freetype-dev lcms2-dev \ 6 | libwebp-dev tcl-dev tk-dev harfbuzz-dev fribidi-dev libimagequant-dev \ 7 | libxcb-dev libpng-dev gcc musl-dev python3 python3-dev py3-pip py3-cryptography 8 | RUN pip3 config set global.break-system-packages true && pip3 install poetry 9 | 10 | COPY . /leech 11 | 12 | RUN cd /leech \ 13 | && poetry config virtualenvs.create false \ 14 | && poetry install --without dev 15 | 16 | WORKDIR /work 17 | 18 | ENTRYPOINT ["/leech/leech.py"] 19 | 20 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2013-2017 David Lynch 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.markdown: -------------------------------------------------------------------------------- 1 | Leech 2 | === 3 | 4 | Let's say you want to read some sort of fiction. You're a fan of it, perhaps. But mobile websites are kind of non-ideal, so you'd like a proper ebook made from whatever you're reading. 5 | 6 | Setup 7 | --- 8 | 9 | You need Python 3.9+ and poetry. 10 | 11 | My recommended setup process is: 12 | 13 | $ pip install poetry 14 | $ poetry install 15 | $ poetry shell 16 | 17 | ...adjust as needed. Just make sure the dependencies from `pyproject.toml` get installed somehow. 18 | 19 | Usage 20 | --- 21 | 22 | Basic 23 | 24 | $ python3 leech.py [[URL]] 25 | 26 | A new file will appear named `Title of the Story.epub`. 27 | 28 | This is equivalent to the slightly longer 29 | 30 | $ python3 leech.py download [[URL]] 31 | 32 | Flushing the cache 33 | 34 | $ python3 leech.py flush 35 | 36 | Learn about other options 37 | 38 | $ python3 leech.py --help 39 | 40 | If you want to put an ePub on a Kindle you'll have to either use Amazon's send-to-kindle tools or convert it. For the latter I'd recommend [Calibre](http://calibre-ebook.com/), though you could also try using [kindlegen](http://www.amazon.com/gp/feature.html?docId=1000765211) directly. 41 | 42 | Supports 43 | --- 44 | 45 | * Fanfiction.net 46 | * FictionPress 47 | * ArchiveOfOurOwn 48 | * Yes, it has its own built-in EPUB export, but the formatting is horrible 49 | * Various XenForo-based sites: SpaceBattles and SufficientVelocity, most notably 50 | * RoyalRoad 51 | * Fiction.live (Anonkun) 52 | * DeviantArt galleries/collections 53 | * Sta.sh 54 | * Completely arbitrary sites, with a bit more work (see below) 55 | 56 | Configuration 57 | --- 58 | 59 | A very small amount of configuration is possible by creating a file called `leech.json` in the project directory. Currently you can define login information for sites that support it, and some options for book covers. 60 | 61 | Example: 62 | 63 | ``` 64 | { 65 | "logins": { 66 | "QuestionableQuesting": ["username", "password"] 67 | }, 68 | "images": { 69 | "image_fetch": true, 70 | "image_format": "png", 71 | "compress_images": true, 72 | "max_image_size": 100000, 73 | "always_convert_images": true 74 | }, 75 | "cover": { 76 | "fontname": "Comic Sans MS", 77 | "fontsize": 30, 78 | "bgcolor": [20, 120, 20], 79 | "textcolor": [180, 20, 180], 80 | "cover_url": "https://website.com/image.png" 81 | }, 82 | "output_dir": "/tmp/ebooks", 83 | "site_options": { 84 | "RoyalRoad": { 85 | "output_dir": "/tmp/litrpg_isekai_trash", 86 | "image_fetch": false 87 | } 88 | } 89 | } 90 | ``` 91 | > Note: The `image_fetch` key is a boolean and can only be `true` or `false`. Booleans in JSON are written in lowercase. 92 | > If it is `false`, Leech will not download any images. 93 | > Leech will also ignore the `image_format` key if `images` is `false`. 94 | 95 | > Note: If the `image_format` key does not exist, Leech will default to `jpeg`. 96 | > The three image formats are `jpeg`, `png`, and `gif`. The `image_format` key is case-insensitive. 97 | 98 | > Note: The `compress_images` key tells Leech to compress images. 
This is only supported for `jpeg` and `png` images. 99 | > This also goes hand-in-hand with the `max_image_size` key. If the `compress_images` key is `true` but there's no `max_image_size` key, 100 | > Leech will compress the image to a size less than 1MB (1000000 bytes). If the `max_image_size` key is present, Leech will compress the image 101 | > to a size less than the value of the `max_image_size` key. The `max_image_size` key is in bytes. 102 | > If `compress_images` is `false`, Leech will ignore the `max_image_size` key. 103 | 104 | > Warning: Compressing images might make Leech take a lot longer to download images. 105 | 106 | > Warning: Compressing images might make the image quality worse. 107 | 108 | > Warning: `max_image_size` is not a hard limit. Leech will try to compress the image to the size of the `max_image_size` key, but Leech might 109 | > not be able to compress the image to the exact size of the `max_image_size` key. 110 | 111 | > Warning: `max_image_size` should not be too small. For instance, if you set `max_image_size` to 1000, Leech will probably not be able to 112 | > compress the image to 1000 bytes. If you set `max_image_size` to 1000000, Leech will probably be able to compress the image to 1000000 bytes. 113 | 114 | > Warning: Leech will not compress GIFs, that might damage the animation. 115 | 116 | > Note: if `always_convert_images` is `true`, Leech will convert all non-GIF images to the specified `image_format`. 117 | 118 | Arbitrary Sites 119 | --- 120 | 121 | If you want to just download a one-off story from a site, you can create a definition file to describe it. This requires investigation and understanding of things like CSS selectors, which may take some trial and error. 122 | 123 | Example `practical.json`: 124 | 125 | ``` 126 | { 127 | "url": "https://practicalguidetoevil.wordpress.com/table-of-contents/", 128 | "title": "A Practical Guide To Evil: Book 1", 129 | "author": "erraticerrata", 130 | "chapter_selector": "#main .entry-content > ul:nth-of-type(1) > li > a", 131 | "content_selector": "#main .entry-content", 132 | "filter_selector": ".sharedaddy, .wpcnt, style", 133 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" 134 | } 135 | ``` 136 | 137 | Run as: 138 | 139 | $ ./leech.py practical.json 140 | 141 | This tells leech to load `url`, follow the links described by `chapter_selector`, extract the content from those pages as described by `content_selector`, and remove any content from *that* which matches `filter_selector`. Optionally, `cover_url` will replace the default cover with the image of your choice. 142 | 143 | If `chapter_selector` isn't given, it'll create a single-chapter book by applying `content_selector` to `url`. 144 | 145 | This is a fairly viable way to extract a story from, say, a random Wordpress installation with a convenient table of contents. It's relatively likely to get you at least *most* of the way to the ebook you want, with maybe some manual editing needed. 
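If all you need is that single-chapter behaviour, the definition file can be very small. A minimal sketch (the URL and selector here are hypothetical placeholders, not a real site):

```
{
    "url": "https://example.com/some-short-story/",
    "title": "Some Short Story",
    "author": "Some Author",
    "content_selector": ".entry-content"
}
```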
146 | 147 | A more advanced example with JSON would be: 148 | 149 | ``` 150 | { 151 | "url": "https://practicalguidetoevil.wordpress.com/2015/03/25/prologue/", 152 | "title": "A Practical Guide To Evil: Book 1", 153 | "author": "erraticerrata", 154 | "content_selector": "#main .entry-wrapper", 155 | "content_title_selector": "h1.entry-title", 156 | "content_text_selector": ".entry-content", 157 | "filter_selector": ".sharedaddy, .wpcnt, style", 158 | "next_selector": "a[rel=\"next\"]:not([href*=\"prologue\"])", 159 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" 160 | } 161 | ``` 162 | 163 | Because there's no `chapter_selector` here, leech will keep on looking for a link which it can find with `next_selector` and following that link. We also see more advanced metadata acquisition here, with `content_title_selector` and `content_text_selector` being used to find specific elements from within the content. 164 | 165 | If multiple matches for `content_selector` are found, leech will assume multiple chapters are present on one page, and will handle that. If you find a story that you want on a site which has all the chapters in the right order and next-page links, this is a notably efficient way to download it. See `examples/dungeonkeeperami.json` for this being used. 166 | 167 | If you need more advanced behavior, consider looking at... 168 | 169 | Adding new site handlers 170 | --- 171 | 172 | To add support for a new site, create a file in the `sites` directory that implements the `Site` interface. Take a look at `ao3.py` for a minimal example of what you have to do. 173 | 174 | Images support 175 | --- 176 | 177 | Leech creates EPUB 2.01 files, which means that Leech can only save images in the following 178 | format: 179 | - JPEG (JPG/JFIF) 180 | - PNG 181 | - GIF 182 | 183 | See the [Open Publication Structure (OPS) 2.0.1](https://idpf.org/epub/20/spec/OPS_2.0.1_draft.htm#TOC2.3.4) for more information. 184 | 185 | Leech can not save images in SVG because it is not supported by Pillow. 186 | 187 | Leech uses [Pillow](https://pillow.readthedocs.io/en/stable/index.html) for image manipulation and conversion. If you want to use a different 188 | image format, you can install the required dependencies for Pillow and you will probably have to tinker with Leech. See the [Pillow documentation](https://pillow.readthedocs.io/en/stable/installation.html#external-libraries) for more information. 189 | 190 | To configure image support, you will need to create a file called `leech.json`. See the section below for more information. 191 | 192 | Docker 193 | --- 194 | 195 | You can build the project's Docker container like this: 196 | 197 | ```shell 198 | docker build . -t kemayo/leech:snapshot 199 | ``` 200 | 201 | The container's entrypoint runs `leech` directly and sets the current working directory to `/work`, so you can mount any directory there: 202 | 203 | ```shell 204 | docker run -it --rm -v ${DIR}:/work kemayo/leech:snapshot download [[URL]] 205 | ``` 206 | 207 | Contributing 208 | --- 209 | 210 | If you submit a pull request to add support for another reasonably-general-purpose site, I will nigh-certainly accept it. 211 | 212 | Run [EpubCheck](https://github.com/IDPF/epubcheck) on epubs you generate to make sure they're not breaking. 
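For example, assuming you've downloaded the EpubCheck release jar (the jar path below is just a placeholder for wherever you put it):

```shell
java -jar epubcheck.jar "Title of the Story.epub"
```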
213 | -------------------------------------------------------------------------------- /ebook/__init__.py: -------------------------------------------------------------------------------- 1 | from .epub import make_epub, EpubFile 2 | from .cover import make_cover, make_cover_from_url 3 | from .image import get_image_from_url 4 | 5 | import html 6 | import unicodedata 7 | import datetime 8 | from attrs import define, asdict 9 | 10 | html_template = ''' 11 | 12 | 13 | {title} 14 | 15 | 16 | 17 |

<h1>{title}</h1>
18 | {text}
19 | </body>
20 | </html>
21 | '''
22 | 
23 | cover_template = '''
24 | <html xmlns="http://www.w3.org/1999/xhtml">
25 | <head>
26 | <title>Cover</title>
27 | 
28 | </head>
29 | <body>
30 | <div>
31 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" width="100%" height="100%" viewBox="0 0 600 800" preserveAspectRatio="xMidYMid meet">
33 | <image width="600" height="800" xlink:href="images/cover.png" />
34 | </svg>
35 | </div>
36 | </body>
37 | </html>
38 | '''
39 | 
40 | frontmatter_template = '''
41 | <html xmlns="http://www.w3.org/1999/xhtml">
42 | <head>
43 | <title>Front Matter</title>
44 | 
45 | </head>
46 | <body>
47 | <div class="frontmatter">
48 | <h2>{title}<br />By {author}</h2>
49 | <dl>
50 | <dt>Source</dt>
51 | <dd>{unique_id}</dd>
52 | <dt>Started</dt>
53 | <dd>{started:%Y-%m-%d}</dd>
54 | <dt>Updated</dt>
55 | <dd>{updated:%Y-%m-%d}</dd>
56 | <dt>Downloaded on</dt>
57 | <dd>{now:%Y-%m-%d}</dd>
58 | {extra}
59 | </dl>
60 | </div>
61 | 62 | 63 | ''' 64 | 65 | 66 | @define 67 | class CoverOptions: 68 | fontname: str = None 69 | fontsize: int = None 70 | width: int = None 71 | height: int = None 72 | wrapat: int = None 73 | bgcolor: tuple = None 74 | textcolor: tuple = None 75 | cover_url: str = None 76 | 77 | 78 | @define 79 | class ImageOptions: 80 | image_fetch: bool = False 81 | image_format: str = "JPEG" 82 | always_convert_images: bool = False 83 | compress_images: bool = False 84 | max_image_size: int = 1_000_000 85 | 86 | 87 | def chapter_html( 88 | story, 89 | image_options, 90 | titleprefix=None, 91 | normalize=False, 92 | session=None 93 | ): 94 | images = {} 95 | chapters = [] 96 | for i, chapter in enumerate(story): 97 | title = chapter.title or f'#{i}' 98 | if hasattr(chapter, '__iter__'): 99 | # This is a Section 100 | chapters.extend(chapter_html( 101 | chapter, image_options=image_options, titleprefix=title, normalize=normalize, session=session 102 | )) 103 | else: 104 | contents = chapter.contents 105 | images.update(chapter.images) 106 | 107 | title = titleprefix and f'{titleprefix}: {title}' or title 108 | if normalize: 109 | title = unicodedata.normalize('NFKC', title) 110 | contents = unicodedata.normalize('NFKC', contents) 111 | chapters.append(EpubFile( 112 | title=title, 113 | path=f'{story.id}/chapter{i + 1}.html', 114 | contents=html_template.format( 115 | title=html.escape(title), text=contents) 116 | )) 117 | 118 | if story.footnotes: 119 | chapters.append(EpubFile(title="Footnotes", path=f'{story.id}/footnotes.html', contents=html_template.format( 120 | title="Footnotes", text=story.footnotes.contents))) 121 | images.update(story.footnotes.images) 122 | 123 | for image in images.values(): 124 | img_contents = get_image_from_url( 125 | image.url, 126 | image_format=image_options.get('image_format'), 127 | compress_images=image_options.get('compress_images'), 128 | max_image_size=image_options.get('max_image_size'), 129 | always_convert=image_options.get('always_convert_images'), 130 | session=session 131 | ) 132 | path = f'{story.id}/{image.path()}' 133 | for chapterfile in chapters: 134 | if chapterfile.path == path: 135 | break 136 | else: 137 | chapters.append( 138 | EpubFile(path=path, contents=img_contents[0], filetype=img_contents[2]) 139 | ) 140 | 141 | return chapters 142 | 143 | 144 | def generate_epub(story, cover_options={}, image_options={}, output_filename=None, output_dir=None, normalize=False, allow_spaces=False, session=None, parser='lxml'): 145 | dates = list(story.dates()) 146 | metadata = { 147 | 'title': story.title, 148 | 'author': story.author, 149 | 'unique_id': story.url, 150 | 'started': min(dates), 151 | 'updated': max(dates), 152 | 'extra': '', 153 | } 154 | extra_metadata = {} 155 | 156 | session.headers.update({ 157 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) Gecko/20100101 Firefox/128.0', 158 | }) 159 | if story.url: 160 | session.headers.update({ 161 | 'Referer': story.url, 162 | }) 163 | 164 | if story.summary: 165 | extra_metadata['Summary'] = story.summary 166 | if story.tags: 167 | extra_metadata['Tags'] = ', '.join(story.tags) 168 | 169 | if extra_metadata: 170 | metadata['extra'] = '\n '.join( 171 | f'
<dt>{k}</dt><dd>{v}</dd>
' for k, v in extra_metadata.items()) 172 | 173 | valid_image_options = ('image_fetch', 'image_format', 'compress_images', 174 | 'max_image_size', 'always_convert_images') 175 | image_options = ImageOptions( 176 | **{k: v for k, v in image_options.items() if k in valid_image_options}) 177 | image_options = asdict(image_options, filter=lambda k, v: v is not None) 178 | 179 | valid_cover_options = ('fontname', 'fontsize', 'width', 180 | 'height', 'wrapat', 'bgcolor', 'textcolor', 'cover_url') 181 | cover_options = CoverOptions( 182 | **{k: v for k, v in cover_options.items() if k in valid_cover_options}) 183 | cover_options = asdict(cover_options, filter=lambda k, v: v is not None) 184 | 185 | if cover_options and "cover_url" in cover_options: 186 | image = make_cover_from_url( 187 | cover_options["cover_url"], story.title, story.author) 188 | elif story.cover_url: 189 | image = make_cover_from_url(story.cover_url, story.title, story.author) 190 | else: 191 | image = make_cover(story.title, story.author, **cover_options) 192 | 193 | return make_epub( 194 | output_filename or story.title + '.epub', 195 | [ 196 | # The cover is static, and the only change comes from the image which we generate 197 | EpubFile(title='Cover', path='cover.html', contents=cover_template), 198 | EpubFile(title='Front Matter', path='frontmatter.html', contents=frontmatter_template.format( 199 | now=datetime.datetime.now(), **metadata)), 200 | *chapter_html( 201 | story, 202 | image_options=image_options, 203 | normalize=normalize, 204 | session=session 205 | ), 206 | EpubFile( 207 | path='Styles/base.css', 208 | contents=session.get( 209 | 'https://raw.githubusercontent.com/mattharrison/epub-css-starter-kit/master/css/base.css').text, 210 | filetype='text/css' 211 | ), 212 | EpubFile(path='images/cover.png', 213 | contents=image.read(), filetype='image/png'), 214 | ], 215 | metadata, 216 | output_dir=output_dir, 217 | allow_spaces=allow_spaces 218 | ) 219 | -------------------------------------------------------------------------------- /ebook/cover.py: -------------------------------------------------------------------------------- 1 | 2 | from PIL import Image, ImageDraw 3 | from io import BytesIO 4 | import textwrap 5 | import requests 6 | import logging 7 | from . 
import image 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | def make_cover(title, author, width=600, height=800, fontname="Helvetica", fontsize=40, bgcolor=(120, 20, 20), textcolor=(255, 255, 255), wrapat=30): 13 | img = Image.new("RGBA", (width, height), bgcolor) 14 | draw = ImageDraw.Draw(img) 15 | 16 | title = textwrap.fill(title, wrapat) 17 | author = textwrap.fill(author, wrapat) 18 | 19 | font = image._safe_font(fontname, size=fontsize) 20 | title_size = image.textsize(draw, title, font=font) 21 | image.draw_text_outlined(draw, ((width - title_size[0]) / 2, 100), title, textcolor, font=font) 22 | # draw.text(((width - title_size[0]) / 2, 100), title, textcolor, font=font) 23 | 24 | font = image._safe_font(fontname, size=fontsize - 2) 25 | author_size = image.textsize(draw, author, font=font) 26 | image.draw_text_outlined(draw, ((width - author_size[0]) / 2, 100 + title_size[1] + 70), author, textcolor, font=font) 27 | 28 | output = BytesIO() 29 | img.save(output, "PNG") 30 | output.name = 'cover.png' 31 | # writing left the cursor at the end of the file, so reset it 32 | output.seek(0) 33 | return output 34 | 35 | 36 | def make_cover_from_url(url, title, author): 37 | try: 38 | logger.info("Downloading cover from " + url) 39 | img = requests.Session().get(url) 40 | cover = BytesIO(img.content) 41 | 42 | imgformat = Image.open(cover).format 43 | # The `Image.open` read a few bytes from the stream to work out the 44 | # format, so reset it: 45 | cover.seek(0) 46 | 47 | if imgformat != "PNG": 48 | cover = image._convert_to_new_format(cover, "PNG") 49 | except Exception as e: 50 | logger.info("Encountered an error downloading cover: " + str(e)) 51 | cover = make_cover(title, author) 52 | 53 | return cover 54 | 55 | 56 | if __name__ == '__main__': 57 | f = make_cover('Test of a Title which is quite long and will require multiple lines', 'Some Dude') 58 | with open('output.png', 'wb') as out: 59 | out.write(f.read()) 60 | -------------------------------------------------------------------------------- /ebook/epub.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import os.path 4 | import zipfile 5 | import xml.etree.ElementTree as etree 6 | import uuid 7 | import string 8 | from collections import namedtuple 9 | 10 | """ 11 | So, an epub is approximately a zipfile of HTML files, with 12 | a bit of metadata thrown in for good measure. 13 | 14 | This totally started from http://www.manuel-strehl.de/dev/simple_epub_ebooks_with_python.en.html 15 | """ 16 | 17 | 18 | EpubFile = namedtuple('EbookFile', 'path, contents, title, filetype', defaults=(False, False, "application/xhtml+xml")) 19 | 20 | 21 | def sanitize_filename(s, allow_spaces=False): 22 | """Take a string and return a valid filename constructed from the string. 23 | Uses a whitelist approach: any characters not present in valid_chars are 24 | removed. Also spaces are replaced with underscores. 25 | 26 | Note: this method may produce invalid filenames such as ``, `.` or `..` 27 | When I use this method I prepend a date string like '2009_01_15_19_46_32_' 28 | and append a file extension like '.txt', so I avoid the potential of using 29 | an invalid filename. 30 | 31 | """ 32 | valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits) 33 | filename = ''.join(c for c in s if c in valid_chars) 34 | if not allow_spaces: 35 | filename = filename.replace(' ', '_') # I don't like spaces in filenames. 
36 | return filename 37 | 38 | 39 | def make_epub(filename, files, meta, compress=True, output_dir=False, allow_spaces=False): 40 | unique_id = meta.get('unique_id', False) 41 | if not unique_id: 42 | unique_id = 'leech_book_' + str(uuid.uuid4()) 43 | 44 | filename = sanitize_filename(filename, allow_spaces) 45 | if output_dir: 46 | filename = os.path.join(output_dir, filename) 47 | epub = zipfile.ZipFile(filename, 'w', compression=compress and zipfile.ZIP_DEFLATED or zipfile.ZIP_STORED) 48 | 49 | # The first file must be named "mimetype", and shouldn't be compressed 50 | epub.writestr("mimetype", "application/epub+zip", compress_type=zipfile.ZIP_STORED) 51 | 52 | # We need an index file, that lists all other HTML files 53 | # This index file itself is referenced in the META_INF/container.xml 54 | # file 55 | container = etree.Element('container', version="1.0", xmlns="urn:oasis:names:tc:opendocument:xmlns:container") 56 | rootfiles = etree.SubElement(container, 'rootfiles') 57 | etree.SubElement(rootfiles, 'rootfile', { 58 | 'full-path': "OEBPS/Content.opf", 59 | 'media-type': "application/oebps-package+xml", 60 | }) 61 | epub.writestr("META-INF/container.xml", etree.tostring(container)) 62 | 63 | package = etree.Element('package', { 64 | 'version': "2.0", 65 | 'xmlns': "http://www.idpf.org/2007/opf", 66 | 'unique-identifier': 'book_identifier', # could plausibly be based on the name 67 | }) 68 | 69 | # build the metadata 70 | metadata = etree.SubElement(package, 'metadata', { 71 | 'xmlns:dc': "http://purl.org/dc/elements/1.1/", 72 | 'xmlns:opf': "http://www.idpf.org/2007/opf", 73 | }) 74 | identifier = etree.SubElement(metadata, 'dc:identifier', id='book_identifier') 75 | if unique_id.find('://') != -1: 76 | identifier.set('opf:scheme', "URI") 77 | identifier.text = unique_id 78 | etree.SubElement(metadata, 'dc:title').text = meta.get('title', 'Untitled') 79 | etree.SubElement(metadata, 'dc:language').text = meta.get('language', 'en') 80 | etree.SubElement(metadata, 'dc:creator', {'opf:role': 'aut'}).text = meta.get('author', 'Unknown') 81 | etree.SubElement(metadata, 'meta', {'name': 'generator', 'content': 'leech'}) 82 | 83 | # we'll need a manifest and spine 84 | manifest = etree.SubElement(package, 'manifest') 85 | spine = etree.SubElement(package, 'spine', toc="ncx") 86 | guide = etree.SubElement(package, 'guide') 87 | 88 | # ...and the ncx index 89 | ncx = etree.Element('ncx', { 90 | 'xmlns': "http://www.daisy.org/z3986/2005/ncx/", 91 | 'version': "2005-1", 92 | 'xml:lang': "en-US", 93 | }) 94 | etree.SubElement(etree.SubElement(ncx, 'head'), 'meta', name="dtb:uid", content=unique_id) 95 | etree.SubElement(etree.SubElement(ncx, 'docTitle'), 'text').text = meta.get('title', 'Untitled') 96 | etree.SubElement(etree.SubElement(ncx, 'docAuthor'), 'text').text = meta.get('author', 'Unknown') 97 | navmap = etree.SubElement(ncx, 'navMap') 98 | 99 | # Write each HTML file to the ebook, collect information for the index 100 | for i, file in enumerate(files): 101 | file_id = 'file_%d' % (i + 1) 102 | etree.SubElement(manifest, 'item', { 103 | 'id': file_id, 104 | 'href': file.path, 105 | 'media-type': file.filetype, 106 | }) 107 | if file.filetype == "application/xhtml+xml": 108 | itemref = etree.SubElement(spine, 'itemref', idref=file_id) 109 | point = etree.SubElement(navmap, 'navPoint', { 110 | 'class': "h1", 111 | 'id': file_id, 112 | }) 113 | etree.SubElement(etree.SubElement(point, 'navLabel'), 'text').text = file.title 114 | etree.SubElement(point, 'content', src=file.path) 115 | 116 | 
if 'cover.html' == os.path.basename(file.path): 117 | etree.SubElement(guide, 'reference', { 118 | 'type': 'cover', 119 | 'title': 'Cover', 120 | 'href': file.path, 121 | }) 122 | itemref.set('linear', 'no') 123 | if 'images/cover.png' == file.path: 124 | etree.SubElement(metadata, 'meta', { 125 | 'name': 'cover', 126 | 'content': file_id, 127 | }) 128 | 129 | # and add the actual html to the zip 130 | if file.contents: 131 | epub.writestr('OEBPS/' + file.path, file.contents) 132 | else: 133 | epub.write(file.path, 'OEBPS/' + file.path) 134 | 135 | # ...and add the ncx to the manifest 136 | etree.SubElement(manifest, 'item', { 137 | 'id': 'ncx', 138 | 'href': 'toc.ncx', 139 | 'media-type': "application/x-dtbncx+xml", 140 | }) 141 | epub.writestr('OEBPS/toc.ncx', etree.tostring(ncx)) 142 | 143 | # Finally, write the index 144 | epub.writestr('OEBPS/Content.opf', etree.tostring(package)) 145 | 146 | epub.close() 147 | 148 | return filename 149 | 150 | 151 | if __name__ == '__main__': 152 | make_epub('test.epub', [EpubFile(title='Chapter 1', path='a.html', contents="Test"), EpubFile(title='Chapter 2', path='test/b.html', contents="Still a test")], {}) 153 | -------------------------------------------------------------------------------- /ebook/image.py: -------------------------------------------------------------------------------- 1 | # Basically the same as cover.py with some minor differences 2 | import PIL 3 | from PIL import Image, ImageDraw, ImageFont 4 | from io import BytesIO 5 | from base64 import b64decode 6 | import math 7 | import textwrap 8 | import requests 9 | import logging 10 | 11 | from typing import Tuple 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | def get_size_format(b, factor=1000, suffix="B"): 17 | """ 18 | Scale bytes to its proper byte format 19 | e.g: 20 | 1253656 => '1.20MB' 21 | 1253656678 => '1.17GB' 22 | """ 23 | for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]: 24 | if b < factor: 25 | return f"{b:.2f}{unit}{suffix}" 26 | b /= factor 27 | return f"{b:.2f}Y{suffix}" 28 | 29 | 30 | def compress_image(image: BytesIO, target_size: int, image_format: str) -> PIL.Image.Image: 31 | image_size = get_size_format(len(image.getvalue())) 32 | logger.info(f"Image size: {image_size}") 33 | 34 | big_photo = Image.open(image).convert("RGBA") 35 | 36 | target_pixel_count = 2.8114 * target_size 37 | if len(image.getvalue()) > target_size: 38 | logger.info(f"Image is greater than {get_size_format(target_size)}, compressing") 39 | scale_factor = target_pixel_count / math.prod(big_photo.size) 40 | if scale_factor < 1: 41 | x, y = tuple(int(scale_factor * dim) for dim in big_photo.size) 42 | logger.info(f"Resizing image dimensions from {big_photo.size} to ({x}, {y})") 43 | sml_photo = big_photo.resize((x, y), resample=Image.LANCZOS) 44 | else: 45 | sml_photo = big_photo 46 | compressed_image_size = get_size_format(len(PIL_Image_to_bytes(sml_photo, image_format))) 47 | logger.info(f"Compressed image size: {compressed_image_size}") 48 | return sml_photo 49 | else: 50 | logger.info(f"Image is less than {get_size_format(target_size)}, not compressing") 51 | return big_photo 52 | 53 | 54 | def PIL_Image_to_bytes( 55 | pil_image: PIL.Image.Image, 56 | image_format: str 57 | ) -> bytes: 58 | out_io = BytesIO() 59 | if image_format.lower().startswith("gif"): 60 | frames = [] 61 | current = pil_image.convert('RGBA') 62 | while True: 63 | try: 64 | frames.append(current) 65 | pil_image.seek(pil_image.tell() + 1) 66 | current = Image.alpha_composite(current, 
pil_image.convert('RGBA')) 67 | except EOFError: 68 | break 69 | frames[0].save(out_io, format=image_format, save_all=True, append_images=frames[1:], optimize=True, loop=0) 70 | return out_io.getvalue() 71 | 72 | elif image_format.lower() in ["jpeg", "jpg"]: 73 | # Create a new image with a white background 74 | background_img = Image.new('RGBA', pil_image.size, "white") 75 | 76 | # Paste the image on top of the background 77 | background_img.paste(pil_image.convert("RGBA"), (0, 0), pil_image.convert("RGBA")) 78 | pil_image = background_img.convert('RGB') 79 | 80 | pil_image.save(out_io, format=image_format, optimize=True, quality=95) 81 | return out_io.getvalue() 82 | 83 | 84 | def get_image_from_url( 85 | url: str, 86 | image_format: str = "JPEG", 87 | compress_images: bool = False, 88 | max_image_size: int = 1_000_000, 89 | always_convert: bool = False, 90 | session: requests.Session = None 91 | ) -> Tuple[bytes, str, str]: 92 | """ 93 | Based on make_cover_from_url(), this function takes in the image url usually gotten from the `src` attribute of 94 | an image tag and returns the image data, the image format and the image mime type 95 | 96 | @param url: The url of the image 97 | @param image_format: The format to convert the image to if it's not in the supported formats 98 | @param compress_images: Whether to compress the image or not 99 | @param max_image_size: The maximum size of the image in bytes 100 | @return: A tuple of the image data, the image format and the image mime type 101 | """ 102 | logger.info("Downloading image: %s", url) 103 | session = session or requests.Session() 104 | try: 105 | if url.startswith("https://www.filepicker.io/api/"): 106 | logger.warning("Filepicker.io image detected, converting to Fiction.live image. This might fail.") 107 | url = f"https://cdn3.fiction.live/fp/{url.split('/')[-1]}?&quality=95" 108 | elif url.startswith("https://cdn3.fiction.live/images/") or url.startswith("https://ddx5i92cqts4o.cloudfront.net/images/"): 109 | logger.warning("Converting url to cdn6. 
This might fail.") 110 | url = f"https://cdn6.fiction.live/file/fictionlive/images/{url.split('/images/')[-1]}" 111 | elif url.startswith("data:image") and 'base64' in url: 112 | logger.info("Base64 image detected") 113 | head, base64data = url.split(',') 114 | file_ext = str(head.split(';')[0].split('/')[1]) 115 | imgdata = b64decode(base64data) 116 | if compress_images: 117 | if file_ext.lower() == "gif": 118 | logger.info("GIF images should not be compressed, skipping compression") 119 | else: 120 | compressed_base64_image = compress_image(BytesIO(imgdata), max_image_size, file_ext) 121 | imgdata = PIL_Image_to_bytes(compressed_base64_image, file_ext) 122 | 123 | if file_ext.lower() not in ["jpg", "jpeg", "png", "gif"]: 124 | logger.info(f"Image format {file_ext} not supported by EPUB2.0.1, converting to {image_format}") 125 | return _convert_to_new_format(imgdata, image_format).read(), image_format.lower(), f"image/{image_format.lower()}" 126 | return imgdata, file_ext, f"image/{file_ext}" 127 | 128 | img = session.get(url, timeout=(6.01, 30)) 129 | image = BytesIO(img.content) 130 | image.seek(0) 131 | 132 | PIL_image = Image.open(image) 133 | 134 | current_format = str(PIL_image.format) 135 | 136 | if current_format.lower() == "gif": 137 | PIL_image = Image.open(image) 138 | if PIL_image.info['version'] not in [b"GIF89a", "GIF89a"]: 139 | PIL_image.info['version'] = b"GIF89a" 140 | return PIL_Image_to_bytes(PIL_image, "GIF"), "gif", "image/gif" 141 | 142 | if compress_images: 143 | PIL_image = compress_image(image, max_image_size, current_format) 144 | 145 | if always_convert: 146 | current_format = image_format 147 | 148 | return PIL_Image_to_bytes(PIL_image, current_format), current_format, f"image/{current_format.lower()}" 149 | 150 | except Exception as e: 151 | logger.info("Encountered an error downloading image: " + str(e)) 152 | image = make_fallback_image("There was a problem downloading this image.").read() 153 | return image, "jpeg", "image/jpeg" 154 | 155 | 156 | def make_fallback_image( 157 | message: str, 158 | width=600, 159 | height=300, 160 | fontname="Helvetica", 161 | font_size=40, 162 | bg_color=(0, 0, 0), 163 | textcolor=(255, 255, 255), 164 | wrap_at=30 165 | ): 166 | """ 167 | This function should only be called if get_image_from_url() fails 168 | """ 169 | img = Image.new("RGB", (width, height), bg_color) 170 | draw = ImageDraw.Draw(img) 171 | 172 | message = textwrap.fill(message, wrap_at) 173 | 174 | font = _safe_font(fontname, size=font_size) 175 | message_size = textsize(draw, message, font=font) 176 | draw_text_outlined( 177 | draw, ((width - message_size[0]) / 2, 100), message, textcolor, font=font) 178 | # draw.text(((width - title_size[0]) / 2, 100), title, textcolor, font=font) 179 | 180 | output = BytesIO() 181 | img.save(output, "JPEG") 182 | # writing left the cursor at the end of the file, so reset it 183 | output.seek(0) 184 | return output 185 | 186 | 187 | def _convert_to_new_format(image_bytestream, image_format: str): 188 | new_image = BytesIO() 189 | try: 190 | Image.open(image_bytestream).save(new_image, format=image_format.upper()) 191 | new_image.seek(0) 192 | except Exception as e: 193 | logger.info(f"Encountered an error converting image to {image_format}\nError: {e}") 194 | new_image = make_fallback_image("There was a problem converting this image.") 195 | return new_image 196 | 197 | 198 | def _safe_font(preferred, *args, **kwargs): 199 | for font in (preferred, "Helvetica", "FreeSans", "Arial"): 200 | try: 201 | return 
ImageFont.truetype(*args, font=font, **kwargs) 202 | except IOError: 203 | pass 204 | 205 | # This is pretty terrible, but it'll work regardless of what fonts the 206 | # system has. Worst issue: can't set the size. 207 | return ImageFont.load_default() 208 | 209 | 210 | def textsize(draw, text, **kwargs): 211 | left, top, right, bottom = draw.multiline_textbbox((0, 0), text, **kwargs) 212 | width, height = right - left, bottom - top 213 | return width, height 214 | 215 | 216 | def draw_text_outlined(draw, xy, text, fill=None, font=None, anchor=None): 217 | x, y = xy 218 | 219 | # Outline 220 | draw.text((x - 1, y), text=text, fill=(0, 0, 0), font=font, anchor=anchor) 221 | draw.text((x + 1, y), text=text, fill=(0, 0, 0), font=font, anchor=anchor) 222 | draw.text((x, y - 1), text=text, fill=(0, 0, 0), font=font, anchor=anchor) 223 | draw.text((x, y + 1), text=text, fill=(0, 0, 0), font=font, anchor=anchor) 224 | 225 | # Fill 226 | draw.text(xy, text=text, fill=fill, font=font, anchor=anchor) 227 | 228 | 229 | if __name__ == '__main__': 230 | f = make_fallback_image( 231 | 'Test of a Title which is quite long and will require multiple lines', 232 | 'output.png' 233 | ) 234 | with open('output.png', 'wb') as out: 235 | out.write(f.read()) 236 | -------------------------------------------------------------------------------- /examples/cultivationchatgroup.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://novelfull.com/cultivation-chat-group/chapter-1-mt-yellows-true-monarch-and-nine-provinces-1-group.html", 3 | "title": "Cultivation Chat Group", 4 | "author": "Legend of the Paladin", 5 | "content_selector": "#chapter", 6 | "content_title_selector": "h2 .chapter-text", 7 | "content_text_selector": "#chapter-content", 8 | "filter_selector": "style, script, .adsbygoogle, .ads", 9 | "next_selector": "#next_chap[href]" 10 | } 11 | -------------------------------------------------------------------------------- /examples/dungeonkeeperami.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://forums.sufficientvelocity.com/threads/dungeon-keeper-ami-sailor-moon-dungeon-keeper-story-only-thread.30066/", 3 | "title": "Dungeon Keeper Ami", 4 | "author": "Pusakuronu", 5 | "content_selector": "article.message-body .bbWrapper", 6 | "filter_selector": ".sharedaddy, .wpcnt, style", 7 | "next_selector": "link[rel=next]" 8 | } 9 | -------------------------------------------------------------------------------- /examples/fifthdefiance.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://thefifthdefiance.com/chapters/", 3 | "title": "The Fifth Defiance", 4 | "author": "Walter", 5 | "chapter_selector": ".entry-content > p > a", 6 | "content_selector": ".entry-content", 7 | "content_title_selector": ".entry-title", 8 | "filter_selector": ".sharedaddy, .wpcnt, style" 9 | } -------------------------------------------------------------------------------- /examples/heretical-edge-2.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://ceruleanscrawling.wordpress.com/heretical-edge-2-table-of-contents/", 3 | "title": "Heretical Edge 2", 4 | "author": "Ceruelean", 5 | "chapter_selector": "article .entry-content > p > a:not([href*=patreon])", 6 | "content_selector": "article .entry-content", 7 | "filter_selector": ".sharedaddy, .wpcnt, style" 8 | } 9 | 
-------------------------------------------------------------------------------- /examples/heretical-edge.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://ceruleanscrawling.wordpress.com/table-of-contents/", 3 | "title": "Heretical Edge", 4 | "author": "Ceruelean", 5 | "chapter_selector": "article .entry-content > p > a", 6 | "content_selector": "article .entry-content", 7 | "filter_selector": ".sharedaddy, .wpcnt, style" 8 | } 9 | -------------------------------------------------------------------------------- /examples/pact.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://pactwebserial.wordpress.com/2013/12/17/bonds-1-1/", 3 | "title": "Pact", 4 | "author": "Wildbow", 5 | "content_selector": "#main", 6 | "content_title_selector": "h1.entry-title", 7 | "content_text_selector": ".entry-content", 8 | "filter_selector": ".sharedaddy, style, a[href*='pactwebserial.wordpress.com']", 9 | "next_selector": "a[rel=\"next\"]", 10 | "cover_url": "https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/a456e440-ea22-45c0-8b39-dacf9bbddade/d7dxaz4-64cfabe8-f957-44af-aaea-82346c401b27.jpg?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJ1cm46YXBwOiIsImlzcyI6InVybjphcHA6Iiwib2JqIjpbW3sicGF0aCI6IlwvZlwvYTQ1NmU0NDAtZWEyMi00NWMwLThiMzktZGFjZjliYmRkYWRlXC9kN2R4YXo0LTY0Y2ZhYmU4LWY5NTctNDRhZi1hYWVhLTgyMzQ2YzQwMWIyNy5qcGcifV1dLCJhdWQiOlsidXJuOnNlcnZpY2U6ZmlsZS5kb3dubG9hZCJdfQ.J-Wn8bDrKmoKKZW8mkJdi3uRoDV2FDJQZ_TuTWvQazY" 11 | } 12 | -------------------------------------------------------------------------------- /examples/paeantosmac.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://paeantosmac.wordpress.com/2015/02/17/introduction/", 3 | "title": "Paean to SMAC", 4 | "author": "Nick Stipanovich", 5 | "content_selector": "article.post", 6 | "content_title_selector": "header h1", 7 | "content_text_selector": "div.entry-content", 8 | "filter_selector": ".sharedaddy, .wpcnt, style", 9 | "next_selector": "link[rel=next]" 10 | } 11 | -------------------------------------------------------------------------------- /examples/pale-lights.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://palelights.com/2022/08/17/chapter-1/", 3 | "title": "Pale Lights", 4 | "author": "erraticerrata", 5 | "content_selector": "#main", 6 | "content_title_selector": "h1.entry-title", 7 | "content_text_selector": ".entry-content", 8 | "filter_selector": ".sharedaddy, .wpcnt, style", 9 | "next_selector": "a[rel=\"next\"]", 10 | "cover_url": "https://www.royalroadcdn.com/public/covers-large/pale-lights-aaaay6-1-bi.jpg" 11 | } 12 | -------------------------------------------------------------------------------- /examples/pale-withextras.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://palewebserial.wordpress.com/2020/05/05/blood-run-cold-0-0/", 3 | "title": "Pale", 4 | "author": "Wildbow", 5 | "content_selector": "#main", 6 | "content_title_selector": "h1.entry-title", 7 | "content_text_selector": ".entry-content", 8 | "filter_selector": ".sharedaddy, style, a[href*='palewebserial.wordpress.com']", 9 | "next_selector": "a[rel=\"next\"]" 10 | } 11 | -------------------------------------------------------------------------------- /examples/pale.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "url": "https://palewebserial.wordpress.com/table-of-contents/", 3 | "title": "Pale", 4 | "author": "Wildbow", 5 | "content_selector": "#main", 6 | "content_title_selector": "h1.entry-title", 7 | "content_text_selector": ".entry-content", 8 | "chapter_selector": "article .entry-content > p a", 9 | "filter_selector": ".sharedaddy, style, a[href*='palewebserial.wordpress.com']" 10 | } 11 | -------------------------------------------------------------------------------- /examples/phoenixdestiny.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://gravitytales.com/novel/phoenix-destiny/pd-chapter-1", 3 | "title": "Phoenix Destiny", 4 | "author": "Yun Ji", 5 | "content_selector": "#contentElement", 6 | "content_title_selector": "h4", 7 | "content_text_selector": "#chapterContent", 8 | "filter_selector": ".sharedaddy, .wpcnt, style", 9 | "next_selector": ".chapter-navigation > a:last-child[href*=\"pd-chapter\"]" 10 | } 11 | -------------------------------------------------------------------------------- /examples/practical1.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://practicalguidetoevil.wordpress.com/2015/03/25/prologue/", 3 | "title": "A Practical Guide To Evil: Book 1", 4 | "author": "erraticerrata", 5 | "content_selector": "#main", 6 | "content_title_selector": "h1.entry-title", 7 | "content_text_selector": ".entry-content", 8 | "filter_selector": ".sharedaddy, .wpcnt, style", 9 | "next_selector": "a[rel=\"next\"]:not([href*=\"prologue\"])", 10 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" 11 | } 12 | -------------------------------------------------------------------------------- /examples/practical2.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://practicalguidetoevil.wordpress.com/2015/11/04/prologue-2/", 3 | "title": "A Practical Guide To Evil: Book 2", 4 | "author": "erraticerrata", 5 | "content_selector": "#main", 6 | "content_title_selector": "h1.entry-title", 7 | "content_text_selector": ".entry-content", 8 | "filter_selector": ".sharedaddy, .wpcnt, style", 9 | "next_selector": "a[rel=\"next\"]:not([href*=\"prologue\"])", 10 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" 11 | } -------------------------------------------------------------------------------- /examples/practical3.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://practicalguidetoevil.wordpress.com/2017/02/08/prologue-3/", 3 | "title": "A Practical Guide To Evil: Book 3", 4 | "author": "erraticerrata", 5 | "content_selector": "#main", 6 | "content_title_selector": "h1.entry-title", 7 | "content_text_selector": ".entry-content", 8 | "filter_selector": ".sharedaddy, .wpcnt, style", 9 | "next_selector": "a[rel=\"next\"]:not([href*=\"prologue\"])", 10 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" 11 | } 12 | -------------------------------------------------------------------------------- /examples/practical4.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://practicalguidetoevil.wordpress.com/2018/04/09/prologue-4/", 3 | "title": 
"A Practical Guide To Evil: Book 4", 4 | "author": "erraticerrata", 5 | "content_selector": "#main", 6 | "content_title_selector": "h1.entry-title", 7 | "content_text_selector": ".entry-content", 8 | "filter_selector": ".sharedaddy, .wpcnt, style", 9 | "next_selector": "a[rel=\"next\"]:not([href*=\"prologue\"])", 10 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" 11 | } 12 | -------------------------------------------------------------------------------- /examples/practical5.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://practicalguidetoevil.wordpress.com/2019/01/14/prologue-5/", 3 | "title": "A Practical Guide To Evil: Book 5", 4 | "author": "erraticerrata", 5 | "content_selector": "#main", 6 | "content_title_selector": "h1.entry-title", 7 | "content_text_selector": ".entry-content", 8 | "filter_selector": ".sharedaddy, .wpcnt, style", 9 | "next_selector": "a[rel=\"next\"]:not([href*=\"prologue\"])", 10 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" 11 | } 12 | -------------------------------------------------------------------------------- /examples/practical6.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://practicalguidetoevil.wordpress.com/2020/01/06/prologue-6/", 3 | "title": "A Practical Guide To Evil: Book 6", 4 | "author": "erraticerrata", 5 | "content_selector": "#main", 6 | "content_title_selector": "h1.entry-title", 7 | "content_text_selector": ".entry-content", 8 | "filter_selector": ".sharedaddy, .wpcnt, style", 9 | "next_selector": "a[rel=\"next\"]:not([href*=\"prologue\"])", 10 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" 11 | } 12 | -------------------------------------------------------------------------------- /examples/practical7.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://practicalguidetoevil.wordpress.com/2021/03/02/prologue-7/", 3 | "title": "A Practical Guide To Evil: Book 7", 4 | "author": "erraticerrata", 5 | "content_selector": "#main", 6 | "content_title_selector": "h1.entry-title", 7 | "content_text_selector": ".entry-content", 8 | "filter_selector": ".sharedaddy, .wpcnt, style", 9 | "next_selector": "a[rel=\"next\"]:not([href*=\"prologue\"])", 10 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" 11 | } -------------------------------------------------------------------------------- /examples/practicalall.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://practicalguidetoevil.wordpress.com/2015/03/25/prologue/", 3 | "title": "A Practical Guide To Evil", 4 | "author": "erraticerrata", 5 | "content_selector": "#main", 6 | "content_title_selector": "h1.entry-title", 7 | "content_text_selector": ".entry-content", 8 | "filter_selector": ".sharedaddy, .wpcnt, style", 9 | "next_selector": "a[rel=\"next\"]", 10 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" 11 | } -------------------------------------------------------------------------------- /examples/practicalextra.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": 
"https://practicalguidetoevil.wordpress.com/extra-chapters/", 3 | "title": "A Practical Guide To Evil: Extra Chapters", 4 | "author": "erraticerrata", 5 | "chapter_selector": "#main .entry-content > ul > li > a", 6 | "content_selector": "#main .entry-content", 7 | "filter_selector": ".sharedaddy, .wpcnt, style", 8 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" 9 | } 10 | -------------------------------------------------------------------------------- /examples/sagaofsoul.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://www.sagaofsoul.com/story.html", 3 | "title": "Saga of Soul", 4 | "author": "Ouri Maler", 5 | "chapter_selector": "#mainbody li a", 6 | "content_selector": "#mainbody", 7 | "filter_selector": "script, noscript" 8 | } 9 | -------------------------------------------------------------------------------- /examples/shouldthesun.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://shouldthesun.wordpress.com/", 3 | "title": "Should The Sun Not Rise", 4 | "author": "Omicron", 5 | "chapter_selector": "#text-1 li a", 6 | "content_selector": ".entry-content", 7 | "filter_selector": ".sharedaddy, style, a[href*='shouldthesun.wordpress.com']", 8 | "cover_url": "https://shouldthesun.files.wordpress.com/2017/09/itzpapalotl.jpg" 9 | } 10 | -------------------------------------------------------------------------------- /examples/thegodsarebastards.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://tiraas.wordpress.com/table-of-contents/", 3 | "title": "The Gods Are Bastards", 4 | "author": "D. D. Webb", 5 | "chapter_selector": "article .entry-content a[href*='20']", 6 | "content_selector": "article .entry-content", 7 | "filter_selector": ".sharedaddy, .wpcnt, style, a[href*='tiraas.wordpress.com']", 8 | "cover_url": "https://tiraas.files.wordpress.com/2016/02/classof1182byhoarous.png" 9 | } 10 | -------------------------------------------------------------------------------- /examples/twig.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://twigserial.wordpress.com/2014/12/24/taking-root-1-1/", 3 | "title": "Twig", 4 | "author": "Wildbow", 5 | "content_selector": "#main", 6 | "content_title_selector": "h1.entry-title", 7 | "content_text_selector": ".entry-content", 8 | "filter_selector": ".sharedaddy, style, a[href*='twigserial.wordpress.com']", 9 | "next_selector": "a[rel=\"next\"]", 10 | "cover_url": "https://twigserial.files.wordpress.com/2015/03/cropped-twig-commission-titled1.png" 11 | } 12 | -------------------------------------------------------------------------------- /examples/unsong.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://unsongbook.com/prologue-2/", 3 | "title": "Unsong", 4 | "author": "Scott Alexander", 5 | "content_selector": "#pjgm-content", 6 | "content_title_selector": "h1.pjgm-posttitle", 7 | "content_text_selector": ".pjgm-postcontent", 8 | "filter_selector": ".sharedaddy, style", 9 | "next_selector": "a[rel=\"next\"]", 10 | "cover_url": "https://i.imgur.com/d9LvKMc.png" 11 | } 12 | -------------------------------------------------------------------------------- /examples/vacantthrone.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": 
"https://tcthrone.wordpress.com/", 3 | "title": "Vacant Throne", 4 | "author": "TCurator", 5 | "chapter_selector": "#main .entry-content > p a[href*=\"vacant-throne-\"]", 6 | "content_selector": "#main .entry-content", 7 | "filter_selector": ".sharedaddy, style, p:nth-of-type(1), a[href*='tcthrone.wordpress.com']" 8 | } 9 | -------------------------------------------------------------------------------- /examples/wanderinginn.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://wanderinginn.com/table-of-contents/", 3 | "title": "The Wandering Inn", 4 | "author": "pirate aba", 5 | "cover_url": "https://i0.wp.com/wanderinginn.com/wp-content/uploads/2023/03/Wandering_Inn-Vol1-eCover.jpg?ssl=1", 6 | "chapter_selector": "#table-of-contents .chapter-entry .body-web > a", 7 | "content_selector": ".entry-content", 8 | "filter_selector": "hr:last-of-type, hr:last-of-type ~ *" 9 | } 10 | -------------------------------------------------------------------------------- /examples/ward.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://www.parahumans.net/table-of-contents/", 3 | "title": "Ward", 4 | "author": "Wildbow", 5 | "chapter_selector": "#main .entry-content a", 6 | "content_selector": "#main .entry-content", 7 | "filter_selector": ".sharedaddy, style, a[href*='parahumans.wordpress.com'], p:first-of-type, p:last-of-type" 8 | } -------------------------------------------------------------------------------- /examples/worm.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://parahumans.wordpress.com/2011/06/11/1-1/", 3 | "title": "Worm", 4 | "author": "Wildbow", 5 | "content_selector": "#main", 6 | "content_title_selector": "h1.entry-title", 7 | "content_text_selector": ".entry-content", 8 | "filter_selector": ".sharedaddy, style, a[href*='parahumans.wordpress.com']", 9 | "next_selector": "a[rel=\"next\"]", 10 | "cover_url": "https://pre00.deviantart.net/969a/th/pre/i/2015/051/8/7/worm_cover_by_cactusfantastico-d8ivj4b.png" 11 | } 12 | -------------------------------------------------------------------------------- /leech.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import click 4 | import http.cookiejar 5 | import json 6 | import logging 7 | import os 8 | import requests 9 | import requests_cache 10 | import sqlite3 11 | from click_default_group import DefaultGroup 12 | from functools import reduce 13 | 14 | import sites 15 | import ebook 16 | 17 | __version__ = 2 18 | USER_AGENT = 'Leech/%s +http://davidlynch.org' % __version__ 19 | 20 | logger = logging.getLogger(__name__) 21 | 22 | 23 | def configure_logging(verbose): 24 | if verbose: 25 | logging.basicConfig( 26 | level=logging.DEBUG, 27 | format="[%(name)s @ %(levelname)s] %(message)s" 28 | ) 29 | else: 30 | logging.basicConfig( 31 | level=logging.INFO, 32 | format="[%(name)s] %(message)s" 33 | ) 34 | 35 | 36 | def create_session(cache): 37 | if cache: 38 | session = requests_cache.CachedSession('leech', expire_after=4 * 3600) 39 | else: 40 | session = requests.Session() 41 | 42 | lwp_cookiejar = http.cookiejar.LWPCookieJar() 43 | try: 44 | lwp_cookiejar.load('leech.cookies', ignore_discard=True) 45 | except Exception: 46 | # This file is very much optional, so this log isn't really necessary 47 | # logging.exception("Couldn't load cookies from leech.cookies") 48 | pass 49 | 
session.cookies.update(lwp_cookiejar) 50 | session.headers.update({ 51 | 'User-Agent': USER_AGENT, 52 | 'Accept-Language': 'en-US,en;q=0.5', 53 | 'Accept-Encoding': 'gzip, deflate', 54 | 'Accept': '*/*', # this is essential for imgur 55 | }) 56 | return session 57 | 58 | 59 | def load_on_disk_options(site): 60 | try: 61 | with open('leech.json') as store_file: 62 | store = json.load(store_file) 63 | login = store.get('logins', {}).get(site.site_key(), False) 64 | cover_options = store.get('cover', {}) 65 | image_options = store.get('images', {}) 66 | consolidated_options = { 67 | **{k: v for k, v in store.items() if k not in ('cover', 'images', 'logins')}, 68 | **store.get('site_options', {}).get(site.site_key(), {}) 69 | } 70 | except FileNotFoundError: 71 | logger.info("Unable to locate leech.json. Continuing, assuming it does not exist.") 72 | login = False 73 | image_options = {} 74 | cover_options = {} 75 | consolidated_options = {} 76 | return consolidated_options, login, cover_options, image_options 77 | 78 | 79 | def create_options(site, site_options, unused_flags): 80 | """Compiles options provided from multiple different sources 81 | (e.g. on disk, via flags, via defaults, via JSON provided as a flag value) 82 | into a single options object.""" 83 | default_site_options = site.get_default_options() 84 | 85 | flag_specified_site_options = site.interpret_site_specific_options(**unused_flags) 86 | 87 | configured_site_options, login, cover_options, image_options = load_on_disk_options(site) 88 | 89 | overridden_site_options = json.loads(site_options) 90 | 91 | # The final options dictionary is computed by layering the default, cover, image, configured, 92 | # overridden, and flag-specified options together, in that order; later entries override earlier ones. 93 | options = dict( 94 | list(default_site_options.items()) + 95 | list(cover_options.items()) + 96 | list(image_options.items()) + 97 | list(configured_site_options.items()) + 98 | list(overridden_site_options.items()) + 99 | list(flag_specified_site_options.items()) 100 | ) 101 | return options, login 102 | 103 | 104 | def open_story(site, url, session, login, options): 105 | handler = site( 106 | session, 107 | options=options 108 | ) 109 | 110 | if login: 111 | handler.login(login) 112 | 113 | try: 114 | story = handler.extract(url) 115 | except sites.SiteException as e: 116 | logger.error(e) 117 | return 118 | if not story: 119 | logger.error("Couldn't extract story") 120 | return 121 | return story 122 | 123 | 124 | def site_specific_options(f): 125 | option_list = sites.list_site_specific_options() 126 | return reduce(lambda cmd, decorator: decorator(cmd), [f] + option_list) 127 | 128 | 129 | @click.group(cls=DefaultGroup, default='download', default_if_no_args=True) 130 | def cli(): 131 | """Top level click group. Uses click-default-group to preserve most behavior from leech v1.""" 132 | pass 133 | 134 | 135 | @cli.command() 136 | @click.option('--verbose', '-v', is_flag=True, help="verbose output") 137 | def flush(verbose): 138 | """Flushes the contents of the cache.""" 139 | configure_logging(verbose) 140 | requests_cache.install_cache('leech') 141 | requests_cache.clear() 142 | 143 | conn = sqlite3.connect('leech.sqlite') 144 | conn.execute("VACUUM") 145 | conn.close() 146 | 147 | logger.info("Flushed cache") 148 | 149 | 150 | @cli.command() 151 | @click.argument('urls', nargs=-1, required=True) 152 | @click.option( 153 | '--site-options', 154 | default='{}', 155 | help='JSON object encoding any site-specific options.'
156 | ) 157 | @click.option( 158 | '--output-dir', 159 | default=None, 160 | help='Directory to save generated ebooks' 161 | ) 162 | @click.option('--cache/--no-cache', default=True) 163 | @click.option('--normalize/--no-normalize', default=True, help="Whether to normalize strange unicode text") 164 | @click.option('--verbose', '-v', is_flag=True, help="Verbose debugging output") 165 | @site_specific_options # Includes other click.options specific to sites 166 | def download(urls, site_options, cache, verbose, normalize, output_dir, **other_flags): 167 | """Downloads a story and saves it on disk as an epub ebook.""" 168 | configure_logging(verbose) 169 | session = create_session(cache) 170 | 171 | for url in urls: 172 | site, url = sites.get(url) 173 | options, login = create_options(site, site_options, other_flags) 174 | story = open_story(site, url, session, login, options) 175 | if story: 176 | filename = ebook.generate_epub( 177 | story, options, 178 | image_options={ 179 | 'image_fetch': options.get('image_fetch', True), 180 | 'image_format': options.get('image_format', 'jpeg'), 181 | 'compress_images': options.get('compress_images', False), 182 | 'max_image_size': options.get('max_image_size', 1_000_000), 183 | 'always_convert_images': options.get('always_convert_images', False) 184 | }, 185 | normalize=normalize, 186 | output_dir=output_dir or options.get('output_dir', os.getcwd()), 187 | allow_spaces=options.get('allow_spaces', False), 188 | session=session, 189 | parser=options.get('parser', 'lxml') 190 | ) 191 | logger.info("File created: " + filename) 192 | else: 193 | logger.warning("No ebook created") 194 | 195 | 196 | if __name__ == '__main__': 197 | cli() 198 | -------------------------------------------------------------------------------- /poetry.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. 
2 | 3 | [[package]] 4 | name = "attrs" 5 | version = "25.1.0" 6 | description = "Classes Without Boilerplate" 7 | optional = false 8 | python-versions = ">=3.8" 9 | groups = ["main"] 10 | files = [ 11 | {file = "attrs-25.1.0-py3-none-any.whl", hash = "sha256:c75a69e28a550a7e93789579c22aa26b0f5b83b75dc4e08fe092980051e1090a"}, 12 | {file = "attrs-25.1.0.tar.gz", hash = "sha256:1c97078a80c814273a76b2a298a932eb681c87415c11dee0a6921de7f1b02c3e"}, 13 | ] 14 | 15 | [package.extras] 16 | benchmark = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] 17 | cov = ["cloudpickle ; platform_python_implementation == \"CPython\"", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] 18 | dev = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pre-commit-uv", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] 19 | docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier (<24.7)"] 20 | tests = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] 21 | tests-mypy = ["mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\""] 22 | 23 | [[package]] 24 | name = "beautifulsoup4" 25 | version = "4.13.3" 26 | description = "Screen-scraping library" 27 | optional = false 28 | python-versions = ">=3.7.0" 29 | groups = ["main"] 30 | files = [ 31 | {file = "beautifulsoup4-4.13.3-py3-none-any.whl", hash = "sha256:99045d7d3f08f91f0d656bc9b7efbae189426cd913d830294a15eefa0ea4df16"}, 32 | {file = "beautifulsoup4-4.13.3.tar.gz", hash = "sha256:1bd32405dacc920b42b83ba01644747ed77456a65760e285fbc47633ceddaf8b"}, 33 | ] 34 | 35 | [package.dependencies] 36 | soupsieve = ">1.2" 37 | typing-extensions = ">=4.0.0" 38 | 39 | [package.extras] 40 | cchardet = ["cchardet"] 41 | chardet = ["chardet"] 42 | charset-normalizer = ["charset-normalizer"] 43 | html5lib = ["html5lib"] 44 | lxml = ["lxml"] 45 | 46 | [[package]] 47 | name = "cattrs" 48 | version = "24.1.2" 49 | description = "Composable complex class support for attrs and dataclasses." 
50 | optional = false 51 | python-versions = ">=3.8" 52 | groups = ["main"] 53 | files = [ 54 | {file = "cattrs-24.1.2-py3-none-any.whl", hash = "sha256:67c7495b760168d931a10233f979b28dc04daf853b30752246f4f8471c6d68d0"}, 55 | {file = "cattrs-24.1.2.tar.gz", hash = "sha256:8028cfe1ff5382df59dd36474a86e02d817b06eaf8af84555441bac915d2ef85"}, 56 | ] 57 | 58 | [package.dependencies] 59 | attrs = ">=23.1.0" 60 | exceptiongroup = {version = ">=1.1.1", markers = "python_version < \"3.11\""} 61 | typing-extensions = {version = ">=4.1.0,<4.6.3 || >4.6.3", markers = "python_version < \"3.11\""} 62 | 63 | [package.extras] 64 | bson = ["pymongo (>=4.4.0)"] 65 | cbor2 = ["cbor2 (>=5.4.6)"] 66 | msgpack = ["msgpack (>=1.0.5)"] 67 | msgspec = ["msgspec (>=0.18.5) ; implementation_name == \"cpython\""] 68 | orjson = ["orjson (>=3.9.2) ; implementation_name == \"cpython\""] 69 | pyyaml = ["pyyaml (>=6.0)"] 70 | tomlkit = ["tomlkit (>=0.11.8)"] 71 | ujson = ["ujson (>=5.7.0)"] 72 | 73 | [[package]] 74 | name = "certifi" 75 | version = "2024.8.30" 76 | description = "Python package for providing Mozilla's CA Bundle." 77 | optional = false 78 | python-versions = ">=3.6" 79 | groups = ["main"] 80 | files = [ 81 | {file = "certifi-2024.8.30-py3-none-any.whl", hash = "sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8"}, 82 | {file = "certifi-2024.8.30.tar.gz", hash = "sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9"}, 83 | ] 84 | 85 | [[package]] 86 | name = "charset-normalizer" 87 | version = "3.4.0" 88 | description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 89 | optional = false 90 | python-versions = ">=3.7.0" 91 | groups = ["main"] 92 | files = [ 93 | {file = "charset_normalizer-3.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:4f9fc98dad6c2eaa32fc3af1417d95b5e3d08aff968df0cd320066def971f9a6"}, 94 | {file = "charset_normalizer-3.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0de7b687289d3c1b3e8660d0741874abe7888100efe14bd0f9fd7141bcbda92b"}, 95 | {file = "charset_normalizer-3.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5ed2e36c3e9b4f21dd9422f6893dec0abf2cca553af509b10cd630f878d3eb99"}, 96 | {file = "charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d3ff7fc90b98c637bda91c89d51264a3dcf210cade3a2c6f838c7268d7a4ca"}, 97 | {file = "charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1110e22af8ca26b90bd6364fe4c763329b0ebf1ee213ba32b68c73de5752323d"}, 98 | {file = "charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:86f4e8cca779080f66ff4f191a685ced73d2f72d50216f7112185dc02b90b9b7"}, 99 | {file = "charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f683ddc7eedd742e2889d2bfb96d69573fde1d92fcb811979cdb7165bb9c7d3"}, 100 | {file = "charset_normalizer-3.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:27623ba66c183eca01bf9ff833875b459cad267aeeb044477fedac35e19ba907"}, 101 | {file = "charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f606a1881d2663630ea5b8ce2efe2111740df4b687bd78b34a8131baa007f79b"}, 102 | {file = "charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:0b309d1747110feb25d7ed6b01afdec269c647d382c857ef4663bbe6ad95a912"}, 103 | {file = 
"charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:136815f06a3ae311fae551c3df1f998a1ebd01ddd424aa5603a4336997629e95"}, 104 | {file = "charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:14215b71a762336254351b00ec720a8e85cada43b987da5a042e4ce3e82bd68e"}, 105 | {file = "charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:79983512b108e4a164b9c8d34de3992f76d48cadc9554c9e60b43f308988aabe"}, 106 | {file = "charset_normalizer-3.4.0-cp310-cp310-win32.whl", hash = "sha256:c94057af19bc953643a33581844649a7fdab902624d2eb739738a30e2b3e60fc"}, 107 | {file = "charset_normalizer-3.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:55f56e2ebd4e3bc50442fbc0888c9d8c94e4e06a933804e2af3e89e2f9c1c749"}, 108 | {file = "charset_normalizer-3.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0d99dd8ff461990f12d6e42c7347fd9ab2532fb70e9621ba520f9e8637161d7c"}, 109 | {file = "charset_normalizer-3.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c57516e58fd17d03ebe67e181a4e4e2ccab1168f8c2976c6a334d4f819fe5944"}, 110 | {file = "charset_normalizer-3.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6dba5d19c4dfab08e58d5b36304b3f92f3bd5d42c1a3fa37b5ba5cdf6dfcbcee"}, 111 | {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf4475b82be41b07cc5e5ff94810e6a01f276e37c2d55571e3fe175e467a1a1c"}, 112 | {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ce031db0408e487fd2775d745ce30a7cd2923667cf3b69d48d219f1d8f5ddeb6"}, 113 | {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8ff4e7cdfdb1ab5698e675ca622e72d58a6fa2a8aa58195de0c0061288e6e3ea"}, 114 | {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3710a9751938947e6327ea9f3ea6332a09bf0ba0c09cae9cb1f250bd1f1549bc"}, 115 | {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82357d85de703176b5587dbe6ade8ff67f9f69a41c0733cf2425378b49954de5"}, 116 | {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:47334db71978b23ebcf3c0f9f5ee98b8d65992b65c9c4f2d34c2eaf5bcaf0594"}, 117 | {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8ce7fd6767a1cc5a92a639b391891bf1c268b03ec7e021c7d6d902285259685c"}, 118 | {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:f1a2f519ae173b5b6a2c9d5fa3116ce16e48b3462c8b96dfdded11055e3d6365"}, 119 | {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:63bc5c4ae26e4bc6be6469943b8253c0fd4e4186c43ad46e713ea61a0ba49129"}, 120 | {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bcb4f8ea87d03bc51ad04add8ceaf9b0f085ac045ab4d74e73bbc2dc033f0236"}, 121 | {file = "charset_normalizer-3.4.0-cp311-cp311-win32.whl", hash = "sha256:9ae4ef0b3f6b41bad6366fb0ea4fc1d7ed051528e113a60fa2a65a9abb5b1d99"}, 122 | {file = "charset_normalizer-3.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:cee4373f4d3ad28f1ab6290684d8e2ebdb9e7a1b74fdc39e4c211995f77bec27"}, 123 | {file = "charset_normalizer-3.4.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0713f3adb9d03d49d365b70b84775d0a0d18e4ab08d12bc46baa6132ba78aaf6"}, 124 | {file = 
"charset_normalizer-3.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:de7376c29d95d6719048c194a9cf1a1b0393fbe8488a22008610b0361d834ecf"}, 125 | {file = "charset_normalizer-3.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4a51b48f42d9358460b78725283f04bddaf44a9358197b889657deba38f329db"}, 126 | {file = "charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b295729485b06c1a0683af02a9e42d2caa9db04a373dc38a6a58cdd1e8abddf1"}, 127 | {file = "charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ee803480535c44e7f5ad00788526da7d85525cfefaf8acf8ab9a310000be4b03"}, 128 | {file = "charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3d59d125ffbd6d552765510e3f31ed75ebac2c7470c7274195b9161a32350284"}, 129 | {file = "charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8cda06946eac330cbe6598f77bb54e690b4ca93f593dee1568ad22b04f347c15"}, 130 | {file = "charset_normalizer-3.4.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07afec21bbbbf8a5cc3651aa96b980afe2526e7f048fdfb7f1014d84acc8b6d8"}, 131 | {file = "charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6b40e8d38afe634559e398cc32b1472f376a4099c75fe6299ae607e404c033b2"}, 132 | {file = "charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b8dcd239c743aa2f9c22ce674a145e0a25cb1566c495928440a181ca1ccf6719"}, 133 | {file = "charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:84450ba661fb96e9fd67629b93d2941c871ca86fc38d835d19d4225ff946a631"}, 134 | {file = "charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:44aeb140295a2f0659e113b31cfe92c9061622cadbc9e2a2f7b8ef6b1e29ef4b"}, 135 | {file = "charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1db4e7fefefd0f548d73e2e2e041f9df5c59e178b4c72fbac4cc6f535cfb1565"}, 136 | {file = "charset_normalizer-3.4.0-cp312-cp312-win32.whl", hash = "sha256:5726cf76c982532c1863fb64d8c6dd0e4c90b6ece9feb06c9f202417a31f7dd7"}, 137 | {file = "charset_normalizer-3.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:b197e7094f232959f8f20541ead1d9862ac5ebea1d58e9849c1bf979255dfac9"}, 138 | {file = "charset_normalizer-3.4.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:dd4eda173a9fcccb5f2e2bd2a9f423d180194b1bf17cf59e3269899235b2a114"}, 139 | {file = "charset_normalizer-3.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9e3c4c9e1ed40ea53acf11e2a386383c3304212c965773704e4603d589343ed"}, 140 | {file = "charset_normalizer-3.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:92a7e36b000bf022ef3dbb9c46bfe2d52c047d5e3f3343f43204263c5addc250"}, 141 | {file = "charset_normalizer-3.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:54b6a92d009cbe2fb11054ba694bc9e284dad30a26757b1e372a1fdddaf21920"}, 142 | {file = "charset_normalizer-3.4.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ffd9493de4c922f2a38c2bf62b831dcec90ac673ed1ca182fe11b4d8e9f2a64"}, 143 | {file = "charset_normalizer-3.4.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:35c404d74c2926d0287fbd63ed5d27eb911eb9e4a3bb2c6d294f3cfd4a9e0c23"}, 144 | {file = "charset_normalizer-3.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:4796efc4faf6b53a18e3d46343535caed491776a22af773f366534056c4e1fbc"}, 145 | {file = "charset_normalizer-3.4.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e7fdd52961feb4c96507aa649550ec2a0d527c086d284749b2f582f2d40a2e0d"}, 146 | {file = "charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:92db3c28b5b2a273346bebb24857fda45601aef6ae1c011c0a997106581e8a88"}, 147 | {file = "charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ab973df98fc99ab39080bfb0eb3a925181454d7c3ac8a1e695fddfae696d9e90"}, 148 | {file = "charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4b67fdab07fdd3c10bb21edab3cbfe8cf5696f453afce75d815d9d7223fbe88b"}, 149 | {file = "charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:aa41e526a5d4a9dfcfbab0716c7e8a1b215abd3f3df5a45cf18a12721d31cb5d"}, 150 | {file = "charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ffc519621dce0c767e96b9c53f09c5d215578e10b02c285809f76509a3931482"}, 151 | {file = "charset_normalizer-3.4.0-cp313-cp313-win32.whl", hash = "sha256:f19c1585933c82098c2a520f8ec1227f20e339e33aca8fa6f956f6691b784e67"}, 152 | {file = "charset_normalizer-3.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:707b82d19e65c9bd28b81dde95249b07bf9f5b90ebe1ef17d9b57473f8a64b7b"}, 153 | {file = "charset_normalizer-3.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:dbe03226baf438ac4fda9e2d0715022fd579cb641c4cf639fa40d53b2fe6f3e2"}, 154 | {file = "charset_normalizer-3.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd9a8bd8900e65504a305bf8ae6fa9fbc66de94178c420791d0293702fce2df7"}, 155 | {file = "charset_normalizer-3.4.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b8831399554b92b72af5932cdbbd4ddc55c55f631bb13ff8fe4e6536a06c5c51"}, 156 | {file = "charset_normalizer-3.4.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a14969b8691f7998e74663b77b4c36c0337cb1df552da83d5c9004a93afdb574"}, 157 | {file = "charset_normalizer-3.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dcaf7c1524c0542ee2fc82cc8ec337f7a9f7edee2532421ab200d2b920fc97cf"}, 158 | {file = "charset_normalizer-3.4.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:425c5f215d0eecee9a56cdb703203dda90423247421bf0d67125add85d0c4455"}, 159 | {file = "charset_normalizer-3.4.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:d5b054862739d276e09928de37c79ddeec42a6e1bfc55863be96a36ba22926f6"}, 160 | {file = "charset_normalizer-3.4.0-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:f3e73a4255342d4eb26ef6df01e3962e73aa29baa3124a8e824c5d3364a65748"}, 161 | {file = "charset_normalizer-3.4.0-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:2f6c34da58ea9c1a9515621f4d9ac379871a8f21168ba1b5e09d74250de5ad62"}, 162 | {file = "charset_normalizer-3.4.0-cp37-cp37m-musllinux_1_2_s390x.whl", hash = "sha256:f09cb5a7bbe1ecae6e87901a2eb23e0256bb524a79ccc53eb0b7629fbe7677c4"}, 163 | {file = "charset_normalizer-3.4.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:0099d79bdfcf5c1f0c2c72f91516702ebf8b0b8ddd8905f97a8aecf49712c621"}, 164 | {file = "charset_normalizer-3.4.0-cp37-cp37m-win32.whl", hash = "sha256:9c98230f5042f4945f957d006edccc2af1e03ed5e37ce7c373f00a5a4daa6149"}, 165 | {file = "charset_normalizer-3.4.0-cp37-cp37m-win_amd64.whl", hash = 
"sha256:62f60aebecfc7f4b82e3f639a7d1433a20ec32824db2199a11ad4f5e146ef5ee"}, 166 | {file = "charset_normalizer-3.4.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:af73657b7a68211996527dbfeffbb0864e043d270580c5aef06dc4b659a4b578"}, 167 | {file = "charset_normalizer-3.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cab5d0b79d987c67f3b9e9c53f54a61360422a5a0bc075f43cab5621d530c3b6"}, 168 | {file = "charset_normalizer-3.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9289fd5dddcf57bab41d044f1756550f9e7cf0c8e373b8cdf0ce8773dc4bd417"}, 169 | {file = "charset_normalizer-3.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b493a043635eb376e50eedf7818f2f322eabbaa974e948bd8bdd29eb7ef2a51"}, 170 | {file = "charset_normalizer-3.4.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9fa2566ca27d67c86569e8c85297aaf413ffab85a8960500f12ea34ff98e4c41"}, 171 | {file = "charset_normalizer-3.4.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a8e538f46104c815be19c975572d74afb53f29650ea2025bbfaef359d2de2f7f"}, 172 | {file = "charset_normalizer-3.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6fd30dc99682dc2c603c2b315bded2799019cea829f8bf57dc6b61efde6611c8"}, 173 | {file = "charset_normalizer-3.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2006769bd1640bdf4d5641c69a3d63b71b81445473cac5ded39740a226fa88ab"}, 174 | {file = "charset_normalizer-3.4.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:dc15e99b2d8a656f8e666854404f1ba54765871104e50c8e9813af8a7db07f12"}, 175 | {file = "charset_normalizer-3.4.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:ab2e5bef076f5a235c3774b4f4028a680432cded7cad37bba0fd90d64b187d19"}, 176 | {file = "charset_normalizer-3.4.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:4ec9dd88a5b71abfc74e9df5ebe7921c35cbb3b641181a531ca65cdb5e8e4dea"}, 177 | {file = "charset_normalizer-3.4.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:43193c5cda5d612f247172016c4bb71251c784d7a4d9314677186a838ad34858"}, 178 | {file = "charset_normalizer-3.4.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:aa693779a8b50cd97570e5a0f343538a8dbd3e496fa5dcb87e29406ad0299654"}, 179 | {file = "charset_normalizer-3.4.0-cp38-cp38-win32.whl", hash = "sha256:7706f5850360ac01d80c89bcef1640683cc12ed87f42579dab6c5d3ed6888613"}, 180 | {file = "charset_normalizer-3.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:c3e446d253bd88f6377260d07c895816ebf33ffffd56c1c792b13bff9c3e1ade"}, 181 | {file = "charset_normalizer-3.4.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:980b4f289d1d90ca5efcf07958d3eb38ed9c0b7676bf2831a54d4f66f9c27dfa"}, 182 | {file = "charset_normalizer-3.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f28f891ccd15c514a0981f3b9db9aa23d62fe1a99997512b0491d2ed323d229a"}, 183 | {file = "charset_normalizer-3.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8aacce6e2e1edcb6ac625fb0f8c3a9570ccc7bfba1f63419b3769ccf6a00ed0"}, 184 | {file = "charset_normalizer-3.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd7af3717683bea4c87acd8c0d3d5b44d56120b26fd3f8a692bdd2d5260c620a"}, 185 | {file = "charset_normalizer-3.4.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5ff2ed8194587faf56555927b3aa10e6fb69d931e33953943bc4f837dfee2242"}, 186 | {file = "charset_normalizer-3.4.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:e91f541a85298cf35433bf66f3fab2a4a2cff05c127eeca4af174f6d497f0d4b"}, 187 | {file = "charset_normalizer-3.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:309a7de0a0ff3040acaebb35ec45d18db4b28232f21998851cfa709eeff49d62"}, 188 | {file = "charset_normalizer-3.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:285e96d9d53422efc0d7a17c60e59f37fbf3dfa942073f666db4ac71e8d726d0"}, 189 | {file = "charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:5d447056e2ca60382d460a604b6302d8db69476fd2015c81e7c35417cfabe4cd"}, 190 | {file = "charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:20587d20f557fe189b7947d8e7ec5afa110ccf72a3128d61a2a387c3313f46be"}, 191 | {file = "charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:130272c698667a982a5d0e626851ceff662565379baf0ff2cc58067b81d4f11d"}, 192 | {file = "charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:ab22fbd9765e6954bc0bcff24c25ff71dcbfdb185fcdaca49e81bac68fe724d3"}, 193 | {file = "charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7782afc9b6b42200f7362858f9e73b1f8316afb276d316336c0ec3bd73312742"}, 194 | {file = "charset_normalizer-3.4.0-cp39-cp39-win32.whl", hash = "sha256:2de62e8801ddfff069cd5c504ce3bc9672b23266597d4e4f50eda28846c322f2"}, 195 | {file = "charset_normalizer-3.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:95c3c157765b031331dd4db3c775e58deaee050a3042fcad72cbc4189d7c8dca"}, 196 | {file = "charset_normalizer-3.4.0-py3-none-any.whl", hash = "sha256:fe9f97feb71aa9896b81973a7bbada8c49501dc73e58a10fcef6663af95e5079"}, 197 | {file = "charset_normalizer-3.4.0.tar.gz", hash = "sha256:223217c3d4f82c3ac5e29032b3f1c2eb0fb591b72161f86d93f5719079dae93e"}, 198 | ] 199 | 200 | [[package]] 201 | name = "click" 202 | version = "8.1.8" 203 | description = "Composable command line interface toolkit" 204 | optional = false 205 | python-versions = ">=3.7" 206 | groups = ["main"] 207 | files = [ 208 | {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"}, 209 | {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, 210 | ] 211 | 212 | [package.dependencies] 213 | colorama = {version = "*", markers = "platform_system == \"Windows\""} 214 | 215 | [[package]] 216 | name = "click-default-group" 217 | version = "1.2.4" 218 | description = "click_default_group" 219 | optional = false 220 | python-versions = ">=2.7" 221 | groups = ["main"] 222 | files = [ 223 | {file = "click_default_group-1.2.4-py2.py3-none-any.whl", hash = "sha256:9b60486923720e7fc61731bdb32b617039aba820e22e1c88766b1125592eaa5f"}, 224 | {file = "click_default_group-1.2.4.tar.gz", hash = "sha256:eb3f3c99ec0d456ca6cd2a7f08f7d4e91771bef51b01bdd9580cc6450fe1251e"}, 225 | ] 226 | 227 | [package.dependencies] 228 | click = "*" 229 | 230 | [package.extras] 231 | test = ["pytest"] 232 | 233 | [[package]] 234 | name = "colorama" 235 | version = "0.4.6" 236 | description = "Cross-platform colored terminal text." 
237 | optional = false 238 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" 239 | groups = ["main"] 240 | markers = "platform_system == \"Windows\"" 241 | files = [ 242 | {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, 243 | {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, 244 | ] 245 | 246 | [[package]] 247 | name = "exceptiongroup" 248 | version = "1.2.2" 249 | description = "Backport of PEP 654 (exception groups)" 250 | optional = false 251 | python-versions = ">=3.7" 252 | groups = ["main"] 253 | markers = "python_version < \"3.11\"" 254 | files = [ 255 | {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, 256 | {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, 257 | ] 258 | 259 | [package.extras] 260 | test = ["pytest (>=6)"] 261 | 262 | [[package]] 263 | name = "flake8" 264 | version = "6.1.0" 265 | description = "the modular source code checker: pep8 pyflakes and co" 266 | optional = false 267 | python-versions = ">=3.8.1" 268 | groups = ["dev"] 269 | files = [ 270 | {file = "flake8-6.1.0-py2.py3-none-any.whl", hash = "sha256:ffdfce58ea94c6580c77888a86506937f9a1a227dfcd15f245d694ae20a6b6e5"}, 271 | {file = "flake8-6.1.0.tar.gz", hash = "sha256:d5b3857f07c030bdb5bf41c7f53799571d75c4491748a3adcd47de929e34cd23"}, 272 | ] 273 | 274 | [package.dependencies] 275 | mccabe = ">=0.7.0,<0.8.0" 276 | pycodestyle = ">=2.11.0,<2.12.0" 277 | pyflakes = ">=3.1.0,<3.2.0" 278 | 279 | [[package]] 280 | name = "idna" 281 | version = "3.10" 282 | description = "Internationalized Domain Names in Applications (IDNA)" 283 | optional = false 284 | python-versions = ">=3.6" 285 | groups = ["main"] 286 | files = [ 287 | {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, 288 | {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, 289 | ] 290 | 291 | [package.extras] 292 | all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] 293 | 294 | [[package]] 295 | name = "lxml" 296 | version = "5.3.1" 297 | description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." 
298 | optional = false 299 | python-versions = ">=3.6" 300 | groups = ["main"] 301 | files = [ 302 | {file = "lxml-5.3.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a4058f16cee694577f7e4dd410263cd0ef75644b43802a689c2b3c2a7e69453b"}, 303 | {file = "lxml-5.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:364de8f57d6eda0c16dcfb999af902da31396949efa0e583e12675d09709881b"}, 304 | {file = "lxml-5.3.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:528f3a0498a8edc69af0559bdcf8a9f5a8bf7c00051a6ef3141fdcf27017bbf5"}, 305 | {file = "lxml-5.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db4743e30d6f5f92b6d2b7c86b3ad250e0bad8dee4b7ad8a0c44bfb276af89a3"}, 306 | {file = "lxml-5.3.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:17b5d7f8acf809465086d498d62a981fa6a56d2718135bb0e4aa48c502055f5c"}, 307 | {file = "lxml-5.3.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:928e75a7200a4c09e6efc7482a1337919cc61fe1ba289f297827a5b76d8969c2"}, 308 | {file = "lxml-5.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a997b784a639e05b9d4053ef3b20c7e447ea80814a762f25b8ed5a89d261eac"}, 309 | {file = "lxml-5.3.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:7b82e67c5feb682dbb559c3e6b78355f234943053af61606af126df2183b9ef9"}, 310 | {file = "lxml-5.3.1-cp310-cp310-manylinux_2_28_ppc64le.whl", hash = "sha256:f1de541a9893cf8a1b1db9bf0bf670a2decab42e3e82233d36a74eda7822b4c9"}, 311 | {file = "lxml-5.3.1-cp310-cp310-manylinux_2_28_s390x.whl", hash = "sha256:de1fc314c3ad6bc2f6bd5b5a5b9357b8c6896333d27fdbb7049aea8bd5af2d79"}, 312 | {file = "lxml-5.3.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:7c0536bd9178f754b277a3e53f90f9c9454a3bd108b1531ffff720e082d824f2"}, 313 | {file = "lxml-5.3.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:68018c4c67d7e89951a91fbd371e2e34cd8cfc71f0bb43b5332db38497025d51"}, 314 | {file = "lxml-5.3.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:aa826340a609d0c954ba52fd831f0fba2a4165659ab0ee1a15e4aac21f302406"}, 315 | {file = "lxml-5.3.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:796520afa499732191e39fc95b56a3b07f95256f2d22b1c26e217fb69a9db5b5"}, 316 | {file = "lxml-5.3.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3effe081b3135237da6e4c4530ff2a868d3f80be0bda027e118a5971285d42d0"}, 317 | {file = "lxml-5.3.1-cp310-cp310-win32.whl", hash = "sha256:a22f66270bd6d0804b02cd49dae2b33d4341015545d17f8426f2c4e22f557a23"}, 318 | {file = "lxml-5.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:0bcfadea3cdc68e678d2b20cb16a16716887dd00a881e16f7d806c2138b8ff0c"}, 319 | {file = "lxml-5.3.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e220f7b3e8656ab063d2eb0cd536fafef396829cafe04cb314e734f87649058f"}, 320 | {file = "lxml-5.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0f2cfae0688fd01f7056a17367e3b84f37c545fb447d7282cf2c242b16262607"}, 321 | {file = "lxml-5.3.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:67d2f8ad9dcc3a9e826bdc7802ed541a44e124c29b7d95a679eeb58c1c14ade8"}, 322 | {file = "lxml-5.3.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db0c742aad702fd5d0c6611a73f9602f20aec2007c102630c06d7633d9c8f09a"}, 323 | {file = "lxml-5.3.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:198bb4b4dd888e8390afa4f170d4fa28467a7eaf857f1952589f16cfbb67af27"}, 324 | {file = "lxml-5.3.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d2a3e412ce1849be34b45922bfef03df32d1410a06d1cdeb793a343c2f1fd666"}, 325 | {file = "lxml-5.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2b8969dbc8d09d9cd2ae06362c3bad27d03f433252601ef658a49bd9f2b22d79"}, 326 | {file = "lxml-5.3.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:5be8f5e4044146a69c96077c7e08f0709c13a314aa5315981185c1f00235fe65"}, 327 | {file = "lxml-5.3.1-cp311-cp311-manylinux_2_28_ppc64le.whl", hash = "sha256:133f3493253a00db2c870d3740bc458ebb7d937bd0a6a4f9328373e0db305709"}, 328 | {file = "lxml-5.3.1-cp311-cp311-manylinux_2_28_s390x.whl", hash = "sha256:52d82b0d436edd6a1d22d94a344b9a58abd6c68c357ed44f22d4ba8179b37629"}, 329 | {file = "lxml-5.3.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:1b6f92e35e2658a5ed51c6634ceb5ddae32053182851d8cad2a5bc102a359b33"}, 330 | {file = "lxml-5.3.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:203b1d3eaebd34277be06a3eb880050f18a4e4d60861efba4fb946e31071a295"}, 331 | {file = "lxml-5.3.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:155e1a5693cf4b55af652f5c0f78ef36596c7f680ff3ec6eb4d7d85367259b2c"}, 332 | {file = "lxml-5.3.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:22ec2b3c191f43ed21f9545e9df94c37c6b49a5af0a874008ddc9132d49a2d9c"}, 333 | {file = "lxml-5.3.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:7eda194dd46e40ec745bf76795a7cccb02a6a41f445ad49d3cf66518b0bd9cff"}, 334 | {file = "lxml-5.3.1-cp311-cp311-win32.whl", hash = "sha256:fb7c61d4be18e930f75948705e9718618862e6fc2ed0d7159b2262be73f167a2"}, 335 | {file = "lxml-5.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:c809eef167bf4a57af4b03007004896f5c60bd38dc3852fcd97a26eae3d4c9e6"}, 336 | {file = "lxml-5.3.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:e69add9b6b7b08c60d7ff0152c7c9a6c45b4a71a919be5abde6f98f1ea16421c"}, 337 | {file = "lxml-5.3.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:4e52e1b148867b01c05e21837586ee307a01e793b94072d7c7b91d2c2da02ffe"}, 338 | {file = "lxml-5.3.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a4b382e0e636ed54cd278791d93fe2c4f370772743f02bcbe431a160089025c9"}, 339 | {file = "lxml-5.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c2e49dc23a10a1296b04ca9db200c44d3eb32c8d8ec532e8c1fd24792276522a"}, 340 | {file = "lxml-5.3.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4399b4226c4785575fb20998dc571bc48125dc92c367ce2602d0d70e0c455eb0"}, 341 | {file = "lxml-5.3.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5412500e0dc5481b1ee9cf6b38bb3b473f6e411eb62b83dc9b62699c3b7b79f7"}, 342 | {file = "lxml-5.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c93ed3c998ea8472be98fb55aed65b5198740bfceaec07b2eba551e55b7b9ae"}, 343 | {file = "lxml-5.3.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:63d57fc94eb0bbb4735e45517afc21ef262991d8758a8f2f05dd6e4174944519"}, 344 | {file = "lxml-5.3.1-cp312-cp312-manylinux_2_28_ppc64le.whl", hash = "sha256:b450d7cabcd49aa7ab46a3c6aa3ac7e1593600a1a0605ba536ec0f1b99a04322"}, 345 | {file = "lxml-5.3.1-cp312-cp312-manylinux_2_28_s390x.whl", hash = "sha256:4df0ec814b50275ad6a99bc82a38b59f90e10e47714ac9871e1b223895825468"}, 346 | {file = 
"lxml-5.3.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:d184f85ad2bb1f261eac55cddfcf62a70dee89982c978e92b9a74a1bfef2e367"}, 347 | {file = "lxml-5.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b725e70d15906d24615201e650d5b0388b08a5187a55f119f25874d0103f90dd"}, 348 | {file = "lxml-5.3.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:a31fa7536ec1fb7155a0cd3a4e3d956c835ad0a43e3610ca32384d01f079ea1c"}, 349 | {file = "lxml-5.3.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:3c3c8b55c7fc7b7e8877b9366568cc73d68b82da7fe33d8b98527b73857a225f"}, 350 | {file = "lxml-5.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d61ec60945d694df806a9aec88e8f29a27293c6e424f8ff91c80416e3c617645"}, 351 | {file = "lxml-5.3.1-cp312-cp312-win32.whl", hash = "sha256:f4eac0584cdc3285ef2e74eee1513a6001681fd9753b259e8159421ed28a72e5"}, 352 | {file = "lxml-5.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:29bfc8d3d88e56ea0a27e7c4897b642706840247f59f4377d81be8f32aa0cfbf"}, 353 | {file = "lxml-5.3.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:c093c7088b40d8266f57ed71d93112bd64c6724d31f0794c1e52cc4857c28e0e"}, 354 | {file = "lxml-5.3.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b0884e3f22d87c30694e625b1e62e6f30d39782c806287450d9dc2fdf07692fd"}, 355 | {file = "lxml-5.3.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1637fa31ec682cd5760092adfabe86d9b718a75d43e65e211d5931809bc111e7"}, 356 | {file = "lxml-5.3.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a364e8e944d92dcbf33b6b494d4e0fb3499dcc3bd9485beb701aa4b4201fa414"}, 357 | {file = "lxml-5.3.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:779e851fd0e19795ccc8a9bb4d705d6baa0ef475329fe44a13cf1e962f18ff1e"}, 358 | {file = "lxml-5.3.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c4393600915c308e546dc7003d74371744234e8444a28622d76fe19b98fa59d1"}, 359 | {file = "lxml-5.3.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:673b9d8e780f455091200bba8534d5f4f465944cbdd61f31dc832d70e29064a5"}, 360 | {file = "lxml-5.3.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:2e4a570f6a99e96c457f7bec5ad459c9c420ee80b99eb04cbfcfe3fc18ec6423"}, 361 | {file = "lxml-5.3.1-cp313-cp313-manylinux_2_28_ppc64le.whl", hash = "sha256:71f31eda4e370f46af42fc9f264fafa1b09f46ba07bdbee98f25689a04b81c20"}, 362 | {file = "lxml-5.3.1-cp313-cp313-manylinux_2_28_s390x.whl", hash = "sha256:42978a68d3825eaac55399eb37a4d52012a205c0c6262199b8b44fcc6fd686e8"}, 363 | {file = "lxml-5.3.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:8b1942b3e4ed9ed551ed3083a2e6e0772de1e5e3aca872d955e2e86385fb7ff9"}, 364 | {file = "lxml-5.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:85c4f11be9cf08917ac2a5a8b6e1ef63b2f8e3799cec194417e76826e5f1de9c"}, 365 | {file = "lxml-5.3.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:231cf4d140b22a923b1d0a0a4e0b4f972e5893efcdec188934cc65888fd0227b"}, 366 | {file = "lxml-5.3.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:5865b270b420eda7b68928d70bb517ccbe045e53b1a428129bb44372bf3d7dd5"}, 367 | {file = "lxml-5.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dbf7bebc2275016cddf3c997bf8a0f7044160714c64a9b83975670a04e6d2252"}, 368 | {file = "lxml-5.3.1-cp313-cp313-win32.whl", hash = "sha256:d0751528b97d2b19a388b302be2a0ee05817097bab46ff0ed76feeec24951f78"}, 369 | {file = 
"lxml-5.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:91fb6a43d72b4f8863d21f347a9163eecbf36e76e2f51068d59cd004c506f332"}, 370 | {file = "lxml-5.3.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:016b96c58e9a4528219bb563acf1aaaa8bc5452e7651004894a973f03b84ba81"}, 371 | {file = "lxml-5.3.1-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82a4bb10b0beef1434fb23a09f001ab5ca87895596b4581fd53f1e5145a8934a"}, 372 | {file = "lxml-5.3.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d68eeef7b4d08a25e51897dac29bcb62aba830e9ac6c4e3297ee7c6a0cf6439"}, 373 | {file = "lxml-5.3.1-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:f12582b8d3b4c6be1d298c49cb7ae64a3a73efaf4c2ab4e37db182e3545815ac"}, 374 | {file = "lxml-5.3.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:2df7ed5edeb6bd5590914cd61df76eb6cce9d590ed04ec7c183cf5509f73530d"}, 375 | {file = "lxml-5.3.1-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:585c4dc429deebc4307187d2b71ebe914843185ae16a4d582ee030e6cfbb4d8a"}, 376 | {file = "lxml-5.3.1-cp36-cp36m-win32.whl", hash = "sha256:06a20d607a86fccab2fc15a77aa445f2bdef7b49ec0520a842c5c5afd8381576"}, 377 | {file = "lxml-5.3.1-cp36-cp36m-win_amd64.whl", hash = "sha256:057e30d0012439bc54ca427a83d458752ccda725c1c161cc283db07bcad43cf9"}, 378 | {file = "lxml-5.3.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4867361c049761a56bd21de507cab2c2a608c55102311d142ade7dab67b34f32"}, 379 | {file = "lxml-5.3.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3dddf0fb832486cc1ea71d189cb92eb887826e8deebe128884e15020bb6e3f61"}, 380 | {file = "lxml-5.3.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1bcc211542f7af6f2dfb705f5f8b74e865592778e6cafdfd19c792c244ccce19"}, 381 | {file = "lxml-5.3.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aaca5a812f050ab55426c32177091130b1e49329b3f002a32934cd0245571307"}, 382 | {file = "lxml-5.3.1-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:236610b77589faf462337b3305a1be91756c8abc5a45ff7ca8f245a71c5dab70"}, 383 | {file = "lxml-5.3.1-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:aed57b541b589fa05ac248f4cb1c46cbb432ab82cbd467d1c4f6a2bdc18aecf9"}, 384 | {file = "lxml-5.3.1-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:75fa3d6946d317ffc7016a6fcc44f42db6d514b7fdb8b4b28cbe058303cb6e53"}, 385 | {file = "lxml-5.3.1-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:96eef5b9f336f623ffc555ab47a775495e7e8846dde88de5f941e2906453a1ce"}, 386 | {file = "lxml-5.3.1-cp37-cp37m-win32.whl", hash = "sha256:ef45f31aec9be01379fc6c10f1d9c677f032f2bac9383c827d44f620e8a88407"}, 387 | {file = "lxml-5.3.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a0611da6b07dd3720f492db1b463a4d1175b096b49438761cc9f35f0d9eaaef5"}, 388 | {file = "lxml-5.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b2aca14c235c7a08558fe0a4786a1a05873a01e86b474dfa8f6df49101853a4e"}, 389 | {file = "lxml-5.3.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae82fce1d964f065c32c9517309f0c7be588772352d2f40b1574a214bd6e6098"}, 390 | {file = "lxml-5.3.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7aae7a3d63b935babfdc6864b31196afd5145878ddd22f5200729006366bc4d5"}, 391 | {file = "lxml-5.3.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:e8e0d177b1fe251c3b1b914ab64135475c5273c8cfd2857964b2e3bb0fe196a7"}, 392 | {file = "lxml-5.3.1-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:6c4dd3bfd0c82400060896717dd261137398edb7e524527438c54a8c34f736bf"}, 393 | {file = "lxml-5.3.1-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:f1208c1c67ec9e151d78aa3435aa9b08a488b53d9cfac9b699f15255a3461ef2"}, 394 | {file = "lxml-5.3.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:c6aacf00d05b38a5069826e50ae72751cb5bc27bdc4d5746203988e429b385bb"}, 395 | {file = "lxml-5.3.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:5881aaa4bf3a2d086c5f20371d3a5856199a0d8ac72dd8d0dbd7a2ecfc26ab73"}, 396 | {file = "lxml-5.3.1-cp38-cp38-win32.whl", hash = "sha256:45fbb70ccbc8683f2fb58bea89498a7274af1d9ec7995e9f4af5604e028233fc"}, 397 | {file = "lxml-5.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:7512b4d0fc5339d5abbb14d1843f70499cab90d0b864f790e73f780f041615d7"}, 398 | {file = "lxml-5.3.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5885bc586f1edb48e5d68e7a4b4757b5feb2a496b64f462b4d65950f5af3364f"}, 399 | {file = "lxml-5.3.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1b92fe86e04f680b848fff594a908edfa72b31bfc3499ef7433790c11d4c8cd8"}, 400 | {file = "lxml-5.3.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a091026c3bf7519ab1e64655a3f52a59ad4a4e019a6f830c24d6430695b1cf6a"}, 401 | {file = "lxml-5.3.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8ffb141361108e864ab5f1813f66e4e1164181227f9b1f105b042729b6c15125"}, 402 | {file = "lxml-5.3.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3715cdf0dd31b836433af9ee9197af10e3df41d273c19bb249230043667a5dfd"}, 403 | {file = "lxml-5.3.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:88b72eb7222d918c967202024812c2bfb4048deeb69ca328363fb8e15254c549"}, 404 | {file = "lxml-5.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa59974880ab5ad8ef3afaa26f9bda148c5f39e06b11a8ada4660ecc9fb2feb3"}, 405 | {file = "lxml-5.3.1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:3bb8149840daf2c3f97cebf00e4ed4a65a0baff888bf2605a8d0135ff5cf764e"}, 406 | {file = "lxml-5.3.1-cp39-cp39-manylinux_2_28_ppc64le.whl", hash = "sha256:0d6b2fa86becfa81f0a0271ccb9eb127ad45fb597733a77b92e8a35e53414914"}, 407 | {file = "lxml-5.3.1-cp39-cp39-manylinux_2_28_s390x.whl", hash = "sha256:136bf638d92848a939fd8f0e06fcf92d9f2e4b57969d94faae27c55f3d85c05b"}, 408 | {file = "lxml-5.3.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:89934f9f791566e54c1d92cdc8f8fd0009447a5ecdb1ec6b810d5f8c4955f6be"}, 409 | {file = "lxml-5.3.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a8ade0363f776f87f982572c2860cc43c65ace208db49c76df0a21dde4ddd16e"}, 410 | {file = "lxml-5.3.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:bfbbab9316330cf81656fed435311386610f78b6c93cc5db4bebbce8dd146675"}, 411 | {file = "lxml-5.3.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:172d65f7c72a35a6879217bcdb4bb11bc88d55fb4879e7569f55616062d387c2"}, 412 | {file = "lxml-5.3.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:e3c623923967f3e5961d272718655946e5322b8d058e094764180cdee7bab1af"}, 413 | {file = "lxml-5.3.1-cp39-cp39-win32.whl", hash = "sha256:ce0930a963ff593e8bb6fda49a503911accc67dee7e5445eec972668e672a0f0"}, 414 | {file = "lxml-5.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:f7b64fcd670bca8800bc10ced36620c6bbb321e7bc1214b9c0c0df269c1dddc2"}, 415 | {file = 
"lxml-5.3.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:afa578b6524ff85fb365f454cf61683771d0170470c48ad9d170c48075f86725"}, 416 | {file = "lxml-5.3.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67f5e80adf0aafc7b5454f2c1cb0cde920c9b1f2cbd0485f07cc1d0497c35c5d"}, 417 | {file = "lxml-5.3.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2dd0b80ac2d8f13ffc906123a6f20b459cb50a99222d0da492360512f3e50f84"}, 418 | {file = "lxml-5.3.1-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:422c179022ecdedbe58b0e242607198580804253da220e9454ffe848daa1cfd2"}, 419 | {file = "lxml-5.3.1-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:524ccfded8989a6595dbdda80d779fb977dbc9a7bc458864fc9a0c2fc15dc877"}, 420 | {file = "lxml-5.3.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:48fd46bf7155def2e15287c6f2b133a2f78e2d22cdf55647269977b873c65499"}, 421 | {file = "lxml-5.3.1-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:05123fad495a429f123307ac6d8fd6f977b71e9a0b6d9aeeb8f80c017cb17131"}, 422 | {file = "lxml-5.3.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a243132767150a44e6a93cd1dde41010036e1cbc63cc3e9fe1712b277d926ce3"}, 423 | {file = "lxml-5.3.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c92ea6d9dd84a750b2bae72ff5e8cf5fdd13e58dda79c33e057862c29a8d5b50"}, 424 | {file = "lxml-5.3.1-pp37-pypy37_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:2f1be45d4c15f237209bbf123a0e05b5d630c8717c42f59f31ea9eae2ad89394"}, 425 | {file = "lxml-5.3.1-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:a83d3adea1e0ee36dac34627f78ddd7f093bb9cfc0a8e97f1572a949b695cb98"}, 426 | {file = "lxml-5.3.1-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:3edbb9c9130bac05d8c3fe150c51c337a471cc7fdb6d2a0a7d3a88e88a829314"}, 427 | {file = "lxml-5.3.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2f23cf50eccb3255b6e913188291af0150d89dab44137a69e14e4dcb7be981f1"}, 428 | {file = "lxml-5.3.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df7e5edac4778127f2bf452e0721a58a1cfa4d1d9eac63bdd650535eb8543615"}, 429 | {file = "lxml-5.3.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:094b28ed8a8a072b9e9e2113a81fda668d2053f2ca9f2d202c2c8c7c2d6516b1"}, 430 | {file = "lxml-5.3.1-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:514fe78fc4b87e7a7601c92492210b20a1b0c6ab20e71e81307d9c2e377c64de"}, 431 | {file = "lxml-5.3.1-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:8fffc08de02071c37865a155e5ea5fce0282e1546fd5bde7f6149fcaa32558ac"}, 432 | {file = "lxml-5.3.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:4b0d5cdba1b655d5b18042ac9c9ff50bda33568eb80feaaca4fc237b9c4fbfde"}, 433 | {file = "lxml-5.3.1-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:3031e4c16b59424e8d78522c69b062d301d951dc55ad8685736c3335a97fc270"}, 434 | {file = "lxml-5.3.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb659702a45136c743bc130760c6f137870d4df3a9e14386478b8a0511abcfca"}, 435 | {file = "lxml-5.3.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a11b16a33656ffc43c92a5343a28dc71eefe460bcc2a4923a96f292692709f6"}, 436 | {file = "lxml-5.3.1-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c5ae125276f254b01daa73e2c103363d3e99e3e10505686ac7d9d2442dd4627a"}, 437 | {file = 
"lxml-5.3.1-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c76722b5ed4a31ba103e0dc77ab869222ec36efe1a614e42e9bcea88a36186fe"}, 438 | {file = "lxml-5.3.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:33e06717c00c788ab4e79bc4726ecc50c54b9bfb55355eae21473c145d83c2d2"}, 439 | {file = "lxml-5.3.1.tar.gz", hash = "sha256:106b7b5d2977b339f1e97efe2778e2ab20e99994cbb0ec5e55771ed0795920c8"}, 440 | ] 441 | 442 | [package.extras] 443 | cssselect = ["cssselect (>=0.7)"] 444 | html-clean = ["lxml_html_clean"] 445 | html5 = ["html5lib"] 446 | htmlsoup = ["BeautifulSoup4"] 447 | source = ["Cython (>=3.0.11,<3.1.0)"] 448 | 449 | [[package]] 450 | name = "mccabe" 451 | version = "0.7.0" 452 | description = "McCabe checker, plugin for flake8" 453 | optional = false 454 | python-versions = ">=3.6" 455 | groups = ["dev"] 456 | files = [ 457 | {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, 458 | {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, 459 | ] 460 | 461 | [[package]] 462 | name = "mintotp" 463 | version = "0.3.0" 464 | description = "MinTOTP - Minimal TOTP Generator" 465 | optional = false 466 | python-versions = "*" 467 | groups = ["main"] 468 | files = [ 469 | {file = "mintotp-0.3.0-py3-none-any.whl", hash = "sha256:eadee8531d9ee95eda92fd17949137454acd1d2a001dcf68f99bb8de56f06468"}, 470 | {file = "mintotp-0.3.0.tar.gz", hash = "sha256:d0f4db5edb38a7481120176a526e8c29539b9e80581dd2dcc1811557d77cfad5"}, 471 | ] 472 | 473 | [[package]] 474 | name = "pillow" 475 | version = "11.1.0" 476 | description = "Python Imaging Library (Fork)" 477 | optional = false 478 | python-versions = ">=3.9" 479 | groups = ["main"] 480 | files = [ 481 | {file = "pillow-11.1.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:e1abe69aca89514737465752b4bcaf8016de61b3be1397a8fc260ba33321b3a8"}, 482 | {file = "pillow-11.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c640e5a06869c75994624551f45e5506e4256562ead981cce820d5ab39ae2192"}, 483 | {file = "pillow-11.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a07dba04c5e22824816b2615ad7a7484432d7f540e6fa86af60d2de57b0fcee2"}, 484 | {file = "pillow-11.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e267b0ed063341f3e60acd25c05200df4193e15a4a5807075cd71225a2386e26"}, 485 | {file = "pillow-11.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:bd165131fd51697e22421d0e467997ad31621b74bfc0b75956608cb2906dda07"}, 486 | {file = "pillow-11.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:abc56501c3fd148d60659aae0af6ddc149660469082859fa7b066a298bde9482"}, 487 | {file = "pillow-11.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:54ce1c9a16a9561b6d6d8cb30089ab1e5eb66918cb47d457bd996ef34182922e"}, 488 | {file = "pillow-11.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:73ddde795ee9b06257dac5ad42fcb07f3b9b813f8c1f7f870f402f4dc54b5269"}, 489 | {file = "pillow-11.1.0-cp310-cp310-win32.whl", hash = "sha256:3a5fe20a7b66e8135d7fd617b13272626a28278d0e578c98720d9ba4b2439d49"}, 490 | {file = "pillow-11.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:b6123aa4a59d75f06e9dd3dac5bf8bc9aa383121bb3dd9a7a612e05eabc9961a"}, 491 | {file = "pillow-11.1.0-cp310-cp310-win_arm64.whl", hash = "sha256:a76da0a31da6fcae4210aa94fd779c65c75786bc9af06289cd1c184451ef7a65"}, 492 | {file = "pillow-11.1.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = 
"sha256:e06695e0326d05b06833b40b7ef477e475d0b1ba3a6d27da1bb48c23209bf457"}, 493 | {file = "pillow-11.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96f82000e12f23e4f29346e42702b6ed9a2f2fea34a740dd5ffffcc8c539eb35"}, 494 | {file = "pillow-11.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3cd561ded2cf2bbae44d4605837221b987c216cff94f49dfeed63488bb228d2"}, 495 | {file = "pillow-11.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f189805c8be5ca5add39e6f899e6ce2ed824e65fb45f3c28cb2841911da19070"}, 496 | {file = "pillow-11.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:dd0052e9db3474df30433f83a71b9b23bd9e4ef1de13d92df21a52c0303b8ab6"}, 497 | {file = "pillow-11.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:837060a8599b8f5d402e97197d4924f05a2e0d68756998345c829c33186217b1"}, 498 | {file = "pillow-11.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:aa8dd43daa836b9a8128dbe7d923423e5ad86f50a7a14dc688194b7be5c0dea2"}, 499 | {file = "pillow-11.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0a2f91f8a8b367e7a57c6e91cd25af510168091fb89ec5146003e424e1558a96"}, 500 | {file = "pillow-11.1.0-cp311-cp311-win32.whl", hash = "sha256:c12fc111ef090845de2bb15009372175d76ac99969bdf31e2ce9b42e4b8cd88f"}, 501 | {file = "pillow-11.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:fbd43429d0d7ed6533b25fc993861b8fd512c42d04514a0dd6337fb3ccf22761"}, 502 | {file = "pillow-11.1.0-cp311-cp311-win_arm64.whl", hash = "sha256:f7955ecf5609dee9442cbface754f2c6e541d9e6eda87fad7f7a989b0bdb9d71"}, 503 | {file = "pillow-11.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2062ffb1d36544d42fcaa277b069c88b01bb7298f4efa06731a7fd6cc290b81a"}, 504 | {file = "pillow-11.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a85b653980faad27e88b141348707ceeef8a1186f75ecc600c395dcac19f385b"}, 505 | {file = "pillow-11.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9409c080586d1f683df3f184f20e36fb647f2e0bc3988094d4fd8c9f4eb1b3b3"}, 506 | {file = "pillow-11.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7fdadc077553621911f27ce206ffcbec7d3f8d7b50e0da39f10997e8e2bb7f6a"}, 507 | {file = "pillow-11.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:93a18841d09bcdd774dcdc308e4537e1f867b3dec059c131fde0327899734aa1"}, 508 | {file = "pillow-11.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:9aa9aeddeed452b2f616ff5507459e7bab436916ccb10961c4a382cd3e03f47f"}, 509 | {file = "pillow-11.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3cdcdb0b896e981678eee140d882b70092dac83ac1cdf6b3a60e2216a73f2b91"}, 510 | {file = "pillow-11.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:36ba10b9cb413e7c7dfa3e189aba252deee0602c86c309799da5a74009ac7a1c"}, 511 | {file = "pillow-11.1.0-cp312-cp312-win32.whl", hash = "sha256:cfd5cd998c2e36a862d0e27b2df63237e67273f2fc78f47445b14e73a810e7e6"}, 512 | {file = "pillow-11.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:a697cd8ba0383bba3d2d3ada02b34ed268cb548b369943cd349007730c92bddf"}, 513 | {file = "pillow-11.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:4dd43a78897793f60766563969442020e90eb7847463eca901e41ba186a7d4a5"}, 514 | {file = "pillow-11.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ae98e14432d458fc3de11a77ccb3ae65ddce70f730e7c76140653048c71bfcbc"}, 515 | {file = "pillow-11.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:cc1331b6d5a6e144aeb5e626f4375f5b7ae9934ba620c0ac6b3e43d5e683a0f0"}, 516 | {file = "pillow-11.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:758e9d4ef15d3560214cddbc97b8ef3ef86ce04d62ddac17ad39ba87e89bd3b1"}, 517 | {file = "pillow-11.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b523466b1a31d0dcef7c5be1f20b942919b62fd6e9a9be199d035509cbefc0ec"}, 518 | {file = "pillow-11.1.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:9044b5e4f7083f209c4e35aa5dd54b1dd5b112b108648f5c902ad586d4f945c5"}, 519 | {file = "pillow-11.1.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:3764d53e09cdedd91bee65c2527815d315c6b90d7b8b79759cc48d7bf5d4f114"}, 520 | {file = "pillow-11.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:31eba6bbdd27dde97b0174ddf0297d7a9c3a507a8a1480e1e60ef914fe23d352"}, 521 | {file = "pillow-11.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b5d658fbd9f0d6eea113aea286b21d3cd4d3fd978157cbf2447a6035916506d3"}, 522 | {file = "pillow-11.1.0-cp313-cp313-win32.whl", hash = "sha256:f86d3a7a9af5d826744fabf4afd15b9dfef44fe69a98541f666f66fbb8d3fef9"}, 523 | {file = "pillow-11.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:593c5fd6be85da83656b93ffcccc2312d2d149d251e98588b14fbc288fd8909c"}, 524 | {file = "pillow-11.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:11633d58b6ee5733bde153a8dafd25e505ea3d32e261accd388827ee987baf65"}, 525 | {file = "pillow-11.1.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:70ca5ef3b3b1c4a0812b5c63c57c23b63e53bc38e758b37a951e5bc466449861"}, 526 | {file = "pillow-11.1.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:8000376f139d4d38d6851eb149b321a52bb8893a88dae8ee7d95840431977081"}, 527 | {file = "pillow-11.1.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ee85f0696a17dd28fbcfceb59f9510aa71934b483d1f5601d1030c3c8304f3c"}, 528 | {file = "pillow-11.1.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:dd0e081319328928531df7a0e63621caf67652c8464303fd102141b785ef9547"}, 529 | {file = "pillow-11.1.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e63e4e5081de46517099dc30abe418122f54531a6ae2ebc8680bcd7096860eab"}, 530 | {file = "pillow-11.1.0-cp313-cp313t-win32.whl", hash = "sha256:dda60aa465b861324e65a78c9f5cf0f4bc713e4309f83bc387be158b077963d9"}, 531 | {file = "pillow-11.1.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ad5db5781c774ab9a9b2c4302bbf0c1014960a0a7be63278d13ae6fdf88126fe"}, 532 | {file = "pillow-11.1.0-cp313-cp313t-win_arm64.whl", hash = "sha256:67cd427c68926108778a9005f2a04adbd5e67c442ed21d95389fe1d595458756"}, 533 | {file = "pillow-11.1.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:bf902d7413c82a1bfa08b06a070876132a5ae6b2388e2712aab3a7cbc02205c6"}, 534 | {file = "pillow-11.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c1eec9d950b6fe688edee07138993e54ee4ae634c51443cfb7c1e7613322718e"}, 535 | {file = "pillow-11.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e275ee4cb11c262bd108ab2081f750db2a1c0b8c12c1897f27b160c8bd57bbc"}, 536 | {file = "pillow-11.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4db853948ce4e718f2fc775b75c37ba2efb6aaea41a1a5fc57f0af59eee774b2"}, 537 | {file = "pillow-11.1.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:ab8a209b8485d3db694fa97a896d96dd6533d63c22829043fd9de627060beade"}, 538 | {file = "pillow-11.1.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = 
"sha256:54251ef02a2309b5eec99d151ebf5c9904b77976c8abdcbce7891ed22df53884"}, 539 | {file = "pillow-11.1.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:5bb94705aea800051a743aa4874bb1397d4695fb0583ba5e425ee0328757f196"}, 540 | {file = "pillow-11.1.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:89dbdb3e6e9594d512780a5a1c42801879628b38e3efc7038094430844e271d8"}, 541 | {file = "pillow-11.1.0-cp39-cp39-win32.whl", hash = "sha256:e5449ca63da169a2e6068dd0e2fcc8d91f9558aba89ff6d02121ca8ab11e79e5"}, 542 | {file = "pillow-11.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:3362c6ca227e65c54bf71a5f88b3d4565ff1bcbc63ae72c34b07bbb1cc59a43f"}, 543 | {file = "pillow-11.1.0-cp39-cp39-win_arm64.whl", hash = "sha256:b20be51b37a75cc54c2c55def3fa2c65bb94ba859dde241cd0a4fd302de5ae0a"}, 544 | {file = "pillow-11.1.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:8c730dc3a83e5ac137fbc92dfcfe1511ce3b2b5d7578315b63dbbb76f7f51d90"}, 545 | {file = "pillow-11.1.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:7d33d2fae0e8b170b6a6c57400e077412240f6f5bb2a342cf1ee512a787942bb"}, 546 | {file = "pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8d65b38173085f24bc07f8b6c505cbb7418009fa1a1fcb111b1f4961814a442"}, 547 | {file = "pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:015c6e863faa4779251436db398ae75051469f7c903b043a48f078e437656f83"}, 548 | {file = "pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:d44ff19eea13ae4acdaaab0179fa68c0c6f2f45d66a4d8ec1eda7d6cecbcc15f"}, 549 | {file = "pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d3d8da4a631471dfaf94c10c85f5277b1f8e42ac42bade1ac67da4b4a7359b73"}, 550 | {file = "pillow-11.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:4637b88343166249fe8aa94e7c4a62a180c4b3898283bb5d3d2fd5fe10d8e4e0"}, 551 | {file = "pillow-11.1.0.tar.gz", hash = "sha256:368da70808b36d73b4b390a8ffac11069f8a5c85f29eff1f1b01bcf3ef5b2a20"}, 552 | ] 553 | 554 | [package.extras] 555 | docs = ["furo", "olefile", "sphinx (>=8.1)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinxext-opengraph"] 556 | fpx = ["olefile"] 557 | mic = ["olefile"] 558 | tests = ["check-manifest", "coverage (>=7.4.2)", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout", "trove-classifiers (>=2024.10.12)"] 559 | typing = ["typing-extensions ; python_version < \"3.10\""] 560 | xmp = ["defusedxml"] 561 | 562 | [[package]] 563 | name = "platformdirs" 564 | version = "4.3.6" 565 | description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." 
566 | optional = false 567 | python-versions = ">=3.8" 568 | groups = ["main"] 569 | files = [ 570 | {file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"}, 571 | {file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"}, 572 | ] 573 | 574 | [package.extras] 575 | docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4)"] 576 | test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.2)", "pytest-cov (>=5)", "pytest-mock (>=3.14)"] 577 | type = ["mypy (>=1.11.2)"] 578 | 579 | [[package]] 580 | name = "pycodestyle" 581 | version = "2.11.1" 582 | description = "Python style guide checker" 583 | optional = false 584 | python-versions = ">=3.8" 585 | groups = ["dev"] 586 | files = [ 587 | {file = "pycodestyle-2.11.1-py2.py3-none-any.whl", hash = "sha256:44fe31000b2d866f2e41841b18528a505fbd7fef9017b04eff4e2648a0fadc67"}, 588 | {file = "pycodestyle-2.11.1.tar.gz", hash = "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f"}, 589 | ] 590 | 591 | [[package]] 592 | name = "pyflakes" 593 | version = "3.1.0" 594 | description = "passive checker of Python programs" 595 | optional = false 596 | python-versions = ">=3.8" 597 | groups = ["dev"] 598 | files = [ 599 | {file = "pyflakes-3.1.0-py2.py3-none-any.whl", hash = "sha256:4132f6d49cb4dae6819e5379898f2b8cce3c5f23994194c24b77d5da2e36f774"}, 600 | {file = "pyflakes-3.1.0.tar.gz", hash = "sha256:a0aae034c444db0071aa077972ba4768d40c830d9539fd45bf4cd3f8f6992efc"}, 601 | ] 602 | 603 | [[package]] 604 | name = "requests" 605 | version = "2.32.3" 606 | description = "Python HTTP for Humans." 607 | optional = false 608 | python-versions = ">=3.8" 609 | groups = ["main"] 610 | files = [ 611 | {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, 612 | {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, 613 | ] 614 | 615 | [package.dependencies] 616 | certifi = ">=2017.4.17" 617 | charset-normalizer = ">=2,<4" 618 | idna = ">=2.5,<4" 619 | urllib3 = ">=1.21.1,<3" 620 | 621 | [package.extras] 622 | socks = ["PySocks (>=1.5.6,!=1.5.7)"] 623 | use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] 624 | 625 | [[package]] 626 | name = "requests-cache" 627 | version = "1.2.1" 628 | description = "A persistent cache for python requests" 629 | optional = false 630 | python-versions = ">=3.8" 631 | groups = ["main"] 632 | files = [ 633 | {file = "requests_cache-1.2.1-py3-none-any.whl", hash = "sha256:1285151cddf5331067baa82598afe2d47c7495a1334bfe7a7d329b43e9fd3603"}, 634 | {file = "requests_cache-1.2.1.tar.gz", hash = "sha256:68abc986fdc5b8d0911318fbb5f7c80eebcd4d01bfacc6685ecf8876052511d1"}, 635 | ] 636 | 637 | [package.dependencies] 638 | attrs = ">=21.2" 639 | cattrs = ">=22.2" 640 | platformdirs = ">=2.5" 641 | requests = ">=2.22" 642 | url-normalize = ">=1.4" 643 | urllib3 = ">=1.25.5" 644 | 645 | [package.extras] 646 | all = ["boto3 (>=1.15)", "botocore (>=1.18)", "itsdangerous (>=2.0)", "pymongo (>=3)", "pyyaml (>=6.0.1)", "redis (>=3)", "ujson (>=5.4)"] 647 | bson = ["bson (>=0.5)"] 648 | docs = ["furo (>=2023.3,<2024.0)", "linkify-it-py (>=2.0,<3.0)", "myst-parser (>=1.0,<2.0)", "sphinx (>=5.0.2,<6.0.0)", "sphinx-autodoc-typehints (>=1.19)", "sphinx-automodapi (>=0.14)", "sphinx-copybutton (>=0.5)", "sphinx-design (>=0.2)", 
"sphinx-notfound-page (>=0.8)", "sphinxcontrib-apidoc (>=0.3)", "sphinxext-opengraph (>=0.9)"] 649 | dynamodb = ["boto3 (>=1.15)", "botocore (>=1.18)"] 650 | json = ["ujson (>=5.4)"] 651 | mongodb = ["pymongo (>=3)"] 652 | redis = ["redis (>=3)"] 653 | security = ["itsdangerous (>=2.0)"] 654 | yaml = ["pyyaml (>=6.0.1)"] 655 | 656 | [[package]] 657 | name = "six" 658 | version = "1.16.0" 659 | description = "Python 2 and 3 compatibility utilities" 660 | optional = false 661 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" 662 | groups = ["main"] 663 | files = [ 664 | {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, 665 | {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, 666 | ] 667 | 668 | [[package]] 669 | name = "soupsieve" 670 | version = "2.6" 671 | description = "A modern CSS selector implementation for Beautiful Soup." 672 | optional = false 673 | python-versions = ">=3.8" 674 | groups = ["main"] 675 | files = [ 676 | {file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"}, 677 | {file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"}, 678 | ] 679 | 680 | [[package]] 681 | name = "typing-extensions" 682 | version = "4.12.2" 683 | description = "Backported and Experimental Type Hints for Python 3.8+" 684 | optional = false 685 | python-versions = ">=3.8" 686 | groups = ["main"] 687 | files = [ 688 | {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, 689 | {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, 690 | ] 691 | 692 | [[package]] 693 | name = "url-normalize" 694 | version = "1.4.3" 695 | description = "URL normalization for Python" 696 | optional = false 697 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" 698 | groups = ["main"] 699 | files = [ 700 | {file = "url-normalize-1.4.3.tar.gz", hash = "sha256:d23d3a070ac52a67b83a1c59a0e68f8608d1cd538783b401bc9de2c0fac999b2"}, 701 | {file = "url_normalize-1.4.3-py2.py3-none-any.whl", hash = "sha256:ec3c301f04e5bb676d333a7fa162fa977ad2ca04b7e652bfc9fac4e405728eed"}, 702 | ] 703 | 704 | [package.dependencies] 705 | six = "*" 706 | 707 | [[package]] 708 | name = "urllib3" 709 | version = "2.2.3" 710 | description = "HTTP library with thread-safe connection pooling, file post, and more." 
711 | optional = false 712 | python-versions = ">=3.8" 713 | groups = ["main"] 714 | files = [ 715 | {file = "urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac"}, 716 | {file = "urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9"}, 717 | ] 718 | 719 | [package.extras] 720 | brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""] 721 | h2 = ["h2 (>=4,<5)"] 722 | socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] 723 | zstd = ["zstandard (>=0.18.0)"] 724 | 725 | [metadata] 726 | lock-version = "2.1" 727 | python-versions = "^3.9" 728 | content-hash = "92cfb836603d3fa5af84e8b5de458c70cfa66ef8878a7125424609fa22921343" 729 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "leech" 3 | version = "1.0.0" 4 | description = "Turn a story on certain websites into an ebook for convenient reading" 5 | authors = ["David Lynch "] 6 | license = "MIT License" 7 | include = ["ebook/*", "sites/*"] 8 | 9 | [tool.poetry.scripts] 10 | leech = "leech:cli" 11 | 12 | [tool.poetry.dependencies] 13 | python = "^3.9" 14 | attrs = "^25.1.0" 15 | beautifulsoup4 = "^4.13.3" 16 | click-default-group = "^1.2.4" 17 | click = "^8.1.8" 18 | requests = "^2.32.3" 19 | requests-cache = "^1.2.1" 20 | Pillow = "^11.1.0" 21 | mintotp = "^0.3.0" 22 | lxml = "^5.3.1" 23 | 24 | [tool.poetry.group.dev.dependencies] 25 | flake8 = "^6.1.0" 26 | 27 | [build-system] 28 | requires = ["poetry-core>=1.0.0"] 29 | build-backend = "poetry.core.masonry.api" -------------------------------------------------------------------------------- /sites/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | import click 3 | import glob 4 | import os 5 | import random 6 | import uuid 7 | import datetime 8 | import time 9 | import logging 10 | import urllib 11 | import re 12 | import hashlib 13 | from attrs import define, field, Factory 14 | from bs4 import BeautifulSoup 15 | 16 | logger = logging.getLogger(__name__) 17 | logger.addHandler(logging.NullHandler()) 18 | _sites = [] 19 | 20 | 21 | def _default_uuid_string(self): 22 | rd = random.Random(x=self.url) 23 | return str(uuid.UUID(int=rd.getrandbits(8*16), version=4)) 24 | 25 | 26 | @define 27 | class Image: 28 | url: str 29 | 30 | def path(self): 31 | return f"images/{hashlib.sha1(self.url.encode()).hexdigest()}.{self.ext()}" 32 | 33 | def ext(self): 34 | if self.url.startswith("data:image") and 'base64' in self.url: 35 | head, base64data = self.url.split(',') 36 | return str(head.split(';')[0].split('/')[1]) 37 | path = urllib.parse.urlparse(self.url).path 38 | return os.path.splitext(path)[1] 39 | 40 | 41 | @define 42 | class Chapter: 43 | title: str 44 | contents: str 45 | date: datetime.datetime = False 46 | images: dict = Factory(dict) 47 | 48 | 49 | @define 50 | class Section: 51 | title: str 52 | author: str 53 | url: str 54 | cover_url: str = '' 55 | id: str = Factory(_default_uuid_string, takes_self=True) 56 | contents: list = Factory(list) 57 | footnotes: list = Factory(list) 58 | tags: list = Factory(list) 59 | summary: str = '' 60 | 61 | def __iter__(self): 62 | return self.contents.__iter__() 63 | 64 | def __getitem__(self, index): 65 | return self.contents.__getitem__(index) 66 | 67 | 
def __setitem__(self, index, value): 68 | return self.contents.__setitem__(index, value) 69 | 70 | def __len__(self): 71 | return len(self.contents) 72 | 73 | def everychapter(self): 74 | for chapter in self.contents: 75 | if hasattr(chapter, '__iter__'): 76 | yield from chapter 77 | else: 78 | yield chapter 79 | 80 | def add(self, value, index=None): 81 | if index is not None: 82 | self.contents.insert(index, value) 83 | else: 84 | self.contents.append(value) 85 | 86 | def dates(self): 87 | for chapter in self.everychapter(): 88 | yield chapter.date 89 | 90 | 91 | @define 92 | class Site: 93 | """A Site handles checking whether a URL might represent a site, and then 94 | extracting the content of a story from said site. 95 | """ 96 | session: object = field() 97 | footnotes: list = field(factory=list, init=False) 98 | options: dict = Factory( 99 | lambda site: site.get_default_options(), 100 | takes_self=True 101 | ) 102 | 103 | @classmethod 104 | def site_key(cls): 105 | if hasattr(cls, '_key'): 106 | return cls._key 107 | return cls.__name__ 108 | 109 | @staticmethod 110 | def get_site_specific_option_defs(): 111 | """Returns a list of click.option objects to add to CLI commands. 112 | 113 | It is best practice to ensure that these names are reasonably unique 114 | to ensure that they do not conflict with the core options, or other 115 | sites' options. It is OK for different site's options to have the 116 | same name, but pains should be taken to ensure they remain semantically 117 | similar in meaning. 118 | """ 119 | return [ 120 | SiteSpecificOption( 121 | 'strip_colors', 122 | '--strip-colors/--no-strip-colors', 123 | default=True, 124 | help="If true, colors will be stripped from the text." 125 | ), 126 | SiteSpecificOption( 127 | 'image_fetch', 128 | '--fetch-images/--no-fetch-images', 129 | default=True, 130 | help="If true, images embedded in the story will be downloaded" 131 | ), 132 | SiteSpecificOption( 133 | 'spoilers', 134 | '--spoilers', 135 | choices=('include', 'inline', 'skip'), 136 | default='include', 137 | help="Whether to include spoilers" 138 | ), 139 | SiteSpecificOption( 140 | 'deprecated_skip_spoilers', 141 | '--skip-spoilers/--include-spoilers', 142 | help="If true, do not transcribe any tags that are marked as a spoiler. (DEPRECATED)", 143 | exposed=False, 144 | click_kwargs={ 145 | "callback": lambda ctx, param, value: ctx.params.update({"spoilers": value and "skip" or "include"}), 146 | }, 147 | ), 148 | SiteSpecificOption( 149 | 'parser', 150 | '--parser', 151 | help="Which HTML parser to use", 152 | choices=('lxml', 'html5lib', 'html.parser', 'lxml-xml'), 153 | default='lxml', 154 | ), 155 | ] 156 | 157 | @classmethod 158 | def get_default_options(cls): 159 | options = {} 160 | for option in cls.get_site_specific_option_defs(): 161 | if option.exposed: 162 | options[option.name] = option.default 163 | return options 164 | 165 | @classmethod 166 | def interpret_site_specific_options(cls, **kwargs): 167 | """Returns options summarizing CLI flags provided. 168 | 169 | Only includes entries the user has explicitly provided as flags 170 | / will not contain default values. For that, use get_default_options(). 
171 | """ 172 | options = {} 173 | for option in cls.get_site_specific_option_defs(): 174 | option_value = kwargs.get(option.name) 175 | if option.exposed and option_value is not None: 176 | options[option.name] = option_value 177 | return options 178 | 179 | @staticmethod 180 | def matches(url): 181 | raise NotImplementedError() 182 | 183 | def extract(self, url): 184 | """Download a story from a given URL 185 | 186 | Args: 187 | url (string): A valid URL for this Site 188 | Returns: 189 | story (dict) containing keys: 190 | title (string) 191 | author (string) 192 | chapters (list): list of Chapters (namedtuple, defined above) 193 | """ 194 | raise NotImplementedError() 195 | 196 | def login(self, login_details): 197 | raise NotImplementedError() 198 | 199 | def _soup(self, url, method=False, delay=0, retry=3, retry_delay=10, **kw): 200 | if not method: 201 | method = self.options.get('parser', 'lxml') 202 | if url.startswith('http://') or url.startswith('https://'): 203 | page = self.session.get(url, **kw) 204 | if not page: 205 | if page.status_code == 403 and page.headers.get('Server', False) == 'cloudflare' and "captcha-bypass" in page.text: 206 | raise CloudflareException("Couldn't fetch, probably because of Cloudflare protection", url) 207 | if retry and retry > 0: 208 | real_delay = retry_delay 209 | if 'Retry-After' in page.headers: 210 | real_delay = int(page.headers['Retry-After']) 211 | logger.warning("Load failed: waiting %s to retry (%s: %s)", real_delay, page.status_code, page.url) 212 | time.sleep(real_delay) 213 | return self._soup(url, method=method, retry=retry - 1, retry_delay=retry_delay, **kw) 214 | raise SiteException("Couldn't fetch", url) 215 | if delay and delay > 0 and not page.from_cache: 216 | time.sleep(delay) 217 | text = page.text 218 | fallback_base = url 219 | else: 220 | text = url 221 | fallback_base = '' 222 | soup = BeautifulSoup(text, method) 223 | return soup, (soup.head and soup.head.base) and soup.head.base.get('href') or fallback_base 224 | 225 | def _form_in_soup(self, soup): 226 | if soup.name == 'form': 227 | return soup 228 | return soup.find('form') 229 | 230 | def _form_data(self, soup): 231 | data = {} 232 | form = self._form_in_soup(soup) 233 | if not form: 234 | return data, '', '' 235 | for tag in form.find_all('input'): 236 | itype = tag.attrs.get('type', 'text') 237 | name = tag.attrs.get('name') 238 | if not name: 239 | continue 240 | value = tag.attrs.get('value', '') 241 | if itype in ('checkbox', 'radio') and not tag.attrs.get('checked', False): 242 | continue 243 | data[name] = value 244 | for select in form.find_all('select'): 245 | # todo: multiple 246 | name = select.attrs.get('name') 247 | if not name: 248 | continue 249 | data[name] = '' 250 | for option in select.find_all('option'): 251 | value = option.attrs.get('value', '') 252 | if value and option.attrs.get('selected'): 253 | data[name] = value 254 | for textarea in form.find_all('textarea'): 255 | name = textarea.attrs.get('name') 256 | if not name: 257 | continue 258 | data[name] = textarea.attrs.get('value', '') 259 | 260 | return data, form.attrs.get('action'), form.attrs.get('method', 'get').lower() 261 | 262 | def _new_tag(self, *args, **kw): 263 | soup = BeautifulSoup("", self.options.get('parser')) 264 | return soup.new_tag(*args, **kw) 265 | 266 | def _join_url(self, *args, **kwargs): 267 | return urllib.parse.urljoin(*args, **kwargs) 268 | 269 | def _footnote(self, contents, chapterid): 270 | """Register a footnote and return a link to that footnote""" 271 | 
272 | # TODO: This embeds knowledge of what the generated filenames will be. Work out a better way. 273 | 274 | idx = len(self.footnotes) + 1 275 | 276 | # epub spec footnotes are all about epub:type on the footnote and the link 277 | # http://www.idpf.org/accessibility/guidelines/content/semantics/epub-type.php 278 | contents.name = 'div' 279 | contents.attrs['id'] = f'footnote{idx}' 280 | contents.attrs['epub:type'] = 'rearnote' 281 | 282 | # a backlink is essential for Kindle to think of this as a footnote 283 | # otherwise it doesn't get the inline-popup treatment 284 | # http://kindlegen.s3.amazonaws.com/AmazonKindlePublishingGuidelines.pdf 285 | # section 3.9.10 286 | backlink = self._new_tag('a', href=f'chapter{chapterid}.html#noteback{idx}') 287 | backlink.string = '^' 288 | contents.insert(0, backlink) 289 | 290 | self.footnotes.append(contents.prettify()) 291 | 292 | # now build the link to the footnote to return, with appropriate 293 | # epub annotations. 294 | spoiler_link = self._new_tag('a') 295 | spoiler_link.attrs = { 296 | 'id': f'noteback{idx}', 297 | 'href': f'footnotes.html#footnote{idx}', 298 | 'epub:type': 'noteref', 299 | } 300 | spoiler_link.string = str(idx) 301 | 302 | return spoiler_link 303 | 304 | def _clean(self, contents, base=False): 305 | """Clean up story content to be more ebook-friendly 306 | 307 | TODO: this expects a soup as its argument, so the couple of API-driven sites can't use it as-is 308 | """ 309 | # Cloudflare is used on many sites, and mangles things that look like email addresses 310 | # e.g. Point_Me_@_The_Sky becomes 311 | # [email protected]_The_Sky 312 | # or 313 | # [email protected]_The_Sky 314 | for tag in contents.find_all(class_='__cf_email__'): 315 | # See: https://usamaejaz.com/cloudflare-email-decoding/ 316 | enc = bytes.fromhex(tag['data-cfemail']) 317 | email = bytes([c ^ enc[0] for c in enc[1:]]).decode('utf8') 318 | if tag.parent.name == 'a' and tag.parent['href'].startswith('/cdn-cgi/l/email-protection'): 319 | tag = tag.parent 320 | tag.insert_before(email) 321 | tag.decompose() 322 | # strip colors 323 | if self.options['strip_colors']: 324 | for tag in contents.find_all(style=re.compile(r'(?:color|background)\s*:')): 325 | tag['style'] = re.sub(r'(?:color|background)\s*:[^;]+;?', '', tag['style']) 326 | 327 | if base: 328 | for img in contents.find_all('img', src=True): 329 | # Later epub processing needs absolute image URLs 330 | # print("fixing img src", img['src'], self._join_url(base, img['src'])) 331 | img['src'] = self._join_url(base, img['src']) 332 | del img['srcset'] 333 | del img['sizes'] 334 | 335 | return contents 336 | 337 | def _finalize(self, story): 338 | # Call this on a story after it's fully extracted to clean up things 339 | for chapter in story: 340 | if hasattr(chapter, '__iter__'): 341 | self._finalize(chapter, story) 342 | else: 343 | self._process_images(chapter) 344 | 345 | if self.footnotes: 346 | story.footnotes = Chapter('Footnotes', '\n\n'.join(self.footnotes)) 347 | self.footnotes = [] 348 | self._process_images(story.footnotes) 349 | 350 | def _process_images(self, chapter): 351 | soup, base = self._soup(chapter.contents) 352 | 353 | if self.options.get('image_fetch'): 354 | for count, img in enumerate(soup.find_all('img', src=True)): 355 | # logger.info(f"Image in {chapter.title}: {img['src']}") 356 | if img['src'] not in chapter.images: 357 | chapter.images[img['src']] = Image(img['src']) 358 | 359 | img['src'] = chapter.images.get(img['src']).path() 360 | else: 361 | # Remove all images 
from the chapter so you don't get that annoying grey background. 362 | for img in soup.find_all('img'): 363 | # Note: alt="" will be completely removed here, which is consitent with the semantics 364 | if img.parent.name.lower() == "figure": 365 | # TODO: figcaption? 366 | img.parent.replace_with(img.get('alt', '🖼')) 367 | else: 368 | img.replace_with(img.get('alt', '🖼')) 369 | 370 | chapter.contents = str(soup) 371 | 372 | 373 | @define 374 | class SiteSpecificOption: 375 | """Represents a site-specific option that can be configured. 376 | 377 | Will be added to the CLI as a click.option -- many of these 378 | fields correspond to click.option arguments.""" 379 | name: str 380 | flag_pattern: str 381 | type: object = None 382 | default: bool = False 383 | help: str = None 384 | choices: tuple = None 385 | exposed: bool = True 386 | click_kwargs: frozenset = field(converter=lambda kwargs: frozenset(kwargs.items()), default={}) 387 | 388 | def __eq__(self, other): 389 | return self.name == other.name 390 | 391 | def __hash__(self): 392 | return hash(self.name) 393 | 394 | def as_click_option(self): 395 | return click.option( 396 | str(self.name), 397 | str(self.flag_pattern), 398 | type=self.choices and click.Choice(self.choices) or self.type, 399 | # Note: This default not matching self.default is intentional. 400 | # It ensures that we know if a flag was explicitly provided, 401 | # which keeps it from overriding options set in leech.json etc. 402 | # Instead, default is used in site_cls.get_default_options() 403 | default=None, 404 | help=self.help if self.help is not None else "", 405 | expose_value=self.exposed, 406 | **dict(self.click_kwargs) 407 | ) 408 | 409 | 410 | class SiteException(Exception): 411 | pass 412 | 413 | 414 | class CloudflareException(SiteException): 415 | pass 416 | 417 | 418 | def register(site_class): 419 | _sites.append(site_class) 420 | return site_class 421 | 422 | 423 | def get(url): 424 | for site_class in _sites: 425 | match = site_class.matches(url) 426 | if match: 427 | logger.info("Handler: %s (%s)", site_class, match) 428 | return site_class, match 429 | raise NotImplementedError("Could not find a handler for " + url) 430 | 431 | 432 | def list_site_specific_options(): 433 | """Returns a list of all site's click options, which will be presented to the user.""" 434 | 435 | # Ensures that duplicate options are not added twice. 436 | # Especially important for subclassed sites (e.g. Xenforo sites) 437 | options = set() 438 | 439 | for site_class in _sites: 440 | options.update(site_class.get_site_specific_option_defs()) 441 | return [option.as_click_option() for option in options] 442 | 443 | 444 | # And now, a particularly hacky take on a plugin system: 445 | # Make an __all__ out of all the python files in this directory that don't start 446 | # with __. Then import * them. 447 | 448 | modules = glob.glob(os.path.join(os.path.dirname(__file__), "*.py")) 449 | __all__ = [os.path.basename(f)[:-3] for f in modules if not f.startswith("__")] 450 | 451 | from . import * # noqa 452 | -------------------------------------------------------------------------------- /sites/ao3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import logging 4 | import datetime 5 | import re 6 | import requests_cache 7 | from . 
import register, Site, Section, Chapter, SiteException 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | @register 13 | class ArchiveOfOurOwn(Site): 14 | """Archive of Our Own: it has its own epub export, but the formatting is awful""" 15 | @staticmethod 16 | def matches(url): 17 | # e.g. http://archiveofourown.org/works/5683105/chapters/13092007 18 | match = re.match(r'^(https?://(?:www\.)?archiveofourown\.org/works/\d+)/?.*', url) 19 | if match: 20 | return match.group(1) + '/' 21 | 22 | def login(self, login_details): 23 | with requests_cache.disabled(): 24 | # Can't just pass this url to _soup because I need the cookies later 25 | login = self.session.get('https://archiveofourown.org/users/login') 26 | soup, nobase = self._soup(login.text) 27 | post, action, method = self._form_data(soup.find(id='new_user')) 28 | post['user[login]'] = login_details[0] 29 | post['user[password]'] = login_details[1] 30 | # I feel the session *should* handle this cookies bit for me. But 31 | # it doesn't. And I don't know why. 32 | result = self.session.post( 33 | self._join_url(login.url, action), 34 | data=post, cookies=login.cookies 35 | ) 36 | if result.ok: 37 | logger.info("Logged in as %s", login_details[0]) 38 | else: 39 | logger.error("Failed to log in as %s", login_details[0]) 40 | 41 | def extract(self, url): 42 | workid = re.match(r'^https?://(?:www\.)?archiveofourown\.org/works/(\d+)/?.*', url).group(1) 43 | return self._extract_work(workid) 44 | 45 | def _extract_work(self, workid): 46 | # Fetch the full work 47 | url = f'http://archiveofourown.org/works/{workid}?view_adult=true&view_full_work=true' 48 | logger.info("Extracting full work @ %s", url) 49 | soup, base = self._soup(url) 50 | 51 | if not soup.find(id='workskin'): 52 | raise SiteException("Can't find the story text; you may need to log in or flush the cache") 53 | 54 | story = Section( 55 | title=soup.select('#workskin > .preface .title')[0].text.strip(), 56 | author=soup.select('#workskin .preface .byline a')[0].text.strip(), 57 | summary=soup.select('#workskin .preface .summary blockquote')[0].prettify(), 58 | url=f'http://archiveofourown.org/works/{workid}', 59 | tags=[tag.get_text().strip() for tag in soup.select('.work.meta .tags a.tag')] 60 | ) 61 | 62 | # Fetch the chapter list as well because it contains info that's not in the full work 63 | nav_soup, nav_base = self._soup(f'https://archiveofourown.org/works/{workid}/navigate') 64 | chapters = soup.select('#chapters > div') 65 | if len(chapters) == 1: 66 | # in a single-chapter story the #chapters div is actually the chapter 67 | chapters = [soup.find(id='chapters').parent] 68 | 69 | for index, chapter in enumerate(nav_soup.select('#main ol[role="navigation"] li')): 70 | link = chapter.find('a') 71 | logger.info("Extracting chapter %s", link.string) 72 | 73 | updated = datetime.datetime.strptime( 74 | chapter.find('span', class_='datetime').string, 75 | "(%Y-%m-%d)" 76 | ) 77 | 78 | chapter_soup = chapters[index] 79 | if not chapter_soup: 80 | logger.warning("Couldn't find chapter %s in full work", index + 1) 81 | continue 82 | 83 | story.add(Chapter( 84 | title=link.string, 85 | # the `or soup` fallback covers single-chapter works 86 | contents=self._chapter(chapter_soup, base), 87 | date=updated 88 | )) 89 | 90 | self._finalize(story) 91 | 92 | return story 93 | 94 | def _chapter(self, soup, base): 95 | content = soup.find('div', role='article') 96 | 97 | for landmark in content.find_all(class_='landmark'): 98 | landmark.decompose() 99 | 100 | # TODO: Maybe 
these should be footnotes instead? 101 | notes = soup.select('#chapters .end.notes') 102 | if notes: 103 | notes = notes[0] 104 | for landmark in notes.find_all(class_='landmark'): 105 | landmark.decompose() 106 | 107 | self._clean(content, base) 108 | 109 | return content.prettify() + (notes and notes.prettify() or '') 110 | 111 | 112 | @register 113 | class ArchiveOfOurOwnSeries(ArchiveOfOurOwn): 114 | _key = "ArchiveOfOurOwn" 115 | 116 | @staticmethod 117 | def matches(url): 118 | # e.g. http://archiveofourown.org/series/5683105/ 119 | match = re.match(r'^(https?://archiveofourown\.org/series/\d+)/?.*', url) 120 | if match: 121 | return match.group(1) + '/' 122 | 123 | def extract(self, url): 124 | seriesid = re.match(r'^https?://archiveofourown\.org/series/(\d+)/?.*', url).group(1) 125 | 126 | soup, base = self._soup(f'http://archiveofourown.org/series/{seriesid}?view_adult=true') 127 | 128 | story = Section( 129 | title=soup.select('#main h2.heading')[0].text.strip(), 130 | author=soup.select('#main dl.series.meta a[rel="author"]')[0].string, 131 | url=f'http://archiveofourown.org/series/{seriesid}' 132 | ) 133 | 134 | for work in soup.select('#main ul.series li.work'): 135 | workid = work.get('id').replace('work_', '') 136 | substory = self._extract_work(workid) 137 | 138 | # TODO: improve epub-writer to be able to generate a toc.ncx with nested headings 139 | story.add(substory) 140 | 141 | return story 142 | -------------------------------------------------------------------------------- /sites/arbitrary.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import logging 4 | from attrs import define 5 | import datetime 6 | import json 7 | import re 8 | import os.path 9 | from . import register, Site, Section, Chapter, SiteException 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | """ 14 | Example JSON: 15 | { 16 | "url": "https://practicalguidetoevil.wordpress.com/table-of-contents/", 17 | "title": "A Practical Guide To Evil: Book 1", 18 | "author": "erraticerrata", 19 | "chapter_selector": "#main .entry-content > ul > li > a", 20 | "content_selector": "#main .entry-content", 21 | "filter_selector": ".sharedaddy, .wpcnt, style", 22 | "cover_url": "https://gitlab.com/Mikescher2/A-Practical-Guide-To-Evil-Lyx/raw/master/APGTE_1/APGTE_front.png" 23 | } 24 | """ 25 | 26 | 27 | @define 28 | class SiteDefinition: 29 | url: str 30 | title: str 31 | author: str 32 | content_selector: str 33 | # If present, find something within `content` to use a chapter title; if not found, the link text to it will be used 34 | content_title_selector: str = False 35 | # If present, find a specific element in the `content` to be the chapter text 36 | content_text_selector: str = False 37 | # If present, it looks for chapters linked from `url`. If not, it assumes `url` points to a chapter. 38 | chapter_selector: str = False 39 | # If present, use to find a link to the next content page (only used if not using chapter_selector) 40 | next_selector: str = False 41 | # If present, use to filter out content that matches the selector 42 | filter_selector: str = False 43 | cover_url: str = '' 44 | 45 | 46 | @register 47 | class Arbitrary(Site): 48 | """A way to describe an arbitrary side for a one-off fetch 49 | """ 50 | @staticmethod 51 | def matches(url): 52 | # e.g. 
practical1.json 53 | if url.endswith('.json') and os.path.isfile(url): 54 | return url 55 | 56 | def extract(self, url): 57 | with open(url) as definition_file: 58 | definition = SiteDefinition(**json.load(definition_file)) 59 | 60 | story = Section( 61 | title=definition.title, 62 | author=definition.author, 63 | url=url, 64 | cover_url=definition.cover_url 65 | ) 66 | 67 | if definition.chapter_selector: 68 | soup, base = self._soup(definition.url) 69 | for chapter_link in soup.select(definition.chapter_selector): 70 | chapter_url = str(chapter_link.get('href')) 71 | if base: 72 | chapter_url = self._join_url(base, chapter_url) 73 | chapter_url = self._join_url(definition.url, chapter_url) 74 | for chapter in self._chapter(chapter_url, definition, title=chapter_link.string): 75 | story.add(chapter) 76 | else: 77 | # set of already processed urls. Stored to detect loops. 78 | found_content_urls = set() 79 | content_urls = [definition.url] 80 | 81 | def process_content_url(content_url): 82 | if content_url in found_content_urls: 83 | return None 84 | found_content_urls.add(content_url) 85 | for chapter in self._chapter(content_url, definition): 86 | story.add(chapter) 87 | return content_url 88 | 89 | while content_urls: 90 | for temp_url in content_urls: 91 | # stop inner loop once a new link is found 92 | if content_url := process_content_url(temp_url): 93 | break 94 | # reset url list 95 | content_urls = [] 96 | if content_url and definition.next_selector: 97 | soup, base = self._soup(content_url) 98 | next_link = soup.select(definition.next_selector) 99 | if next_link: 100 | for next_link_item in next_link: 101 | next_link_url = str(next_link_item.get('href')) 102 | if base: 103 | next_link_url = self._join_url(base, next_link_url) 104 | content_urls.append(self._join_url(content_url, next_link_url)) 105 | 106 | if not story: 107 | raise SiteException("No story content found; check the content selectors") 108 | 109 | self._finalize(story) 110 | 111 | return story 112 | 113 | def _chapter(self, url, definition, title=False): 114 | logger.info("Extracting chapter @ %s", url) 115 | soup, base = self._soup(url) 116 | 117 | chapters = [] 118 | 119 | if not soup.select(definition.content_selector): 120 | return chapters 121 | 122 | # clean up a few things which will definitely break epubs: 123 | # TODO: expand this greatly, or make it configurable 124 | for namespaced in soup.find_all(re.compile(r'[a-z]+:[a-z]+')): 125 | # Namespaced elements are going to cause validation errors 126 | namespaced.decompose() 127 | 128 | for content in soup.select(definition.content_selector): 129 | if definition.filter_selector: 130 | for filtered in content.select(definition.filter_selector): 131 | filtered.decompose() 132 | 133 | if definition.content_title_selector: 134 | title_element = content.select(definition.content_title_selector) 135 | if title_element: 136 | title = title_element[0].get_text().strip() 137 | 138 | if definition.content_text_selector: 139 | # TODO: multiple text elements? 
140 | content = content.select(definition.content_text_selector)[0] 141 | 142 | # TODO: consider `'\n'.join(map(str, content.contents))` 143 | content.name = 'div' 144 | 145 | self._clean(content, base) 146 | 147 | chapters.append(Chapter( 148 | title=title, 149 | contents=content.prettify(), 150 | # TODO: better date detection 151 | date=datetime.datetime.now() 152 | )) 153 | 154 | return chapters 155 | -------------------------------------------------------------------------------- /sites/deviantart.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import logging 4 | import re 5 | 6 | from . import register, Section 7 | from .stash import Stash 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | @register 13 | class DeviantArt(Stash): 14 | @staticmethod 15 | def matches(url): 16 | # Need a collection page 17 | match = re.match(r'^https?://[^.]+\.deviantart\.com/(?:gallery|favourites)/\d+/?', url) 18 | if match: 19 | return match.group(0) + '/' 20 | 21 | def extract(self, url): 22 | soup, base = self._soup(url) 23 | content = soup.find(id="output") 24 | if not content: 25 | return 26 | 27 | if "gallery" in url: 28 | author = str(content.select('h1 a.u')[0].string) 29 | else: 30 | authors = set(str(author.string) for author in content.select('.stream .details a.u')) 31 | author = ', '.join(authors) 32 | 33 | story = Section( 34 | title=str(content.find(class_="folder-title").string), 35 | author=author, 36 | url=url 37 | ) 38 | 39 | thumbs = content.select(".stream a.thumb") 40 | if not thumbs: 41 | return 42 | for thumb in thumbs: 43 | try: 44 | if thumb['href'] != '#': 45 | story.add(self._chapter(thumb['href'])) 46 | except Exception: 47 | logger.exception("Couldn't extract chapters from thumbs") 48 | 49 | self._finalize(story) 50 | 51 | return story 52 | -------------------------------------------------------------------------------- /sites/fanfictionnet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import logging 4 | import datetime 5 | import re 6 | import urllib.parse 7 | import attr 8 | from . import register, Site, SiteException, CloudflareException, Section, Chapter 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | @register 14 | class FanFictionNet(Site): 15 | _cloudflared = attr.ib(init=False, default=False) 16 | 17 | """FFN: it has a lot of stuff""" 18 | @staticmethod 19 | def matches(url): 20 | # e.g. 
https://www.fanfiction.net/s/4109686/3/Taking-Sights 21 | match = re.match(r'^https?://(?:www|m)\.fanfiction\.net/s/(\d+)/?.*', url) 22 | if match: 23 | return 'https://www.fanfiction.net/s/' + match.group(1) + '/' 24 | 25 | def extract(self, url): 26 | soup, base = self._soup(url) 27 | 28 | content = soup.find(id="content_wrapper_inner") 29 | if not content: 30 | raise SiteException("No content") 31 | 32 | metadata = content.find(id='profile_top') 33 | 34 | story = Section( 35 | title=str(metadata.find('b', class_="xcontrast_txt").string), 36 | author=str(metadata.find('a', class_="xcontrast_txt").string), 37 | url=url 38 | ) 39 | 40 | dates = content.find_all('span', attrs={'data-xutime': True}) 41 | published = False 42 | updated = False 43 | if len(dates) == 1: 44 | published = datetime.datetime.fromtimestamp(int(dates[0]['data-xutime'])) 45 | elif len(dates) == 2: 46 | updated = datetime.datetime.fromtimestamp(int(dates[0]['data-xutime'])) 47 | published = datetime.datetime.fromtimestamp(int(dates[1]['data-xutime'])) 48 | 49 | chapter_select = content.find(id="chap_select") 50 | if chapter_select: 51 | base_url = re.search(r'(https?://[^/]+/s/\d+/?)', url) 52 | if not base_url: 53 | raise SiteException("Can't find base URL for chapters") 54 | base_url = base_url.group(0) 55 | 56 | suffix = re.search(r"'(/[^']+)';", chapter_select.attrs['onchange']) 57 | if not suffix: 58 | raise SiteException("Can't find URL suffix for chapters") 59 | suffix = suffix.group(1) 60 | 61 | # beautiful soup doesn't handle ffn's unclosed option tags at all well here 62 | options = re.findall(r']*>([^<]+)', str(chapter_select)) 63 | for option in options: 64 | story.add(Chapter(title=option[1], contents=self._chapter(base_url + option[0] + suffix), date=False)) 65 | 66 | # fix up the dates 67 | story[-1].date = updated 68 | story[0].date = published 69 | else: 70 | story.add(Chapter(title=story.title, contents=self._chapter(url), date=published)) 71 | 72 | self._finalize(story) 73 | 74 | return story 75 | 76 | def _chapter(self, url): 77 | logger.info("Fetching chapter @ %s", url) 78 | soup, base = self._soup(url) 79 | 80 | content = soup.find(id="content_wrapper_inner") 81 | if not content: 82 | raise SiteException("No chapter content") 83 | 84 | text = content.find(id="storytext") 85 | if not text: 86 | raise SiteException("No chapter content") 87 | 88 | # clean up some invalid xhtml attributes 89 | # TODO: be more selective about this somehow 90 | try: 91 | for tag in text.find_all(True): 92 | tag.attrs.clear() 93 | except Exception: 94 | logger.exception("Trouble cleaning attributes") 95 | 96 | self._clean(text, base) 97 | 98 | return text.prettify() 99 | 100 | def _soup(self, url, *args, **kwargs): 101 | if self._cloudflared: 102 | fallback = f"https://archive.org/wayback/available?url={urllib.parse.quote(url)}" 103 | try: 104 | response = self.session.get(fallback) 105 | wayback = response.json() 106 | closest = wayback['archived_snapshots']['closest']['url'] 107 | return super()._soup(closest, *args, delay=1, **kwargs) 108 | except Exception: 109 | self.session.cache.delete_url(fallback) 110 | raise CloudflareException("Couldn't fetch, presumably because of Cloudflare protection, and falling back to archive.org failed; if some chapters were succeeding, try again?", url, fallback) 111 | try: 112 | return super()._soup(self, url, *args, **kwargs) 113 | except CloudflareException: 114 | self._cloudflared = True 115 | return self._soup(url, *args, **kwargs) 116 | 117 | 118 | @register 119 | class 
FictionPress(FanFictionNet): 120 | @staticmethod 121 | def matches(url): 122 | # e.g. https://www.fictionpress.com/s/2961893/1/Mother-of-Learning 123 | match = re.match(r'^https?://(?:www|m)\.fictionpress\.com/s/(\d+)/?.*', url) 124 | if match: 125 | return 'https://www.fictionpress.com/s/' + match.group(1) + '/' 126 | -------------------------------------------------------------------------------- /sites/fictionlive.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import logging 4 | import itertools 5 | import datetime 6 | import re 7 | from . import register, Site, Section, Chapter 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | @register 13 | class FictionLive(Site): 14 | """fiction.live: it's... mostly smut, I think? Terrible smut. But, hey, I had a rec to follow.""" 15 | @staticmethod 16 | def matches(url): 17 | # e.g. https://fiction.live/stories/Descendant-of-a-Demon-Lord/SBBA49fQavNQMWxFT 18 | match = re.match(r'^(https?://fiction\.live/(?:stories|Sci-fi)/[^\/]+/[0-9a-zA-Z\-]+)/?.*', url) 19 | if match: 20 | return match.group(1) 21 | 22 | def extract(self, url): 23 | workid = re.match(r'^https?://fiction\.live/(?:stories|Sci-fi)/[^\/]+/([0-9a-zA-Z\-]+)/?.*', url).group(1) 24 | 25 | response = self.session.get(f'https://fiction.live/api/node/{workid}').json() 26 | 27 | story = Section( 28 | title=response['t'], 29 | author=response['u'][0]['n'], 30 | # Could normalize the URL here from the returns, but I'd have to 31 | # go look up how they handle special characters in titles... 32 | url=url 33 | ) 34 | # There's a summary (or similar) in `d` and `b`, if I want to use that later. 35 | 36 | # TODO: extract these #special ones and send them off to an endnotes section? 37 | chapters = ({'ct': 0},) + tuple(c for c in response['bm'] if not c['title'].startswith('#special')) + ({'ct': 9999999999999999},) 38 | 39 | for prevc, currc, nextc in contextiterate(chapters): 40 | # `id`, `title`, `ct`, `isFirst` 41 | # https://fiction.live/api/anonkun/chapters/SBBA49fQavNQMWxFT/0/1448245168594 42 | # https://fiction.live/api/anonkun/chapters/SBBA49fQavNQMWxFT/1449266444062/1449615394752 43 | # https://fiction.live/api/anonkun/chapters/SBBA49fQavNQMWxFT/1502823848216/9999999999999998 44 | # i.e. format is [current timestamp] / [next timestamp - 1] 45 | chapter_url = f'https://fiction.live/api/anonkun/chapters/{workid}/{currc["ct"]}/{nextc["ct"] - 1}' 46 | logger.info("Extracting chapter \"%s\" @ %s", currc['title'], chapter_url) 47 | data = self.session.get(chapter_url).json() 48 | html = [] 49 | 50 | updated = currc['ct'] 51 | for segment in (d for d in data if not d.get('t', '').startswith('#special')): 52 | updated = max(updated, segment['ct']) 53 | # TODO: work out if this is actually enough types handled 54 | # There's at least also a reader post type, which mostly seems to be used for die rolls. 55 | try: 56 | if segment['nt'] == 'chapter': 57 | html.extend(('
<div>', segment['b'].replace('<br>', '<br/>'), '</div>
')) 58 | elif segment['nt'] == 'choice': 59 | if 'votes' not in segment: 60 | # Somehow, sometime, we end up with a choice without votes (or choices) 61 | continue 62 | votes = {} 63 | for vote in segment['votes']: 64 | votechoices = segment['votes'][vote] 65 | if isinstance(votechoices, str): 66 | # This caused issue #30, where for some reason one 67 | # choice on a story was a string rather than an 68 | # index into the choices array. 69 | continue 70 | if isinstance(votechoices, int): 71 | votechoices = (votechoices,) 72 | for choice in votechoices: 73 | if int(choice) < len(segment['choices']): 74 | # sometimes someone has voted for a presumably-deleted choice 75 | choice = segment['choices'][int(choice)] 76 | votes[choice] = votes.get(choice, 0) + 1 77 | choices = [(votes[v], v) for v in votes] 78 | choices.sort(reverse=True) 79 | html.append('

') 83 | elif segment['nt'] == 'readerPost': 84 | pass 85 | else: 86 | logger.info("Skipped chapter-segment of unhandled type: %s", segment['nt']) 87 | except Exception as e: 88 | logger.error("Skipped chapter-segment due to parsing error", exc_info=e) 89 | 90 | story.add(Chapter( 91 | title=currc['title'], 92 | contents='\n'.join(html), 93 | date=datetime.datetime.fromtimestamp(updated / 1000.0) 94 | )) 95 | 96 | self._finalize(story) 97 | 98 | return story 99 | 100 | 101 | # Stolen from the itertools docs 102 | def contextiterate(iterable): 103 | "s -> (s0,s1), (s1,s2), (s2, s3), ..." 104 | a, b, c = itertools.tee(iterable, 3) 105 | next(b, None) 106 | next(c, None) 107 | next(c, None) 108 | return zip(a, b, c) 109 | -------------------------------------------------------------------------------- /sites/royalroad.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import http.client 4 | import logging 5 | import datetime 6 | import re 7 | from . import register, Site, Section, Chapter, SiteSpecificOption 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | @register 13 | class RoyalRoad(Site): 14 | domain = r'royalroad' 15 | 16 | @staticmethod 17 | def get_site_specific_option_defs(): 18 | return Site.get_site_specific_option_defs() + [ 19 | SiteSpecificOption( 20 | 'offset', 21 | '--offset', 22 | type=int, 23 | help="The chapter index to start in the chapter marks." 24 | ), 25 | SiteSpecificOption( 26 | 'limit', 27 | '--limit', 28 | type=int, 29 | help="The chapter to end at at in the chapter marks." 30 | ), 31 | ] 32 | 33 | """Royal Road: a place where people write novels, mostly seeming to be light-novel in tone.""" 34 | @classmethod 35 | def matches(cls, url): 36 | # e.g. https://royalroad.com/fiction/6752/lament-of-the-fallen 37 | match = re.match(r'^(https?://(?:www\.)?%s\.com/fiction/\d+)/?.*' % cls.domain, url) 38 | if match: 39 | return match.group(1) + '/' 40 | 41 | def extract(self, url): 42 | workid = re.match(r'^https?://(?:www\.)?%s\.com/fiction/(\d+)/?.*' % self.domain, url).group(1) 43 | soup, base = self._soup(f'https://www.{self.domain}.com/fiction/{workid}') 44 | # should have gotten redirected, for a valid title 45 | 46 | original_maxheaders = http.client._MAXHEADERS 47 | http.client._MAXHEADERS = 1000 48 | 49 | story = Section( 50 | title=soup.find('h1').string.strip(), 51 | author=soup.find('meta', property='books:author').get('content').strip(), 52 | url=soup.find('meta', property='og:url').get('content').strip(), 53 | cover_url=self._join_url(base, soup.find('img', class_='thumbnail')['src']), 54 | summary=str(soup.find('div', class_='description')).strip(), 55 | tags=[tag.get_text().strip() for tag in soup.select('span.tags a.fiction-tag')] 56 | ) 57 | 58 | for index, chapter in enumerate(soup.select('#chapters tbody tr[data-url]')): 59 | if self.options['offset'] and index < self.options['offset']: 60 | continue 61 | if self.options['limit'] and index >= self.options['limit']: 62 | continue 63 | chapter_url = str(self._join_url(story.url, str(chapter.get('data-url')))) 64 | 65 | contents, updated = self._chapter(chapter_url, len(story) + 1) 66 | 67 | story.add(Chapter(title=chapter.find('a', href=True).string.strip(), contents=contents, date=updated)) 68 | 69 | http.client._MAXHEADERS = original_maxheaders 70 | 71 | self._finalize(story) 72 | 73 | return story 74 | 75 | def _chapter(self, url, chapterid): 76 | logger.info("Extracting chapter @ %s", url) 77 | soup, base = self._soup(url) 78 | 
content = soup.find('div', class_='chapter-content') 79 | 80 | self._clean(content, full_page=soup, base=base) 81 | self._clean_spoilers(content, chapterid) 82 | 83 | content = str(content) 84 | 85 | author_note = soup.find_all('div', class_='author-note-portlet') 86 | 87 | if len(author_note) == 1: 88 | # Find the parent of chapter-content and check if the author's note is the first child div 89 | if 'author-note-portlet' in soup.find('div', class_='chapter-content').parent.find('div')['class']: 90 | content = str(author_note[0]) + '
<hr/>' + content 91 | else: # The author note must be after the chapter content 92 | content = content + '<hr/>
' + str(author_note[0]) 93 | elif len(author_note) == 2: 94 | content = str(author_note[0]) + '<hr/>
' + content + '<hr/>
' + str(author_note[1]) 95 | 96 | updated = datetime.datetime.fromtimestamp( 97 | int(soup.find(class_="profile-info").find('time').get('unixtime')) 98 | ) 99 | 100 | return content, updated 101 | 102 | def _clean(self, contents, full_page, base=False): 103 | contents = super()._clean(contents, base=base) 104 | 105 | # Royalroad has started inserting "this was stolen" notices into its 106 | # HTML, and hiding them with CSS. Currently the CSS is very easy to 107 | # find, so do so and filter them out. 108 | for style in full_page.find_all('style'): 109 | if m := re.match(r'\s*\.(\w+)\s*{[^}]*display:\s*none;[^}]*}', style.string): 110 | for warning in contents.find_all(class_=m.group(1)): 111 | warning.decompose() 112 | 113 | return contents 114 | 115 | def _clean_spoilers(self, content, chapterid): 116 | # Spoilers to footnotes 117 | for spoiler in content.find_all(class_=('spoiler-new')): 118 | spoiler_title = spoiler.get('data-caption') 119 | new_spoiler = self._new_tag('div', class_="leech-spoiler") 120 | if self.options['spoilers'] == 'skip': 121 | new_spoiler.append(spoiler_title and f'[SPOILER: {spoiler_title}]' or '[SPOILER]') 122 | elif self.options['spoilers'] == 'inline': 123 | if spoiler_title: 124 | new_spoiler.append(f"{spoiler_title}: ") 125 | new_spoiler.append(spoiler) 126 | else: 127 | link = self._footnote(spoiler, chapterid) 128 | if spoiler_title: 129 | link.string = spoiler_title 130 | new_spoiler.append(link) 131 | spoiler.replace_with(new_spoiler) 132 | 133 | 134 | @register 135 | class RoyalRoadL(RoyalRoad): 136 | domain = 'royalroadl' 137 | -------------------------------------------------------------------------------- /sites/stash.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import logging 4 | import datetime 5 | import re 6 | from . 
import register, Site, SiteException, Section, Chapter 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | @register 12 | class Stash(Site): 13 | @staticmethod 14 | def matches(url): 15 | # Need a stack page 16 | match = re.match(r'^(https?://sta\.sh/2.+)/?.*', url) 17 | if match: 18 | return match.group(1) + '/' 19 | 20 | def extract(self, url): 21 | soup, base = self._soup(url) 22 | content = soup.find(id="stash-body") 23 | if not content: 24 | return 25 | 26 | # metadata = content.find(id='profile_top') 27 | story = Section( 28 | title=str(soup.find(class_="stash-folder-name").h2.string), 29 | author=str(soup.find('span', class_="oh-stashlogo-name").string).rstrip("'s"), 30 | url=url 31 | ) 32 | 33 | thumbs = content.select(".stash-folder-stream .thumb") 34 | if not thumbs: 35 | return 36 | for thumb in thumbs: 37 | try: 38 | if thumb['href'] != '#': 39 | story.add(self._chapter(thumb['href'])) 40 | except Exception: 41 | logger.exception("Couldn't extract chapters from thumbs") 42 | 43 | self._finalize(story) 44 | 45 | return story 46 | 47 | def _chapter(self, url): 48 | logger.info("Fetching chapter @ %s", url) 49 | soup, base = self._soup(url) 50 | 51 | content = soup.find(class_="journal-wrapper") 52 | if not content: 53 | raise SiteException("No content") 54 | 55 | title = str(content.find(class_="gr-top").find(class_='metadata').h2.a.string) 56 | 57 | text = content.find(class_="text") 58 | 59 | # clean up some invalid xhtml attributes 60 | # TODO: be more selective about this somehow 61 | try: 62 | for tag in text.find_all(True): 63 | tag.attrs = None 64 | except Exception as e: 65 | raise SiteException("Trouble cleaning attributes", e) 66 | 67 | self._clean(text, base) 68 | 69 | return Chapter(title=title, contents=text.prettify(), date=self._date(soup)) 70 | 71 | def _date(self, soup): 72 | maybe_date = soup.find('div', class_="dev-metainfo-details").find('span', ts=True) 73 | return datetime.datetime.fromtimestamp(int(maybe_date['ts'])) 74 | -------------------------------------------------------------------------------- /sites/wattpad.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import logging 4 | import datetime 5 | import re 6 | from . import register, Site, Section, Chapter 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | @register 12 | class Wattpad(Site): 13 | """Wattpad""" 14 | @classmethod 15 | def matches(cls, url): 16 | # e.g. https://www.wattpad.com/story/208753031-summoned-to-have-tea-with-the-demon-lord-i-guess 17 | # chapter URLs are e.g. https://www.wattpad.com/818687865-summoned-to-have-tea-with-the-demon-lord-i-guess 18 | match = re.match(r'^(https?://(?:www\.)?wattpad\.com/story/\d+)?.*', url) 19 | if match: 20 | # the story-title part is unnecessary 21 | return match.group(1) 22 | 23 | def extract(self, url): 24 | workid = re.match(r'^https?://(?:www\.)?wattpad\.com/story/(\d+)?.*', url).group(1) 25 | info = self.session.get(f"https://www.wattpad.com/api/v3/stories/{workid}").json() 26 | 27 | story = Section( 28 | title=info['title'], 29 | author=info['user']['name'], 30 | url=url, 31 | cover_url=info['cover'] 32 | ) 33 | 34 | for chapter in info['parts']: 35 | story.add(Chapter( 36 | title=chapter['title'], 37 | contents=self._chapter(chapter['id']), 38 | # "2020-05-03T22:14:29Z" 39 | date=datetime.datetime.fromisoformat(chapter['createDate'].rstrip('Z')) # modifyDate also? 
40 | )) 41 | 42 | self._finalize(story) 43 | 44 | return story 45 | 46 | def _chapter(self, chapterid): 47 | logger.info(f"Extracting chapter @ {chapterid}") 48 | api = self.session.get(f"https://www.wattpad.com/apiv2/storytext?id={chapterid}") 49 | return '<div>' + api.text + '</div>
' 50 | -------------------------------------------------------------------------------- /sites/xenforo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import datetime 4 | import re 5 | import logging 6 | import requests_cache 7 | 8 | from . import Site, SiteException, SiteSpecificOption, Section, Chapter 9 | import mintotp 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | class XenForo(Site): 15 | """XenForo is forum software that powers a number of fiction-related forums.""" 16 | 17 | domain = False 18 | index_urls = False 19 | 20 | @staticmethod 21 | def get_site_specific_option_defs(): 22 | return Site.get_site_specific_option_defs() + [ 23 | SiteSpecificOption( 24 | 'include_index', 25 | '--include-index/--no-include-index', 26 | default=False, 27 | help="If true, the post marked as an index will be included as a chapter." 28 | ), 29 | SiteSpecificOption( 30 | 'offset', 31 | '--offset', 32 | type=int, 33 | help="The chapter index to start at in the chapter marks." 34 | ), 35 | SiteSpecificOption( 36 | 'limit', 37 | '--limit', 38 | type=int, 39 | help="The chapter index to end at in the chapter marks." 40 | ), 41 | ] 42 | 43 | @classmethod 44 | def matches(cls, url): 45 | match = re.match(r'^(https?://%s/(?:index\.php\?)?threads/[^/]*\d+/(?:\d+/)?reader)/?.*' % cls.domain, url) 46 | if match: 47 | return match.group(1) 48 | match = re.match(r'^(https?://%s/(?:index\.php\?)?threads/[^/]*\d+)/?.*' % cls.domain, url) 49 | if match: 50 | return match.group(1) + '/' 51 | 52 | def siteurl(self, path): 53 | if self.index_urls: 54 | return f'https://{self.domain}/index.php?{path}' 55 | return f'https://{self.domain}/{path}' 56 | 57 | def login(self, login_details): 58 | with requests_cache.disabled(): 59 | # Can't just pass this url to _soup because I need the cookies later 60 | login = self.session.get(self.siteurl('login/')) 61 | soup, nobase = self._soup(login.text) 62 | post, action, method = self._form_data(soup.find(class_='p-body-content')) 63 | post['login'] = login_details[0] 64 | post['password'] = login_details[1] 65 | # I feel the session *should* handle this cookies bit for me. But 66 | # it doesn't. And I don't know why. 67 | result = self.session.post( 68 | self._join_url(login.url, action), 69 | data=post, cookies=login.cookies 70 | ) 71 | if not result.ok: 72 | return logger.error("Failed to log in as %s", login_details[0]) 73 | soup, nobase = self._soup(result.text) 74 | if twofactor := soup.find('form', action="/login/two-step"): 75 | if len(login_details) < 3: 76 | return logger.error("Failed to log in as %s; login requires 2FA secret", login_details[0]) 77 | post, action, method = self._form_data(twofactor) 78 | post['code'] = mintotp.totp(login_details[2]) 79 | result = self.session.post( 80 | self._join_url(login.url, action), 81 | data=post, cookies=login.cookies 82 | ) 83 | if not result.ok: 84 | return logger.error("Failed to log in as %s; 2FA failed", login_details[0]) 85 | logger.info("Logged in as %s", login_details[0]) 86 | 87 | def extract(self, url): 88 | soup, base = self._soup(url) 89 | 90 | story = self._base_story(soup) 91 | 92 | threadmark_categories = {} 93 | # Note to self: in the source this is data-categoryId, but the parser 94 | # in bs4 lowercases tags and attributes...
95 | for cat in soup.find_all('a', attrs={'data-categoryid': True}): 96 | threadmark_categories[int(cat['data-categoryid'])] = cat['title'] 97 | 98 | if url.endswith('/reader'): 99 | reader_url = url 100 | elif soup.find('a', class_='readerToggle'): 101 | reader_url = soup.find('a', class_='readerToggle').get('href') 102 | elif soup.find('div', class_='threadmarks-reader'): 103 | # Technically this is the xenforo2 bit, but :shrug: 104 | reader_url = soup.find('div', class_='threadmarks-reader').find('a').get('href') 105 | else: 106 | reader_url = False 107 | 108 | if reader_url: 109 | match = re.search(r'\d+/(\d+)/reader', reader_url) 110 | if match: 111 | cat = int(match.group(1)) 112 | if cat != 1 and cat in threadmark_categories: 113 | story.title = f'{story.title} ({threadmark_categories[cat]})' 114 | idx = 0 115 | while reader_url: 116 | reader_url = self._join_url(base, reader_url) 117 | logger.info("Fetching chapters @ %s", reader_url) 118 | reader_soup, reader_base = self._soup(reader_url) 119 | posts = self._posts_from_page(reader_soup) 120 | 121 | for post in posts: 122 | idx = idx + 1 123 | if self.options['offset'] and idx < self.options['offset']: 124 | continue 125 | if self.options['limit'] and idx >= self.options['limit']: 126 | continue 127 | title = self._threadmark_title(post) 128 | logger.info("Extracting chapter \"%s\"", title) 129 | 130 | story.add(Chapter( 131 | title=title, 132 | contents=self._clean_chapter(post, len(story) + 1, base), 133 | date=self._post_date(post) 134 | )) 135 | 136 | reader_url = False 137 | if reader_soup.find('link', rel='next'): 138 | reader_url = reader_soup.find('link', rel='next').get('href') 139 | else: 140 | # TODO: Research whether reader mode is guaranteed to be enabled 141 | # when threadmarks are; if so, can delete this branch. 
142 | marks = [ 143 | mark for mark in self._chapter_list(url) 144 | if '/members' not in mark.get('href') and '/threadmarks' not in mark.get('href') 145 | ] 146 | marks = marks[self.options['offset']:self.options['limit']] 147 | 148 | for idx, mark in enumerate(marks, 1): 149 | href = self._join_url(base, mark.get('href')) 150 | title = str(mark.string).strip() 151 | logger.info("Fetching chapter \"%s\" @ %s", title, href) 152 | contents, post_date = self._chapter(href, idx) 153 | chapter = Chapter(title=title, contents=contents, date=post_date) 154 | story.add(chapter) 155 | 156 | self._finalize(story) 157 | 158 | return story 159 | 160 | def _base_story(self, soup): 161 | url = soup.find('meta', property='og:url').get('content') 162 | title = soup.select('div.titleBar > h1')[0] 163 | # clean out informational bits from the title 164 | for tag in title.find_all(class_='prefix'): 165 | tag.decompose() 166 | tags = [tag.get_text().strip() for tag in soup.select('div.tagBlock a.tag')] 167 | return Section( 168 | title=title.get_text().strip(), 169 | author=soup.find('p', id='pageDescription').find('a', class_='username').get_text(), 170 | url=url, 171 | tags=tags 172 | ) 173 | 174 | def _posts_from_page(self, soup, postid=False): 175 | if postid: 176 | return soup.find('li', id='post-' + postid) 177 | return soup.select('#messageList > li.hasThreadmark') 178 | 179 | def _threadmark_title(self, post): 180 | # Get the title, removing "Threadmark:" which precedes it 181 | return ''.join(post.select('div.threadmarker > span.label')[0].findAll(text=True, recursive=False)).strip() 182 | 183 | def _chapter_list(self, url): 184 | try: 185 | return self._chapter_list_threadmarks(url) 186 | except SiteException as e: 187 | logger.debug("Tried threadmarks (%r)", e.args) 188 | return self._chapter_list_index(url) 189 | 190 | def _chapter_list_threadmarks(self, url): 191 | soup, base = self._soup(url) 192 | 193 | threadmarks_link = soup.find(class_="threadmarksTrigger", href=True) 194 | if not threadmarks_link: 195 | try: 196 | threadmarks_link = soup.select('.threadmarkMenus a.OverlayTrigger')[0] 197 | except IndexError: 198 | pass 199 | 200 | if not threadmarks_link: 201 | raise SiteException("No threadmarks") 202 | 203 | href = threadmarks_link.get('href') 204 | soup, base = self._soup(self._join_url(base, href)) 205 | 206 | fetcher = soup.find(class_='ThreadmarkFetcher') 207 | while fetcher: 208 | # ThreadmarksPro, hiding some threadmarks. Means the API is available to do this. 209 | # Note: the fetched threadmarks can contain more placeholder elements to fetch. Ergo, loop. 210 | # Good test case: https://forums.sufficientvelocity.com/threads/ignition-mtg-multicross-planeswalker-pc.26099/threadmarks 211 | # e.g.:
  • 212 | response = self.session.post(self.siteurl('threads/threadmarks/load-range'), data={ 213 | # I did try a fetch on min/data-min+data-max, but there seems 214 | # to be an absolute limit which the API fetch won't override 215 | 'min': fetcher.get('data-range-min'), 216 | 'max': fetcher.get('data-range-max'), 217 | 'thread_id': fetcher.get('data-thread-id'), 218 | 'category_id': fetcher.get('data-category-id'), 219 | '_xfResponseType': 'json', 220 | }).json() 221 | responseSoup, nobase = self._soup(response['templateHtml']) 222 | fetcher.replace_with(responseSoup) 223 | fetcher = soup.find(class_='ThreadmarkFetcher') 224 | 225 | marks = soup.find(class_='threadmarks').select('li.primaryContent.threadmarkListItem a, li.primaryContent.threadmarkItem a') 226 | if not marks: 227 | raise SiteException("No marks on threadmarks page") 228 | 229 | return marks 230 | 231 | def _chapter_list_index(self, url): 232 | post = self._post_from_url(url) 233 | if not post: 234 | raise SiteException("Unparseable post URL", url) 235 | 236 | links = post.find('blockquote', class_='messageText').find_all('a', class_='internalLink') 237 | if not links: 238 | raise SiteException("No links in index?") 239 | 240 | if self.options['include_index']: 241 | fake_link = self._new_tag('a', href=url) 242 | fake_link.string = "Index" 243 | links.insert(0, fake_link) 244 | 245 | return links 246 | 247 | def _chapter(self, url, chapterid): 248 | post, base = self._post_from_url(url) 249 | 250 | return self._clean_chapter(post, chapterid, base), self._post_date(post) 251 | 252 | def _post_from_url(self, url): 253 | # URLs refer to specific posts, so get just that one 254 | # if no specific post referred to, get the first one 255 | match = re.search(r'posts/(\d+)/?', url) 256 | if not match: 257 | match = re.match(r'.+#post-(\d+)$', url) 258 | # could still be nothing here 259 | postid = match and match.group(1) 260 | if postid: 261 | # create a proper post-url, because threadmarks can sometimes 262 | # mess up page-wise with anchors 263 | url = self.siteurl(f'posts/{postid}/') 264 | soup, base = self._soup(url, 'lxml') 265 | 266 | if postid: 267 | return self._posts_from_page(soup, postid), base 268 | 269 | # just the first one in the thread, then 270 | return soup.find('li', class_='message'), base 271 | 272 | def _chapter_contents(self, post): 273 | return post.find('blockquote', class_='messageText') 274 | 275 | def _clean_chapter(self, post, chapterid, base): 276 | post = self._chapter_contents(post) 277 | post.name = 'div' 278 | # mostly, we want to remove colors because the Kindle is terrible at them 279 | # TODO: find a way to denote colors, because it can be relevant 280 | # TODO: at least invisitext, because outside of silly DC Lantern stuff, it's the most common 281 | for tag in post.find_all(style=True): 282 | if tag['style'] == 'color: transparent' and tag.text == 'TAB': 283 | # Some stories fake paragraph indents like this. The output 284 | # stylesheet will handle this just fine. 285 | tag.decompose() 286 | else: 287 | # There's a few things which xenforo does as styles, despite there being perfectly good tags 288 | # TODO: more robust CSS parsing? This is very whitespace dependent, if nothing else. 
289 | if "font-family: 'Courier New'" in tag['style']: 290 | tag.wrap(self._new_tag('code')) 291 | if "text-decoration: strikethrough" in tag['style']: 292 | tag.wrap(self._new_tag('strike')) 293 | if "margin-left" in tag['style']: 294 | continue 295 | del tag['style'] 296 | for tag in post.select('.quoteExpand, .bbCodeBlock-expandLink, .bbCodeBlock-shrinkLink'): 297 | tag.decompose() 298 | for tag in post.find_all('noscript'): 299 | # TODO: strip the noscript from these? 300 | # mostly this will be the lazyload images 301 | tag.decompose() 302 | for tag in post.select('img.lazyload[data-src]'): 303 | tag['src'] = tag['data-url'] 304 | if tag['src'].startswith('proxy.php'): 305 | tag['src'] = f"{self.domain}/{tag['src']}" 306 | self._clean(post, base) 307 | self._clean_spoilers(post, chapterid) 308 | return post.prettify() 309 | 310 | def _clean_spoilers(self, post, chapterid): 311 | # spoilers don't work well, so turn them into epub footnotes 312 | for spoiler in post.find_all(class_='ToggleTriggerAnchor'): 313 | spoiler_title = spoiler.find(class_='SpoilerTitle') 314 | if self.options['skip_spoilers']: 315 | link = self._footnote(spoiler.find(class_='SpoilerTarget').extract(), chapterid) 316 | if spoiler_title: 317 | link.string = spoiler_title.get_text() 318 | else: 319 | if spoiler_title: 320 | link = f'[SPOILER: {spoiler_title.get_text()}]' 321 | else: 322 | link = '[SPOILER]' 323 | new_spoiler = self._new_tag('div', class_="leech-spoiler") 324 | new_spoiler.append(link) 325 | spoiler.replace_with(new_spoiler) 326 | 327 | def _post_date(self, post): 328 | maybe_date = post.find(class_='DateTime') 329 | if 'data-time' in maybe_date.attrs: 330 | return datetime.datetime.fromtimestamp(int(maybe_date['data-time'])) 331 | if 'title' in maybe_date.attrs: 332 | # title="Feb 24, 2015 at 1:17 PM" 333 | return datetime.datetime.strptime(maybe_date['title'], "%b %d, %Y at %I:%M %p") 334 | raise SiteException("No date", maybe_date) 335 | 336 | 337 | class XenForoIndex(XenForo): 338 | @classmethod 339 | def matches(cls, url): 340 | match = re.match(r'^(https?://%s/posts/\d+)/?.*' % cls.domain, url) 341 | if match: 342 | return match.group(1) + '/' 343 | 344 | def _chapter_list(self, url): 345 | return self._chapter_list_index(url) 346 | -------------------------------------------------------------------------------- /sites/xenforo2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import datetime 4 | import logging 5 | 6 | from . 
import register, Section, SiteException 7 | from .xenforo import XenForo, XenForoIndex 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | class XenForo2(XenForo): 13 | def _base_story(self, soup): 14 | url = soup.find('meta', property='og:url').get('content') 15 | title = soup.select('h1.p-title-value')[0] 16 | # clean out informational bits from the title 17 | for tag in title.select('.labelLink,.label-append'): 18 | tag.decompose() 19 | tags = [tag.get_text().strip() for tag in soup.select('.tagList a.tagItem')] 20 | return Section( 21 | title=title.get_text().strip(), 22 | author=soup.find('div', class_='p-description').find('a', class_='username').get_text(), 23 | url=url, 24 | tags=tags 25 | ) 26 | 27 | def _posts_from_page(self, soup, postid=False): 28 | if postid: 29 | return soup.find('article', id='js-post-' + postid) 30 | return soup.select('article.message--post') 31 | 32 | def _threadmark_title(self, post): 33 | # Get the title, removing "Threadmark:" which precedes it 34 | return post.find('span', class_='threadmarkLabel').get_text() 35 | 36 | def _chapter_contents(self, post): 37 | return post.find('div', class_='message-userContent') 38 | 39 | def _clean_spoilers(self, post, chapterid): 40 | # spoilers don't work well, so turn them into epub footnotes 41 | for spoiler in post.find_all(class_='bbCodeSpoiler'): 42 | spoiler_title = spoiler.find(class_='bbCodeSpoiler-button-title') 43 | spoiler_contents = spoiler.find(class_='bbCodeBlock-content').extract() 44 | new_spoiler = self._new_tag('div', class_="leech-spoiler") 45 | if self.options['spoilers'] == 'skip': 46 | new_spoiler.append(spoiler_title and f'[SPOILER: {spoiler_title.get_text()}]' or '[SPOILER]') 47 | elif self.options['spoilers'] == 'inline': 48 | if spoiler_title: 49 | new_spoiler.append(f"{spoiler_title.get_text()}: ") 50 | new_spoiler.append(spoiler_contents) 51 | else: 52 | link = self._footnote(spoiler_contents, chapterid) 53 | if spoiler_title: 54 | link.string = spoiler_title.get_text() 55 | new_spoiler.append(link) 56 | spoiler.replace_with(new_spoiler) 57 | 58 | def _post_date(self, post): 59 | if post.find('time'): 60 | return datetime.datetime.fromtimestamp(int(post.find('time').get('data-time'))) 61 | raise SiteException("No date") 62 | 63 | 64 | @register 65 | class SpaceBattles(XenForo2): 66 | domain = 'forums.spacebattles.com' 67 | 68 | 69 | @register 70 | class SpaceBattlesIndex(SpaceBattles, XenForoIndex): 71 | _key = "SpaceBattles" 72 | 73 | 74 | @register 75 | class SufficientVelocity(XenForo2): 76 | domain = 'forums.sufficientvelocity.com' 77 | 78 | 79 | @register 80 | class TheSietch(XenForo2): 81 | domain = 'www.the-sietch.com' 82 | index_urls = True 83 | 84 | 85 | @register 86 | class QuestionableQuesting(XenForo2): 87 | domain = 'forum.questionablequesting.com' 88 | 89 | 90 | @register 91 | class QuestionableQuestingIndex(QuestionableQuesting, XenForoIndex): 92 | _key = "QuestionableQuesting" 93 | 94 | 95 | @register 96 | class AlternateHistory(XenForo2): 97 | domain = 'www.alternatehistory.com/forum' 98 | 99 | 100 | @register 101 | class AlternateHistoryIndex(AlternateHistory, XenForoIndex): 102 | _key = "AlternateHistory" 103 | --------------------------------------------------------------------------------
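Adding support for another XenForo2-based forum usually amounts to one more registered subclass in sites/xenforo2.py, following the pattern of the SpaceBattles and SufficientVelocity entries above. A minimal sketch (the domain here is hypothetical, not a site leech currently supports; it assumes the same imports as the file above):

@register
class ExampleForum(XenForo2):
    # Hypothetical domain, purely for illustration.
    domain = 'forums.example.com'

If the forum serves its pages through index.php, as www.the-sietch.com does above, also set index_urls = True so that siteurl() builds links in the index.php?path form.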