├── heliotrope ├── tasks │ ├── __init__.py │ └── mirroring.py ├── database │ ├── __init__.py │ ├── models │ │ ├── __init__.py │ │ └── hitomi.py │ ├── mongo.py │ └── query.py ├── request │ ├── __init__.py │ ├── base.py │ └── hitomi.py ├── hitomi │ ├── __init__.py │ ├── common.py │ ├── parser.py │ └── models.py ├── view │ ├── __init__.py │ └── api │ │ ├── __init__.py │ │ ├── hitomi │ │ ├── __init__.py │ │ ├── info.py │ │ ├── galleryinfo.py │ │ ├── list.py │ │ ├── search.py │ │ └── images.py │ │ └── proxy.py ├── response.py ├── __init__.py ├── __main__.py ├── typing.py ├── sanic.py ├── shuffle.py └── server.py ├── .github ├── CODEOWNERS ├── dependabot.yml └── workflows │ ├── type_checking.yml │ └── codeql-analysis.yml ├── .gitattributes ├── .vscode ├── extensions.json └── settings.json ├── requirements-dev.txt ├── .env.example ├── requirements.txt ├── Dockerfile ├── .restyled.yaml ├── .gitignore └── README.md /heliotrope/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @SaidBySolo -------------------------------------------------------------------------------- /heliotrope/database/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /heliotrope/request/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /heliotrope/database/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /heliotrope/hitomi/__init__.py: 
-------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": [ 3 | "ms-python.vscode-pylance", 4 | "ms-python.python" 5 | ] 6 | } -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.analysis.typeCheckingMode": "strict", 3 | "python.formatting.provider": "black", 4 | "editor.formatOnSave": true, 5 | } -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | # Coding style formatter 2 | black==21.7b0 3 | isort==5.9.3 4 | 5 | # Test code 6 | pytest==6.2.4 7 | pytest-cov==2.12.1 8 | sanic-testing==0.6.0 -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: pip 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | open-pull-requests-limit: 10 8 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | SENTRY_DSN=sentry_dsn 2 | DB_URL=DB_TYPE://USERNAME:PASSWORD@HOST:PORT/DB_NAME 3 | MONGO_DB_URL=mongodb://USERNAME:PASSWORD@URL 4 | FORWARDED_SECRET=secret 5 | HIYOBOT_SECRET=secret 
-------------------------------------------------------------------------------- /heliotrope/view/__init__.py: -------------------------------------------------------------------------------- 1 | from sanic.blueprints import Blueprint 2 | 3 | from heliotrope.view.api import api_endpoint 4 | 5 | # NOTE: Will fixed 6 | view = Blueprint.group(api_endpoint) 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.7.4.post0 2 | sanic==21.6.2 3 | sentry-sdk==1.3.0 4 | beautifulsoup4==4.9.3 5 | tortoise-orm==0.17.6 6 | lxml==4.6.3 7 | aiomysql==0.0.21 8 | sanic-cors==1.0.0 9 | motor==2.5.0 10 | dnspython==2.1.0 11 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9 2 | 3 | EXPOSE 8000 8001 4 | 5 | WORKDIR /usr 6 | 7 | COPY requirements.txt . 
8 | 9 | RUN python -m pip install -U pip && \ 10 | pip install -r requirements.txt 11 | 12 | COPY /heliotrope ./heliotrope 13 | 14 | ENTRYPOINT [ "python", "-m", "heliotrope" ] -------------------------------------------------------------------------------- /heliotrope/response.py: -------------------------------------------------------------------------------- 1 | from sanic.response import json 2 | 3 | 4 | class Response: 5 | not_found = json({"status": 404, "message": "not_found"}, 404) 6 | bad_request = json({"status": 400, "message": "bad_request"}, 400) 7 | forbidden = json({"status": 403, "message": "not_authorized"}, 403) 8 | -------------------------------------------------------------------------------- /heliotrope/view/api/__init__.py: -------------------------------------------------------------------------------- 1 | from sanic.blueprints import Blueprint 2 | 3 | from heliotrope import version_info 4 | from heliotrope.view.api.hitomi import hitomi_endpoint 5 | from heliotrope.view.api.proxy import proxy 6 | 7 | # NOTE: Will fixed 8 | api_endpoint = Blueprint.group( 9 | hitomi_endpoint, proxy, url_prefix="/api", version=version_info.major 10 | ) 11 | -------------------------------------------------------------------------------- /heliotrope/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Literal, NamedTuple 2 | 3 | 4 | class VersionInfo(NamedTuple): 5 | major: int 6 | minor: int 7 | micro: int 8 | releaselevel: Literal["alpha", "beta", "candidate", "final"] 9 | serial: int 10 | 11 | 12 | version_info = VersionInfo(major=5, minor=0, micro=0, releaselevel="alpha", serial=0) 13 | 14 | __version__ = f"{version_info.major}.{version_info.minor}.{version_info.micro}-{version_info.releaselevel}" 15 | -------------------------------------------------------------------------------- /heliotrope/view/api/hitomi/__init__.py: 
-------------------------------------------------------------------------------- 1 | from sanic.blueprints import Blueprint 2 | 3 | from heliotrope.view.api.hitomi.galleryinfo import hitomi_galleryinfo 4 | from heliotrope.view.api.hitomi.images import hitomi_images 5 | from heliotrope.view.api.hitomi.info import hitomi_info 6 | from heliotrope.view.api.hitomi.list import hitomi_list 7 | from heliotrope.view.api.hitomi.search import hitomi_search 8 | 9 | # NOTE: Will fixed 10 | hitomi_endpoint = Blueprint.group( 11 | hitomi_galleryinfo, hitomi_images, hitomi_info, hitomi_list, hitomi_search, url_prefix="/hitomi" 12 | ) 13 | -------------------------------------------------------------------------------- /heliotrope/__main__.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from heliotrope.server import heliotrope 4 | 5 | parser = argparse.ArgumentParser("heliotrope") 6 | 7 | parser.add_argument( 8 | "--host", 9 | "-H", 10 | type=str, 11 | default="0.0.0.0", 12 | help="The hostname to listen on (default: 0.0.0.0)", 13 | ) 14 | parser.add_argument( 15 | "--port", 16 | "-P", 17 | type=int, 18 | default=8000, 19 | help="The port of the webserver (default: 8000)", 20 | ) 21 | 22 | args = parser.parse_args() 23 | 24 | # NOTE: Will fixed 25 | heliotrope.run(args.host, args.port) 26 | -------------------------------------------------------------------------------- /heliotrope/typing.py: -------------------------------------------------------------------------------- 1 | from typing import Literal, Optional, TypedDict 2 | 3 | 4 | class HitomiFilesJSON(TypedDict): 5 | width: int 6 | hash: str 7 | haswebp: int 8 | name: str 9 | height: int 10 | 11 | 12 | class HitomiTagsJSON(TypedDict): 13 | male: Literal["", "1"] 14 | female: Literal["", "1"] 15 | url: str 16 | tag: str 17 | 18 | 19 | class HitomiGalleryinfoJSON(TypedDict): 20 | language_localname: str 21 | language: str 22 | date: str 23 | files: 
from sanic.blueprints import Blueprint
from sanic.response import HTTPResponse, json
from sanic.views import HTTPMethodView

from heliotrope.sanic import HeliotropeRequest

hitomi_info = Blueprint("hitomi_info", url_prefix="/info")


class HitomiInfoView(HTTPMethodView):
    """Serve the stored info document of a single gallery."""

    async def get(self, request: HeliotropeRequest, index_id: int) -> HTTPResponse:
        """Return the info document for ``index_id`` as JSON.

        Args:
            request: Incoming request; its app context provides ``nosql_query``
                and the shared ``response`` helpers.
            index_id: Gallery index taken from the URL path.

        Returns:
            A JSON body of ``{"status": 200, ...document fields}`` when
            ``find_info`` yields a document, otherwise the shared
            ``not_found`` response.
        """
        if info := await request.app.ctx.nosql_query.find_info(index_id):
            # ``json`` takes the payload as its first argument; the status
            # marker and the document fields therefore go into one mapping
            # instead of being passed as positional/keyword arguments.
            return json({"status": 200, **info})

        return request.app.ctx.response.not_found


# The handler requires ``index_id``, so the route has to declare it as a
# typed path parameter; a bare "/" never supplies the argument.
# TODO: add_route is partially unknown and as_view is partially unknown Need PR Sanic
hitomi_info.add_route(HitomiInfoView.as_view(), "/<index_id:int>")  # type: ignore
from sanic.blueprints import Blueprint
from sanic.response import HTTPResponse, json
from sanic.views import HTTPMethodView

from heliotrope.sanic import HeliotropeRequest

hitomi_galleryinfo = Blueprint("hitomi_galleryinfo", url_prefix="/galleryinfo")


class HitomiGalleryinfoView(HTTPMethodView):
    """Serve the raw galleryinfo record of a single gallery."""

    async def get(self, request: HeliotropeRequest, index_id: int) -> HTTPResponse:
        """Return the galleryinfo for ``index_id`` as JSON.

        Args:
            request: Incoming request; its app context provides ``sql_query``
                and the shared ``response`` helpers.
            index_id: Gallery index taken from the URL path.

        Returns:
            The galleryinfo serialized as JSON when one is found, otherwise
            the shared ``not_found`` response.
        """
        # ``get_galleryinfo`` is a coroutine function and must be awaited:
        # an un-awaited coroutine object is always truthy, so the 404 branch
        # could never be taken and ``json`` would receive the coroutine.
        if galleryinfo := await request.app.ctx.sql_query.get_galleryinfo(index_id):
            return json(galleryinfo)

        return request.app.ctx.response.not_found


# The handler requires ``index_id``, so the route has to declare it as a
# typed path parameter; a bare "/" never supplies the argument.
# TODO: add_route is partially unknown and as_view is partially unknown Need PR Sanic
hitomi_galleryinfo.add_route(HitomiGalleryinfoView.as_view(), "/<index_id:int>")  # type: ignore
from sanic.blueprints import Blueprint
from sanic.response import HTTPResponse, json
from sanic.views import HTTPMethodView

from heliotrope.sanic import HeliotropeRequest

hitomi_list = Blueprint("hitomi_list", url_prefix="/list")


class HitomiListView(HTTPMethodView):
    """Serve a slice of the stored info documents."""

    async def get(self, request: HeliotropeRequest, index: int) -> HTTPResponse:
        """Return the info list that starts at the 1-based ``index``.

        Args:
            request: Incoming request; its app context provides
                ``nosql_query`` and the shared ``response`` helpers.
            index: 1-based position taken from the URL path.

        Returns:
            ``{"status": 200, "list": [...]}`` on success, or the shared
            ``bad_request`` response when ``index`` is smaller than 1.
        """
        # The public API counts from 1 while the query layer counts from 0.
        start_at_zero = index - 1

        if start_at_zero < 0:
            return request.app.ctx.response.bad_request

        # NOTE(review): the value is forwarded as a document offset, not as
        # a page number multiplied by the page size -- confirm this is the
        # intended pagination.
        info_list = await request.app.ctx.nosql_query.get_info_list(start_at_zero)

        return json({"status": 200, "list": info_list})


# The handler requires ``index``, so the route has to declare it as a typed
# path parameter; a bare "/" never supplies the argument.
# TODO: add_route is partially unknown and as_view is partially unknown Need PR Sanic
hitomi_list.add_route(HitomiListView.as_view(), "/<index:int>")  # type: ignore
from sanic.blueprints import Blueprint
from sanic.response import HTTPResponse, json
from sanic.views import HTTPMethodView

from heliotrope.sanic import HeliotropeRequest

hitomi_search = Blueprint("hitomi_search", url_prefix="/search")


class HitomiSearchView(HTTPMethodView):
    """Search the stored info documents."""

    async def get(self, request: HeliotropeRequest) -> HTTPResponse:
        """Run a search for the ``q`` query argument.

        Query arguments:
            q: Search text; when missing or empty nothing is searched.
            offset: Optional 1-based offset. Anything that is not a decimal
                integer greater than or equal to 1 falls back to the start.

        Returns:
            ``{"status": 200, "result": [...], "count": n}`` when the search
            yields a result, otherwise the shared ``not_found`` response.
        """
        raw_offset = request.args.get("offset")

        # ``isdecimal`` accepts exactly the digit strings ``int`` can parse.
        # ``isdigit`` is also true for characters such as "²", for which
        # ``int`` raises ValueError and would fail the whole request.
        if raw_offset and raw_offset.isdecimal() and int(raw_offset) >= 1:
            offset = int(raw_offset) - 1
        else:
            offset = 0

        if (query := request.args.get("q")) and (
            search_result := await request.app.ctx.nosql_query.search_info_list(
                query, offset
            )
        ):
            result, count = search_result
            return json(
                {
                    "status": 200,
                    "result": result,
                    "count": count,
                }
            )

        return request.app.ctx.response.not_found


# TODO: add_route is partially unknown and as_view is partially unknown Need PR Sanic
hitomi_search.add_route(HitomiSearchView.as_view(), "")  # type: ignore
language_localname = TextField(null=True) 13 | language = TextField(null=True) 14 | date = TextField(null=True) 15 | files: ManyToManyRelation["File"] = ManyToManyField("models.File") 16 | tags: ManyToManyRelation["Tag"] = ManyToManyField("models.Tag") 17 | japanese_title = TextField(null=True) 18 | title = TextField(null=True) 19 | id = CharField(255, pk=True) 20 | type = TextField(null=True) 21 | 22 | 23 | class File(Model): 24 | id = IntField(pk=True) 25 | index_id = CharField(255) 26 | width = IntField(null=True) 27 | hash = CharField(64, null=True) 28 | haswebp = IntField(null=True) 29 | hasavifsmalltn = IntField(null=True) 30 | name = TextField(null=True) 31 | height = IntField(null=True) 32 | hasavif = IntField(null=True) 33 | 34 | 35 | class Tag(Model): 36 | id = IntField(pk=True) 37 | index_id = CharField(255) 38 | male = CharField(1, null=True) 39 | female = CharField(1, null=True) 40 | tag = CharField(255, null=True) 41 | url = CharField(255, null=True) 42 | -------------------------------------------------------------------------------- /heliotrope/view/api/hitomi/images.py: -------------------------------------------------------------------------------- 1 | from sanic.blueprints import Blueprint 2 | from sanic.response import HTTPResponse, json 3 | from sanic.views import HTTPMethodView 4 | 5 | from heliotrope.hitomi.common import image_url_from_image 6 | from heliotrope.hitomi.models import HitomiFiles 7 | from heliotrope.sanic import HeliotropeRequest 8 | from heliotrope.shuffle import shuffle_image_url 9 | 10 | hitomi_images = Blueprint("hitomi_images", url_prefix="/images") 11 | 12 | 13 | class HitomiImagesView(HTTPMethodView): 14 | async def get(self, request: HeliotropeRequest, index_id: int) -> HTTPResponse: 15 | if galleryinfo := await request.app.ctx.sql_query.get_galleryinfo(index_id): 16 | return json( 17 | { 18 | "status": 200, 19 | "files": [ 20 | { 21 | "name": file.name, 22 | "url": shuffle_image_url( 23 | image_url_from_image(index_id, 
import re
from contextlib import suppress
from typing import Optional

# Both patterns are compiled once at import time instead of on every call.
_IMAGE_URL_PATTERN = re.compile(r"\/\/(..?)(\.hitomi\.la|\.pximg\.net)\/(.+?)\/(.+)")
_SHUFFLED_URL_PATTERN = re.compile(r"(.+)_(.+)_(pximg_net|hitomi_la)_(.+)_(.+_.+)")


def shuffle_image_url(url: str) -> Optional[str]:
    """Flatten a hitomi.la / pximg.net image URL into one underscore-joined token.

    The result has the form ``{subdomain}_{type}_{host}_{path}`` where the dots
    of the host and the slashes of the path are replaced by underscores.

    Args:
        url: Image URL containing ``//<1-2 chars>.hitomi.la/`` or
            ``//<1-2 chars>.pximg.net/``.

    Returns:
        The shuffled token, or ``None`` when ``url`` does not match that shape
        (or is not a string).
    """
    # A non-str argument is the only way this body can raise; suppressing just
    # TypeError keeps the "None on bad input" contract without hiding real bugs.
    with suppress(TypeError):
        if match := _IMAGE_URL_PATTERN.search(url):
            prefix, main_url, type_, image = match.groups()
            host = main_url.replace(".", "_")  # ".hitomi.la" -> "_hitomi_la"
            path = image.replace("/", "_")
            return f"{prefix}_{type_}{host}_{path}"

    return None


def solve_shuffle_image_url(shuffled_image_url: str) -> Optional[str]:
    """Rebuild the original image URL from a token made by ``shuffle_image_url``.

    Args:
        shuffled_image_url: Token of the form
            ``{subdomain}_{type}_{pximg_net|hitomi_la}_{directory}_{file}``.

    Returns:
        The ``https://`` URL, or ``None`` when the token does not match that
        shape (or is not a string).
    """
    with suppress(TypeError):
        if match := _SHUFFLED_URL_PATTERN.search(shuffled_image_url):
            prefix, type_, host, directory, file_part = match.groups()
            main_url = host.replace("_", ".")
            directory = directory.replace("_", "/")
            image = f"/{file_part}"

            # hitomi paths contain no literal underscores, so every "_" in
            # the file part stands for a "/".
            if "pximg" not in main_url:
                image = image.replace("_", "/")

            # A file part starting with "p0" is the tail of a pixiv file name
            # such as "<id>_p0_master1200.jpg": glue it back with "_" instead
            # of treating it as a further path segment.
            if image.startswith("/p0"):
                image = image.replace("/", "_")

            return f"https://{prefix}.{main_url}/{type_}/{directory}{image}"

    return None
from typing import Optional

from sanic.blueprints import Blueprint
from sanic.response import HTTPResponse
from sanic.views import HTTPMethodView

from heliotrope.sanic import HeliotropeRequest
from heliotrope.shuffle import solve_shuffle_image_url

proxy = Blueprint("proxy", url_prefix="/proxy")


class HeliotropeImageProxyView(HTTPMethodView):
    """Stream a remote image identified by a shuffled image URL token."""

    async def get(
        self, request: HeliotropeRequest, shuffled_image_url: str
    ) -> Optional[HTTPResponse]:
        """Fetch the image behind ``shuffled_image_url`` and relay it in chunks.

        Args:
            request: Incoming request; its app context provides
                ``hitomi_request`` (default headers), ``base_request`` (HTTP
                session) and the shared ``response`` helpers.
            shuffled_image_url: Token taken from the URL path.

        Returns:
            ``None`` after the image has been streamed through
            ``request.respond``, or the shared ``bad_request`` response when
            the token cannot be resolved or the upstream status is not 200.
        """
        url = solve_shuffle_image_url(shuffled_image_url)
        if not url:
            return request.app.ctx.response.bad_request

        # Work on a copy: the header mapping comes from the application-wide
        # ``hitomi_request`` object, and adding the pixiv referer in place
        # could leak it into later, unrelated requests.
        headers = dict(request.app.ctx.hitomi_request.headers)

        if "pximg" in url:
            headers["referer"] = "https://pixiv.net"

        async with request.app.ctx.base_request.session.get(
            url, headers=headers
        ) as request_response:
            if request_response.status != 200:
                return request.app.ctx.response.bad_request

            response: HTTPResponse = await request.respond(
                content_type=request_response.content_type
            )

            # Relay the body chunk by chunk instead of buffering the image.
            async for data, _ in request_response.content.iter_chunks():
                await response.send(data)

        return None


# The handler requires ``shuffled_image_url``, so the route has to declare it
# as a path parameter; a bare "/" never supplies the argument.
# TODO: add_route is partially unknown and as_view is partially unknown Need PR Sanic
proxy.add_route(HeliotropeImageProxyView.as_view(), "/<shuffled_image_url>")  # type: ignore
# pyright: reportUnknownMemberType=false, reportUnknownVariableType=false

from typing import Any, Optional, cast

from motor.motor_asyncio import AsyncIOMotorClient  # type: ignore


class NoSQLQuery:
    """Queries against the ``info`` collection of the ``hitomi`` database."""

    def __init__(self, mongo_db_url: str) -> None:
        """Open a client for ``mongo_db_url`` and bind the ``hitomi.info`` collection."""
        self.__collection = AsyncIOMotorClient(mongo_db_url).hitomi.info

    async def get_info_list(
        self, offset: int = 0, limit: int = 15
    ) -> list[dict[str, Any]]:
        """Return up to ``limit`` documents, sorted by ``index`` descending.

        Args:
            offset: Number of documents to skip.
            limit: Maximum number of documents to return.

        Returns:
            The matching documents without their ``_id`` field.
        """
        cursor = (
            self.__collection.find({}, {"_id": 0})
            .sort("index", -1)
            .skip(offset)
            .limit(limit)
        )
        # Materialize as many documents as the caller asked for; a fixed
        # length of 15 silently truncated any larger ``limit``.
        return cast(list[dict[str, Any]], await cursor.to_list(limit))

    async def search_info_list(
        self, query: str, offset: int = 0, limit: int = 15
    ) -> Optional[tuple[list[dict[str, Any]], int]]:
        """Search documents by title.

        Args:
            query: Text to search for in the ``title`` field.
            offset: Number of matching documents to skip.
            limit: Maximum number of documents to return.

        Returns:
            ``(documents, total_count)`` when the count stage yields a
            result, otherwise ``None``. Documents come without ``_id``.
        """
        search_query = {"$search": {"text": {"query": query, "path": "title"}}}

        # First pipeline only counts the matches; it yields no row at all
        # when nothing matched.
        count = await self.__collection.aggregate(
            [search_query, {"$count": "count"}]
        ).to_list(1)

        if not count:
            return None

        result = await self.__collection.aggregate(
            [
                search_query,
                {"$skip": offset},
                {"$limit": limit},
                {"$project": {"_id": 0}},
            ]
        ).to_list(limit)

        return cast(list[dict[str, Any]], result), cast(int, count[0]["count"])

    async def find_info(self, index_id: int) -> Optional[dict[str, Any]]:
        """Return the document whose ``index`` equals ``index_id``.

        Returns:
            The document without its ``_id`` field (the same projection the
            list queries use), or ``None`` when no document matches.
        """
        return cast(
            Optional[dict[str, Any]],
            await self.__collection.find_one({"index": index_id}, {"_id": 0}),
        )

    async def insert_info(self, info: dict[str, Any]) -> None:
        """Insert ``info`` as a new document; the insert result is discarded."""
        await self.__collection.insert_one(info)
kwargs.pop("heliotrope") 28 | session = ClientSession(**kwargs) 29 | mirroring = cls(heliotrope, session) 30 | mirroring.session.headers.update(mirroring.headers) 31 | return mirroring 32 | 33 | async def compare_index_list(self) -> list[int]: 34 | remote_index_list = await self.fetch_index() 35 | local_index_list = await self.__heliotrope.ctx.sql_query.get_index() 36 | return list(set(remote_index_list) - set(local_index_list)) 37 | 38 | async def mirroring(self, index_list: list[int]) -> None: 39 | for index in index_list: 40 | if galleryinfo := await self.get_galleyinfo(index): 41 | if not await self.sql.get_galleryinfo(index): 42 | await self.sql.add_galleryinfo(galleryinfo) 43 | 44 | if info := await self.get_info(index): 45 | if not await self.nosql.find_info(index): 46 | await self.nosql.insert_info(info.to_dict()) 47 | 48 | if index not in await self.sql.get_index(): 49 | await self.sql.add_index(index) 50 | 51 | async def task(self, delay: float) -> NoReturn: 52 | while True: 53 | if index_list := await self.compare_index_list(): 54 | await self.mirroring(index_list) 55 | 56 | await sleep(delay) 57 | -------------------------------------------------------------------------------- /heliotrope/server.py: -------------------------------------------------------------------------------- 1 | from asyncio.events import AbstractEventLoop 2 | from os import environ, getenv 3 | 4 | from aiohttp.client import ClientSession 5 | from sanic.app import Sanic 6 | from sentry_sdk import init 7 | from sentry_sdk.integrations.sanic import SanicIntegration 8 | from tortoise import Tortoise 9 | 10 | from heliotrope import __version__ 11 | from heliotrope.database.mongo import NoSQLQuery 12 | from heliotrope.database.query import SQLQuery 13 | from heliotrope.request.base import BaseRequest 14 | from heliotrope.request.hitomi import HitomiRequest 15 | from heliotrope.response import Response 16 | from heliotrope.sanic import Heliotrope 17 | from heliotrope.tasks.mirroring 
# TODO: Type hint
@heliotrope.main_process_start  # type: ignore
async def start(heliotrope: Heliotrope, loop: AbstractEventLoop) -> None:
    """Initialise shared services and schedule the mirroring loop.

    Runs ``setup_heliotrope`` first, then attaches the response helper and
    the HTTP request helpers to ``heliotrope.ctx``, and finally schedules
    the background mirroring task.

    Args:
        heliotrope: The application instance being started.
        loop: Event loop passed in by the listener (unused here).
    """
    await setup_heliotrope(heliotrope)
    heliotrope.ctx.response = Response()
    heliotrope.ctx.hitomi_request = await HitomiRequest.setup()
    heliotrope.ctx.base_request = BaseRequest(ClientSession())

    # Mirroring.setup() only constructs and returns the instance. Scheduling
    # the setup coroutine itself (as before) built the object, dropped it,
    # and never entered Mirroring.task(), so nothing was ever mirrored.
    mirroring = await Mirroring.setup(heliotrope=heliotrope)
    # NOTE(review): polling interval in seconds; the one-hour default is an
    # assumption - confirm the desired value for the deployment.
    delay = float(getenv("MIRRORING_DELAY", "3600"))
    heliotrope.add_task(mirroring.task(delay))
def full_path_from_hash(hash: str) -> str:
    """Return the storage path ``<last char>/<two chars before it>/<hash>``.

    Hashes shorter than three characters are returned unchanged.

    >>> full_path_from_hash("abcdef")
    'f/de/abcdef'

    Args:
        hash: Image hash string (the parameter name is kept for callers).

    Returns:
        The path fragment used when building image URLs.
    """
    if len(hash) < 3:
        return hash

    # Plain slicing instead of re.sub: the old code concatenated the raw hash
    # into the replacement template, so a backslash inside the hash was
    # interpreted as a regex escape (raising re.error or corrupting output).
    return f"{hash[-1]}/{hash[-3:-1]}/{hash}"
def image_url_from_image(galleryid: int, image: HitomiFiles, no_webp: bool) -> str:
    """Build the download URL for *image*, using WebP when it is available.

    WebP is selected only when the file has a hash, reports ``haswebp`` and
    the caller did not set *no_webp*; otherwise ``None`` is forwarded and
    the callee falls back to its own defaults.

    Args:
        galleryid: Gallery id, forwarded unchanged.
        image: File entry to build the URL for.
        no_webp: When true, never select the WebP variant.

    Returns:
        The URL produced by ``url_from_url_from_hash``.
    """
    wants_webp = bool(image.hash and image.haswebp) and not no_webp
    variant = "webp" if wants_webp else None
    return url_from_url_from_hash(galleryid, image, variant)
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 
11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ master ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ master ] 20 | schedule: 21 | - cron: '21 15 * * 0' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | 28 | strategy: 29 | fail-fast: false 30 | matrix: 31 | language: [ 'python' ] 32 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] 33 | # Learn more: 34 | # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed 35 | 36 | steps: 37 | - name: Checkout repository 38 | uses: actions/checkout@v2 39 | 40 | # Initializes the CodeQL tools for scanning. 41 | - name: Initialize CodeQL 42 | uses: github/codeql-action/init@v1 43 | with: 44 | languages: ${{ matrix.language }} 45 | # If you wish to specify custom queries, you can do so here or in a config file. 46 | # By default, queries listed here will override any specified in a config file. 47 | # Prefix the list here with "+" to use these queries and those in the config file. 48 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 49 | 50 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 51 | # If this step fails, then you should remove it and run the build manually (see below) 52 | - name: Autobuild 53 | uses: github/codeql-action/autobuild@v1 54 | 55 | # ℹ️ Command-line programs to run using the OS shell. 
56 | # 📚 https://git.io/JvXDl 57 | 58 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 59 | # and modify them (or add more) to build your code if your project 60 | # uses a compiled language 61 | 62 | #- run: | 63 | # make bootstrap 64 | # make release 65 | 66 | - name: Perform CodeQL Analysis 67 | uses: github/codeql-action/analyze@v1 68 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Heliotrope 2 | 3 | **WARNING** 4 | As of 5.0.0, this repository is deprecated. 5 | 6 | Go to: https://github.com/saebasol/heliotrope 7 | 8 | > Hitomi.la mirror api 9 | 10 | | Project name | Badge | 11 | | ---------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 12 | | [Hiyobot](https://github.com/Saebasol/Hiyobot) | [![Code Style](https://img.shields.io/badge/code%20style-black-black)](https://github.com/psf/black) | 13 | | [Heliotrope](https://github.com/Saebasol/Heliotrope) | [![Build Status](https://dev.azure.com/Saebasol/Heliotrope/_apis/build/status/Saebasol.Heliotrope?branchName=master)](https://dev.azure.com/Saebasol/Heliotrope/_build/latest?definitionId=1&branchName=master) [![codecov](https://codecov.io/gh/Saebasol/Heliotrope/branch/master/graph/badge.svg?token=VTL1Z4abB7)](https://codecov.io/gh/Saebasol/Heliotrope) [![Code Style](https://img.shields.io/badge/code%20style-black-black)](https://github.com/psf/black) | 14 | | 
[Mintchoco](https://github.com/Saebasol/Mintchoco) | [![Code Style](https://img.shields.io/badge/code%20style-black-black)](https://github.com/psf/black) | 15 | 16 | ## Feature 17 | 18 | - Asynchronous 19 | - ORM based 20 | - Galleryinfo, Info auto mirroring 21 | - pixiv.net, hitomi.la image proxying 22 | - Search by title 23 | 24 | ## Deployment environment 25 | 26 | - Ubuntu 20.04.1 LTS 27 | - Mongodb atlas (Using Atlas Search) 28 | - Mariadb 15.1 (Self Hosting) 29 | - Nginx (Reverse Proxing) 30 | - Docker 31 | 32 | ## Docs 33 | 34 | https://github.com/Saebasol/Heliotrope/wiki 35 | -------------------------------------------------------------------------------- /heliotrope/database/query.py: -------------------------------------------------------------------------------- 1 | from asyncio.tasks import gather 2 | from typing import Any, Optional, cast 3 | 4 | from tortoise.contrib.pydantic.creator import pydantic_model_creator 5 | 6 | from heliotrope.database.models.hitomi import File, GalleryInfo, Index, Tag 7 | from heliotrope.hitomi.models import HitomiGalleryinfo 8 | from heliotrope.typing import HitomiGalleryinfoJSON 9 | 10 | 11 | class SQLQuery: 12 | def __init__(self) -> None: 13 | self.pydantic_galleryinfo = pydantic_model_creator(GalleryInfo) 14 | 15 | async def add_galleryinfo(self, hitomi_galleryinfo: HitomiGalleryinfo) -> None: 16 | """ 17 | Add a new galleryinfo to the database. 
async def get_galleryinfo(self, index_id: int) -> Optional[HitomiGalleryinfoJSON]:
    """Load the stored galleryinfo for *index_id* as a JSON-style dict.

    The ORM row is converted through the pydantic model built in
    ``__init__``; the ``index_id`` and ``id`` columns of every nested file
    and tag are left out of the result.

    Args:
        index_id: Gallery id to look up.

    Returns:
        The galleryinfo mapping, or ``None`` when no row has that id.
    """
    orm_row = await GalleryInfo.get_or_none(id=index_id)
    if not orm_row:
        return None

    pydantic_model = await self.pydantic_galleryinfo.from_tortoise_orm(orm_row)
    # Bookkeeping columns that exist only in the database schema.
    excluded = {
        "files": {"__all__": {"index_id", "id"}},
        "tags": {"__all__": {"index_id", "id"}},
    }
    return cast(HitomiGalleryinfoJSON, pydantic_model.dict(exclude=excluded))
class HitomiRequest(BaseRequest):
    """Request helper for hitomi.la pages and its ``ltn`` data host."""

    def __init__(self, session: ClientSession):
        super().__init__(session)

    @property
    def domain(self) -> str:
        """Base domain every URL of this helper is built from."""
        return "hitomi.la"

    @property
    def headers(self) -> dict[str, str]:
        """Default headers: a hitomi.la referer plus the shared user agent."""
        return {
            "referer": f"https://{self.domain}",
            "User-Agent": self.user_agent,
        }

    @property
    def ltn_url(self) -> str:
        """Base URL of the ``ltn`` host (galleryinfo scripts, index files)."""
        return f"https://ltn.{self.domain}"

    @property
    def url(self) -> str:
        """Base URL of the main site."""
        return f"https://{self.domain}"

    @classmethod
    async def setup(cls, **kwargs: Any) -> "HitomiRequest":
        """Create an instance backed by a new ``ClientSession``.

        Args:
            **kwargs: Forwarded to the ``ClientSession`` constructor.

        Returns:
            The new instance, with ``headers`` merged into its session.
        """
        session = ClientSession(**kwargs)
        hitomi_request = cls(session)
        hitomi_request.session.headers.update(hitomi_request.headers)
        return hitomi_request

    async def get_redirect_url(self, index_id: int) -> Optional[tuple[str, str]]:
        """Resolve the gallery page a ``/galleries/<id>.html`` stub links to.

        Args:
            index_id: Gallery id.

        Returns:
            ``(url, hitomi_type)`` where ``hitomi_type`` is the first path
            segment of the linked URL, or ``None`` when the stub is not
            served with status 200 or contains no ``<a href>``.
        """
        response = await self.get(f"{self.url}/galleries/{index_id}.html", "text")
        if response.status != 200:
            return None

        soup = BeautifulSoup(response.returned, "lxml")
        anchor = soup.find("a", href=True)
        if anchor is None:
            # No link in the stub page: report "not found" rather than
            # failing with a TypeError on None["href"].
            return None

        url = cast(Mapping[str, str], anchor)["href"]
        hitomi_type = urlparse(url).path.split("/")[1]
        return url, hitomi_type

    async def get_galleyinfo(self, index_id: int) -> Optional[HitomiGalleryinfo]:
        """Fetch and parse the galleryinfo script of a gallery.

        The method name keeps its existing spelling because callers use it.

        Args:
            index_id: Gallery id.

        Returns:
            The parsed galleryinfo, or ``None`` when the script is not
            served with status 200.
        """
        # Function-scope import so this fix does not depend on an edit to
        # the module's import block.
        from json import loads

        response = await self.get(f"{self.ltn_url}/galleries/{index_id}.js", "text")

        if response.status != 200:
            return None

        # The script is "var galleryinfo = {...}". Strip the assignment and
        # decode the JSON; previously the raw text was only *cast* to the
        # JSON type, so HitomiGalleryinfo ended up indexing a plain str.
        payload = str(response.returned).replace("var galleryinfo = ", "")
        return HitomiGalleryinfo(cast(HitomiGalleryinfoJSON, loads(payload)))

    async def fetch_index(
        self, page: int = 1, item: int = 25, index_file: str = "index-korean.nozomi"
    ) -> tuple[int, ...]:
        """Read one page of gallery ids from a nozomi index file.

        A byte range covering ``item`` four-byte entries is requested and
        the body is decoded as big-endian signed 32-bit integers.

        Args:
            page: 1-based page number.
            item: Number of ids per page.
            index_file: Index file name on the ``ltn`` host.

        Returns:
            The decoded gallery ids.
        """
        byte_start = (page - 1) * item * 4
        byte_end = byte_start + item * 4 - 1

        response = await self.get(
            f"{self.ltn_url}/{index_file}",
            "read",
            headers={
                "Range": f"bytes={byte_start}-{byte_end}",
                "origin": f"http://{self.domain}",
            },
        )

        total_items = len(response.returned) // 4
        return unpack(f">{total_items}i", bytes(response.returned))

    async def get_info(self, index_id: int) -> Optional[HitomiInfo]:
        """Download and wrap the gallery page for *index_id*.

        Args:
            index_id: Gallery id.

        Returns:
            A ``HitomiInfo`` over the page HTML, or ``None`` when the
            redirect cannot be resolved or the page is not served with
            status 200.
        """
        redirect = await self.get_redirect_url(index_id)
        if not redirect:
            return None

        url, hitomi_type = redirect
        response = await self.get(url, "text")

        if response.status != 200:
            return None

        if isinstance(response.returned, bytes):
            response.returned = response.returned.decode("utf-8")

        return HitomiInfo(response.returned, hitomi_type)
class HitomiTagParser(HitomiBaseParser):
    """Element lookups for the individual fields of a gallery page.

    The rows of the gallery-info table are addressed by position:
    0 group, 1 type, 2 language, 3 series, 4 characters, 5 tags.
    """

    def __init__(self, html: str, hitomi_type: str) -> None:
        super().__init__(html, hitomi_type)

    def __row_links(self, row: int) -> list[Tag]:
        # Every anchor inside the given gallery-info row.
        return cast(list[Tag], self.infos[row].find_all("a"))

    def __row_link(self, row: int) -> Tag:
        # The first anchor inside the given gallery-info row.
        link = self.infos[row].find("a")
        assert isinstance(link, Tag)
        return link

    @property
    def title_element(self) -> Tag:
        """Anchor inside the gallery's ``<h1>`` heading."""
        heading = self.gallery_element.find("h1")
        assert isinstance(heading, Tag)
        anchor = heading.find("a")
        assert isinstance(anchor, Tag)
        return anchor

    @property
    def thumbnail_element(self) -> Mapping[str, str]:
        """Attributes of the ``<img>`` inside the first ``<picture>``."""
        picture = self.soup.find("picture")
        assert isinstance(picture, Tag)
        image = picture.find("img")
        assert isinstance(image, Tag)
        return cast(Mapping[str, str], image.attrs)

    @property
    def artist_element(self) -> list[Tag]:
        """Anchors inside the first ``<h2>`` of the page."""
        heading = self.soup.find("h2")
        assert isinstance(heading, Tag)
        return cast(list[Tag], heading.find_all("a"))

    @property
    def group_element(self) -> list[Tag]:
        """Anchors of info row 0."""
        return self.__row_links(0)

    @property
    def type_element(self) -> Tag:
        """First anchor of info row 1."""
        return self.__row_link(1)

    @property
    def language_element(self) -> Tag:
        """First anchor of info row 2."""
        return self.__row_link(2)

    @property
    def series_element(self) -> list[Tag]:
        """Anchors of info row 3."""
        return self.__row_links(3)

    @property
    def character_element(self) -> list[Tag]:
        """Anchors of info row 4."""
        return self.__row_links(4)

    @property
    def tags_element(self) -> list[Tag]:
        """Anchors of info row 5."""
        return self.__row_links(5)

    @property
    def date_element(self) -> Tag:
        """The ``<span class="date">`` element."""
        date_span = self.soup.find("span", class_="date")
        assert isinstance(date_span, Tag)
        return date_span
class HitomiTags:
    """Read-only view over one tag entry of a galleryinfo response."""

    def __init__(self, response: HitomiTagsJSON) -> None:
        self.__response = response

    @property
    def male(self) -> Literal["", "1"]:
        """Raw ``male`` flag of the entry."""
        return self.__response["male"]

    @property
    def female(self) -> Literal["", "1"]:
        """Raw ``female`` flag of the entry."""
        return self.__response["female"]

    @property
    def url(self) -> str:
        """Raw ``url`` value of the entry."""
        return self.__response["url"]

    @property
    def tag(self) -> str:
        """Raw ``tag`` value of the entry."""
        return self.__response["tag"]

    @classmethod
    def to_generator(cls, tags: list[HitomiTagsJSON]) -> Iterator["HitomiTags"]:
        """Yield one wrapper per raw tag mapping.

        Instances are built through ``cls`` (as ``HitomiFiles.to_generator``
        does) so subclasses yield their own type instead of the base class.
        """
        for tag in tags:
            yield cls(tag)

    @classmethod
    def parse_tags(cls, tag: HitomiTagsJSON) -> dict[str, str]:
        """Convert a raw tag mapping into a ``{"value", "url"}`` pair.

        ``value`` is ``"<kind>: <tag>"`` where kind is ``female`` when the
        female flag is set, otherwise ``male`` when the male flag is set,
        otherwise ``tag``.
        """
        if tag["female"]:
            kind = "female"
        elif tag["male"]:
            kind = "male"
        else:
            kind = "tag"

        return {
            "value": f"{kind}: {tag['tag']}",
            "url": tag["url"],
        }

    def to_parse_dict(self) -> dict[str, str]:
        """Return ``parse_tags`` applied to this entry."""
        return self.parse_tags(self.to_dict())

    def to_dict(self) -> HitomiTagsJSON:
        """Return the entry as a plain mapping with its four known keys."""
        return {
            "male": self.male,
            "female": self.female,
            "url": self.url,
            "tag": self.tag,
        }
# NOTE: Splitting the "value"/"url" pairs into their own object type may be
# worth considering.
class HitomiInfo:
    """Gallery metadata extracted from a gallery page's HTML."""

    def __init__(self, html: str, hitomi_type: str) -> None:
        # Composition rather than inheritance: subclassing the parser got messy.
        self.__parser = HitomiTagParser(html, hitomi_type)

    def __parse_list_element(self, elements: list[Tag]) -> list[dict[str, str]]:
        # One {"value", "url"} pair per anchor, in document order.
        pairs = []
        for anchor in elements:
            pairs.append({"value": anchor.text, "url": str(anchor.attrs["href"])})
        return pairs

    def __parse_single_element(self, elements: Tag) -> Optional[dict[str, str]]:
        # A falsy element yields None; otherwise spaces and newlines are
        # removed from the anchor text.
        if elements:
            compact_text = "".join(
                char for char in elements.text if char not in (" ", "\n")
            )
            return {"value": compact_text, "url": str(elements.attrs["href"])}
        return None

    @property
    def title(self) -> str:
        """Text of the title anchor."""
        return str(self.__parser.title_element.text)

    @property
    def thumbnail(self) -> str:
        """``src`` attribute of the thumbnail image."""
        return self.__parser.thumbnail_element["src"]

    @property
    def artist(self) -> list[dict[str, str]]:
        """Artist links as value/url pairs."""
        return self.__parse_list_element(self.__parser.artist_element)

    @property
    def group(self) -> list[dict[str, str]]:
        """Group links as value/url pairs."""
        return self.__parse_list_element(self.__parser.group_element)

    @property
    def type(self) -> Optional[dict[str, str]]:
        """Type link as a value/url pair."""
        return self.__parse_single_element(self.__parser.type_element)

    @property
    def language(self) -> Optional[dict[str, str]]:
        """Language link as a value/url pair."""
        return self.__parse_single_element(self.__parser.language_element)

    @property
    def series(self) -> list[dict[str, str]]:
        """Series links as value/url pairs."""
        return self.__parse_list_element(self.__parser.series_element)

    @property
    def character(self) -> list[dict[str, str]]:
        """Character links as value/url pairs."""
        return self.__parse_list_element(self.__parser.character_element)

    @property
    def tags(self) -> list[dict[str, str]]:
        """Tag links as value/url pairs."""
        return self.__parse_list_element(self.__parser.tags_element)

    @property
    def date(self) -> str:
        """Text of the date element."""
        return str(self.__parser.date_element.text)

    def to_dict(self) -> Any:
        """Return every field as a plain dict, keyed by property name."""
        field_names = (
            "title",
            "thumbnail",
            "artist",
            "group",
            "type",
            "language",
            "series",
            "character",
            "tags",
            "date",
        )
        return {name: getattr(self, name) for name in field_names}