├── .env ├── src ├── conftest.py └── retaggr │ ├── engines │ ├── saucenao │ │ ├── __init__.py │ │ ├── handlers │ │ │ ├── __init__.py │ │ │ ├── yandere.py │ │ │ ├── konachan.py │ │ │ ├── danbooru.py │ │ │ ├── base.py │ │ │ ├── gelbooru.py │ │ │ └── e621.py │ │ └── engine.py │ ├── __init__.py │ ├── dummy.py │ ├── paheal.py │ ├── danbooru.py │ ├── base.py │ └── iqdb.py │ ├── errors.py │ ├── __init__.py │ ├── config.py │ ├── aiohttp_requests │ └── __init__.py │ └── core.py ├── .github └── FUNDING.yml ├── .coveralls.yml ├── requirements.txt ├── .coveragerc ├── test-requirements.txt ├── docs ├── usage.rst ├── config.rst ├── developer.rst ├── core.rst ├── tests.rst ├── exceptions.rst ├── Makefile ├── api.rst ├── index.rst ├── make.bat ├── handlers.rst ├── engines.rst ├── dev-handler.rst ├── dev-engine.rst ├── conf.py └── user.rst ├── .travis.yml ├── test.example.sh ├── .vscode ├── settings.json └── tasks.json ├── tests ├── test_config.py ├── test_engine.py ├── test_saucenao.py └── test_core.py ├── setup.py ├── Makefile ├── README.md ├── .gitignore ├── CHANGELOG.md └── LICENSE /.env: -------------------------------------------------------------------------------- 1 | PYTHONPATH=src -------------------------------------------------------------------------------- /src/conftest.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | patreon: noirscape -------------------------------------------------------------------------------- /.coveralls.yml: -------------------------------------------------------------------------------- 1 | service_name: travis-ci 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | lxml 2 | fake-useragent 3 | requests 4 | aiohttp -------------------------------------------------------------------------------- /src/retaggr/engines/saucenao/__init__.py: -------------------------------------------------------------------------------- 1 | from .engine import SauceNao -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = src/retaggr/aiohttp_requests/__init__.py 3 | -------------------------------------------------------------------------------- /test-requirements.txt: -------------------------------------------------------------------------------- 1 | pytest-cov 2 | pytest-asyncio 3 | pytest 4 | coveralls -------------------------------------------------------------------------------- /docs/usage.rst: -------------------------------------------------------------------------------- 1 | Usage 2 | ======= 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | user 8 | developer -------------------------------------------------------------------------------- /docs/config.rst: -------------------------------------------------------------------------------- 1 | .. currentmodule:: retaggr 2 | 3 | ReverseSearchConfig 4 | ===================== 5 | 6 | .. 
autoclass:: ReverseSearchConfig 7 | :members: 8 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | before_script: 3 | - pip install -r test-requirements.txt -r requirements.txt 4 | script: make clean-test 5 | after_success: coveralls 6 | -------------------------------------------------------------------------------- /test.example.sh: -------------------------------------------------------------------------------- 1 | export DANBOORU_USERNAME="" 2 | export DANBOORU_API_KEY="" 3 | export E621_USERNAME="" 4 | export APP_NAME="" 5 | export APP_VERSION="" 6 | make test 7 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "restructuredtext.confPath": "${workspaceFolder}/docs", 3 | "python.envFile": "${workspaceFolder}/.env", 4 | "python.pythonPath": "/usr/bin/python" 5 | } -------------------------------------------------------------------------------- /docs/developer.rst: -------------------------------------------------------------------------------- 1 | Developer reference 2 | ===================== 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | :caption: Developer reference: 7 | 8 | dev-engine 9 | dev-handler 10 | tests -------------------------------------------------------------------------------- /src/retaggr/engines/saucenao/handlers/__init__.py: -------------------------------------------------------------------------------- 1 | from .danbooru import DanbooruHandler 2 | from .gelbooru import GelbooruHandler 3 | from .e621 import E621Handler 4 | from .konachan import KonachanHandler 5 | from .yandere import YandereHandler -------------------------------------------------------------------------------- /src/retaggr/engines/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Engine 2 | from .base import ImageResult 3 | 4 | from .danbooru import Danbooru 5 | from .iqdb import Iqdb 6 | from .paheal import Paheal 7 | from .saucenao import SauceNao 8 | from .dummy import Dummy -------------------------------------------------------------------------------- /docs/core.rst: -------------------------------------------------------------------------------- 1 | .. currentmodule:: retaggr 2 | 3 | ReverseSearch 4 | =============== 5 | 6 | 7 | Core Class 8 | ------------ 9 | .. autoclass:: ReverseSearch 10 | :members: 11 | 12 | Answer Class 13 | -------------- 14 | .. autoclass:: ReverseResult -------------------------------------------------------------------------------- /docs/tests.rst: -------------------------------------------------------------------------------- 1 | Running the tests 2 | =================== 3 | 4 | Running the tests requires accounts at Danbooru and E621. Both of these can be created for free at their respective sites. 5 | 6 | To run the tests, copy `test.example.sh` to `test.sh` and fill out the variables in it. Then run `test.sh`. 7 | -------------------------------------------------------------------------------- /docs/exceptions.rst: -------------------------------------------------------------------------------- 1 | .. currentmodule:: retaggr.errors 2 | 3 | Exceptions 4 | ============ 5 | 6 | .. autoclass:: MissingAPIKeysException 7 | :members: 8 | 9 | .. autoclass:: NotAValidEngineException 10 | :members: 11 | 12 | .. 
autoclass:: NotAvailableSearchException 13 | :members: 14 | 15 | .. autoclass:: EngineCooldownException 16 | :members: 17 | 18 | .. autoclass:: EngineIsDown 19 | :members: -------------------------------------------------------------------------------- /tests/test_config.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import retaggr 3 | 4 | # Logging 5 | import logging 6 | logging.basicConfig(level=logging.DEBUG) 7 | 8 | def test_create_empty_config(): 9 | config = retaggr.ReverseSearchConfig() 10 | assert config.__dict__ == {} 11 | 12 | def test_create_config_with_variable(): 13 | config = retaggr.ReverseSearchConfig(app_name="py.test") 14 | assert config.__dict__ == {"app_name": "py.test"} -------------------------------------------------------------------------------- /src/retaggr/errors.py: -------------------------------------------------------------------------------- 1 | class MissingAPIKeysException(Exception): 2 | """Raised if a required API key to search an engine is missing.""" 3 | pass 4 | 5 | class NotAValidEngineException(Exception): 6 | """Raised if the passed in engine does not exist.""" 7 | 8 | class NotAvailableSearchException(Exception): 9 | """This engine is not capable of searching this option.""" 10 | 11 | class EngineCooldownException(Exception): 12 | """This engine is on cooldown.""" 13 | 14 | class EngineIsDown(Exception): 15 | """The engine is currently not available (eg. Database issues).""" 16 | -------------------------------------------------------------------------------- /src/retaggr/__init__.py: -------------------------------------------------------------------------------- 1 | from .core import ReverseSearch, ReverseResult 2 | from .config import ReverseSearchConfig 3 | from .errors import MissingAPIKeysException, NotAValidEngineException, NotAvailableSearchException, EngineCooldownException, EngineIsDown 4 | from .engines import ImageResult 5 | 6 | from collections import namedtuple 7 | 8 | VersionInfo = namedtuple('VersionInfo', 'major minor micro releaselevel serial') 9 | version_info = VersionInfo(major=3, minor=1, micro=0, releaselevel="final", serial=0) 10 | 11 | __version__ = "{}.{}.{}".format(version_info.major, version_info.minor, version_info.micro) -------------------------------------------------------------------------------- /src/retaggr/engines/dummy.py: -------------------------------------------------------------------------------- 1 | from retaggr.engines.base import Engine, ImageResult 2 | 3 | # External modules 4 | import asyncio 5 | import requests as fuck_aiohttp 6 | import functools 7 | 8 | class Dummy(Engine): 9 | """A dummy engine that's only useful for testing. 
10 | """ 11 | host = "https://danbooru.donmai.us" 12 | download_required = False 13 | 14 | def __init__(self, fail): 15 | self.fail = fail 16 | 17 | async def search_image(self, url): 18 | if self.fail: 19 | raise Exception("Task failed (intentional)") 20 | else: 21 | return ImageResult(["test"], ["test"], "safe") -------------------------------------------------------------------------------- /src/retaggr/engines/saucenao/handlers/yandere.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import functools 3 | 4 | import requests as fuck_aiohttp 5 | 6 | from .base import SauceNaoHandler 7 | 8 | class YandereHandler(SauceNaoHandler): 9 | engine_id = 12 10 | """""" 11 | 12 | tag_capable = True 13 | """""" 14 | 15 | source_capable = False 16 | """""" 17 | 18 | async def get_tag_data(self, data): 19 | loop = asyncio.get_event_loop() 20 | r = await loop.run_in_executor(None, functools.partial(fuck_aiohttp.get, "https://yande.re/post.json", params={"tags": "id:" + str(data["yandere_id"])})) 21 | j = r.json() 22 | return set(j[0]["tags"].split()) -------------------------------------------------------------------------------- /src/retaggr/engines/saucenao/handlers/konachan.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import functools 3 | 4 | import requests as fuck_aiohttp 5 | 6 | from .base import SauceNaoHandler 7 | 8 | class KonachanHandler(SauceNaoHandler): 9 | engine_id = 26 10 | """""" 11 | 12 | tag_capable = True 13 | """""" 14 | 15 | source_capable = False 16 | """""" 17 | 18 | 19 | async def get_tag_data(self, data): 20 | loop = asyncio.get_event_loop() 21 | r = await loop.run_in_executor(None, functools.partial(fuck_aiohttp.get, "http://konachan.com/post.json", params={"tags": "id:" + str(data["konachan_id"])})) 22 | j = r.json() 23 | return set(j[0]["tags"].split()) -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | =============== 3 | 4 | The following pages lay out the API of retaggr. 5 | 6 | Version reference 7 | ------------------- 8 | 9 | To check the installed retaggr version, there are two variables you can use. 10 | 11 | retaggr follows semantic versioning. Older releases are considered unsupported and will be yanked if they contain major issues. 12 | 13 | .. data:: version_info 14 | 15 | A named tuple similar to :py:obj:`sys.version_info`. 16 | 17 | .. data:: __version__ 18 | 19 | A string representation of the major, minor and micro version. 
eg. ``"1.2.0"``. 20 | 21 | Table of contents 22 | ------------------- 23 | 24 | .. toctree:: 25 | :maxdepth: 2 26 | :caption: API reference: 27 | 28 | core 29 | config 30 | engines 31 | exceptions 32 | handlers 33 | -------------------------------------------------------------------------------- /.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | // See https://go.microsoft.com/fwlink/?LinkId=733558 3 | // for the documentation about the tasks.json format 4 | "version": "2.0.0", 5 | "tasks": [ 6 | { 7 | "label": "Run all tests", 8 | "type": "shell", 9 | "command": "./test.sh", 10 | "group": "test" 11 | }, 12 | { 13 | "label": "Build a distributable installation", 14 | "type": "shell", 15 | "command": "source venv/bin/activate && python3 setup.py sdist bdist_wheel", 16 | "group": "build", 17 | "problemMatcher": [] 18 | }, 19 | { 20 | "label": "Compile docs", 21 | "type": "shell", 22 | "command": "cd docs && make html", 23 | "problemMatcher": [] 24 | } 25 | ] 26 | } -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. retaggr documentation master file, created by 2 | sphinx-quickstart on Sun Aug 18 23:08:51 2019. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to retaggr's documentation! 7 | ================================================ 8 | 9 | Library to reverse search various boorus. 10 | 11 | .. code-block:: python 12 | 13 | from retaggr import ReverseSearch, ReverseSearchConfig 14 | config = ReverseSearchConfig(min_score=80.0) 15 | rsearch = ReverseSearch(config) 16 | 17 | Check the :ref:`user` page for more. 18 | 19 | .. toctree:: 20 | :maxdepth: 3 21 | :caption: Contents: 22 | 23 | usage 24 | api 25 | 26 | 27 | Indices and tables 28 | ================== 29 | 30 | * :ref:`genindex` 31 | * :ref:`modindex` 32 | * :ref:`search` -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /src/retaggr/engines/saucenao/handlers/danbooru.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import functools 3 | 4 | import requests as fuck_aiohttp 5 | 6 | from .base import SauceNaoHandler 7 | 8 | class DanbooruHandler(SauceNaoHandler): 9 | engine_id = 9 10 | """""" 11 | 12 | tag_capable = True 13 | """""" 14 | 15 | source_capable = True 16 | """""" 17 | 18 | 19 | async def get_tag_data(self, data): 20 | loop = asyncio.get_event_loop() 21 | r = await loop.run_in_executor(None, functools.partial(fuck_aiohttp.get, "https://danbooru.donmai.us/posts/" + str(data["danbooru_id"]) + ".json")) 22 | j = r.json() 23 | return set(j["tag_string"].split()) 24 | 25 | async def get_source_data(self, data): 26 | loop = asyncio.get_event_loop() 27 | r = await loop.run_in_executor(None, functools.partial(fuck_aiohttp.get, "https://danbooru.donmai.us/posts/" + str(data["danbooru_id"]) + ".json")) 28 | j = r.json() 29 | return set([j["source"]]) -------------------------------------------------------------------------------- /src/retaggr/engines/saucenao/handlers/base.py: -------------------------------------------------------------------------------- 1 | from typing import Set 2 | 3 | class SauceNaoHandler: 4 | """Base class to handle SauceNao engine results. 5 | """ 6 | engine_id = None 7 | """The engine ID. Engine IDs can be located on https://saucenao.com/status.html.""" 8 | 9 | tag_capable = False 10 | """This determines if the Handler has the ability to retrieve tags. 11 | 12 | If this is false, :meth:`get_tag_data` may not necessarily exist.""" 13 | 14 | source_capable = False 15 | """This determines if the Handler has the ability to retrieve additional source data. 16 | 17 | If this is false, :meth:`get_source_data` may not necessarily exist.""" 18 | 19 | async def get_tag_data(self, data) -> Set[str]: # pragma: no cover 20 | """Get all the tags matching the supplied data.""" 21 | pass 22 | 23 | async def get_source_data(self, data) -> Set[str]: # pragma: no cover 24 | """Extract the source from the supplied data.""" 25 | pass -------------------------------------------------------------------------------- /docs/handlers.rst: -------------------------------------------------------------------------------- 1 | .. currentmodule:: retaggr.engines.saucenao.handlers 2 | 3 | Handlers 4 | ========= 5 | 6 | The following is a list of all dedicated handlers for the Saucenao API. 7 | 8 | A handler provides extra capabilities for retrieving source information. 9 | 10 | **The handlers are not considered a part of the public API and as a result do not follow semantic versioning. They are documented here for development purposes.** 11 | 12 | Danbooru 13 | ---------- 14 | 15 | .. autoclass:: DanbooruHandler 16 | :members: 17 | 18 | 19 | Gelbooru 20 | ----- 21 | 22 | .. autoclass:: GelbooruHandler 23 | :members: 24 | 25 | 26 | e621 27 | ----- 28 | 29 | .. autoclass:: E621Handler 30 | :members: 31 | 32 | 33 | Konachan 34 | --------- 35 | 36 | .. autoclass:: KonachanHandler 37 | :members: 38 | 39 | Yandere 40 | --------- 41 | 42 | .. 
autoclass:: YandereHandler 43 | :members: 44 | 45 | Base 46 | ------ 47 | 48 | .. autoclass:: retaggr.engines.saucenao.handlers.base.SauceNaoHandler 49 | :members: 50 | -------------------------------------------------------------------------------- /docs/engines.rst: -------------------------------------------------------------------------------- 1 | .. currentmodule:: retaggr 2 | 3 | Engines 4 | ======== 5 | 6 | This file documents some general information about the engines you can search. 7 | 8 | You generally shouldn't instantiate these classes yourself, rather you should use them as a reference. 9 | 10 | ImageResult 11 | ------------ 12 | 13 | .. autoclass:: retaggr.engines.ImageResult 14 | 15 | Danbooru 16 | ---------- 17 | 18 | .. autoclass:: retaggr.engines.Danbooru 19 | :members: 20 | 21 | Iqdb 22 | ------ 23 | 24 | .. autoclass:: retaggr.engines.Iqdb 25 | :members: 26 | 27 | Paheal 28 | ------ 29 | 30 | .. autoclass:: retaggr.engines.Paheal 31 | :members: 32 | 33 | 34 | SauceNao 35 | -------- 36 | 37 | .. autoclass:: retaggr.engines.SauceNao 38 | :members: 39 | 40 | Base 41 | ------ 42 | 43 | This class is the base class for all engines that exist in the application. The attributes and methods listed here should exist in some form on 44 | all the previous classes. 45 | 46 | .. autoclass:: retaggr.engines.Engine 47 | :members: 48 | -------------------------------------------------------------------------------- /src/retaggr/engines/saucenao/handlers/gelbooru.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import functools 3 | 4 | import requests as fuck_aiohttp 5 | 6 | from .base import SauceNaoHandler 7 | 8 | class GelbooruHandler(SauceNaoHandler): 9 | engine_id = 25 10 | """""" 11 | 12 | tag_capable = True 13 | """""" 14 | 15 | source_capable = True 16 | """""" 17 | 18 | 19 | async def get_tag_data(self, data): 20 | loop = asyncio.get_event_loop() 21 | r = await loop.run_in_executor(None, functools.partial(fuck_aiohttp.get, "https://gelbooru.com/index.php?page=dapi&s=post&q=index&json=1&id=" + str(data["gelbooru_id"]))) 22 | j = r.json()[0] 23 | return set(j["tags"].split()) 24 | 25 | async def get_source_data(self, data): 26 | loop = asyncio.get_event_loop() 27 | r = await loop.run_in_executor(None, functools.partial(fuck_aiohttp.get, "https://gelbooru.com/index.php?page=dapi&s=post&q=index&json=1&id=" + str(data["gelbooru_id"]))) 28 | j = r.json()[0] 29 | return set([j["source"]]) 30 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | import subprocess 3 | 4 | setup( 5 | name="retaggr", 6 | version="3.1.0", 7 | url="https://github.com/noirscape/retaggr", 8 | license="LGPLv3", 9 | description="Reverse image searching utility for images.", 10 | long_description=open("README.md").read(), 11 | long_description_content_type='text/markdown', 12 | author="Valentijn 'noirscape' V.", 13 | author_email="neko@catgirlsin.space", 14 | classifiers=[ 15 | "Development Status :: 5 - Production/Stable", 16 | "License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)", 17 | "Programming Language :: Python :: 3", 18 | ], 19 | keywords="reverse image search booru", 20 | project_urls={ 21 | "Source": "https://github.com/noirscape/retaggr", 22 | "Tracker": "https://github.com/noirscape/retaggr/issues", 23 | "Documentation": "https://retaggr.rtfd.org" 24 
| }, 25 | packages=find_packages('src'), 26 | package_dir={'':'src',}, 27 | install_requires=[ 28 | "lxml", 29 | "fake-useragent", 30 | "requests", 31 | "aiohttp" 32 | ] 33 | ) 34 | -------------------------------------------------------------------------------- /src/retaggr/config.py: -------------------------------------------------------------------------------- 1 | class ReverseSearchConfig: 2 | """ 3 | Configuration object for :class:`ReverseSearch` 4 | 5 | All parameters are prefixed with the specific type needed for reverse searching. 6 | 7 | Check the relevant class for the engine for details on the engine. 8 | 9 | :param danbooru_username: Username on :class:`Danbooru` 10 | :type danbooru_username: str 11 | :param danbooru_api_key: API key on :class:`Danbooru` 12 | :type danbooru_api_key: 13 | :param e621_username: Your :class:`E621` username 14 | :type e621_username: str 15 | 16 | :param app_name: The name of your application (required for the e621 handler in :class:`SauceNao`). 17 | :type app_name: str 18 | :param version: The version of your application (required for the e621 handler in :class:`SauceNao`). 19 | :type version: float 20 | 21 | :param saucenao_api_key: An API key for :class:`SauceNao`. 22 | :type saucenao_api_key: str 23 | 24 | :param min_score: Minimum search match percentage needed (required for ALL boorus except :class:`Paheal` and :class:`SauceNao`). 25 | :type min_score: float 26 | 27 | :param skip_iqdb: Don't instantiate the :class:`IQDB` class. 28 | :type skip_iqdb: bool 29 | """ 30 | def __init__(self, **kwargs): 31 | self.__dict__.update(kwargs) -------------------------------------------------------------------------------- /src/retaggr/engines/paheal.py: -------------------------------------------------------------------------------- 1 | from retaggr.engines.base import Engine, ImageResult 2 | from retaggr.errors import NotAvailableSearchException 3 | 4 | # External imports 5 | import hashlib 6 | from retaggr.aiohttp_requests import requests 7 | import xml.etree.ElementTree as ET 8 | 9 | class Paheal(Engine): 10 | """Reverse searches https://rule34.paheal.net for images. 11 | 12 | This booru does require images to be downloaded before searching. 13 | """ 14 | host = None 15 | download_required = True 16 | 17 | def __init__(self): 18 | pass 19 | 20 | async def search_image(self, url): 21 | tags = [] 22 | source = [] 23 | 24 | m = hashlib.md5() 25 | r = await requests.get(url) 26 | async for data in r.content.iter_chunked(8192): 27 | m.update(data) 28 | md5_hash = m.hexdigest() 29 | paheal_request = await requests.get(f"http://rule34.paheal.net/api/danbooru/find_posts?md5={md5_hash}") 30 | xml_tree = ET.fromstring(await paheal_request.text()) 31 | 32 | for post in xml_tree: 33 | for tag in post.attrib["tags"].split(): 34 | tags.append(tag.lower()) 35 | source.append(post.attrib["source"]) 36 | return ImageResult(tags, source, None) 37 | 38 | async def search_tag(self, tag): 39 | """Reverse search the booru for tag data. 
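        :raises NotAvailableSearchException: Paheal does not support tag searches, so this method always raises.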
40 | """ 41 | raise NotAvailableSearchException("This engine cannot search tags.") -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: build test clean-build dist 2 | .DEFAULT_GOAL := help 3 | 4 | bold:= $(shell tput bold) 5 | sgr0:= $(shell tput sgr0) 6 | 7 | pyc-clean: ## Remove cython compilation files from the source directories 8 | @printf "$(bold)Removing __pycache__, .pyc and .pyo files$(sgr0)\n" 9 | find . | grep --extended-regexp '(__pycache__|\.pyc|\.pyo$$)' | xargs rm --force --recursive 10 | 11 | build-clean: ## Remove the build directories 12 | @printf "$(bold)Cleaning up build directories$(sgr0)\n" 13 | rm --force --recursive build/ 14 | rm --force --recursive dist/ 15 | rm --force --recursive src/*.egg-info 16 | 17 | build: ## Build the package for distribution 18 | @printf "$(bold)Building package$(sgr0)\n" 19 | python3 setup.py sdist bdist_wheel 20 | 21 | clean: build-clean pyc-clean ## Run all clean steps 22 | 23 | clean-build: clean build ## Remove the build directory and build the package 24 | 25 | test: ## Run the tests 26 | @printf "$(bold)Running tests$(sgr0)\n" 27 | py.test --cov=src --cov-config=.coveragerc -W ignore::DeprecationWarning 28 | 29 | clean-test: clean test ## Cleanup execution files, then run the tests 30 | 31 | dist: clean build ## Remove build directories, build the package, and run twine 32 | @printf "$(bold)Uploading distribution$(sgr0)\n" 33 | twine upload dist/* 34 | 35 | docs: clean 36 | @printf "$(bold)Building docs$(sgr0)\n" 37 | cd docs && $(MAKE) html coverage 38 | 39 | help: 40 | @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' 41 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## retaggr 2 | [![Coverage Status](https://coveralls.io/repos/github/noirscape/retaggr/badge.svg?branch=master)](https://coveralls.io/github/noirscape/retaggr?branch=master) [![GitHub license](https://img.shields.io/github/license/noirscape/retaggr)](https://github.com/noirscape/retaggr/blob/master/LICENSE) [![Build Status](https://travis-ci.org/noirscape/retaggr.svg?branch=master)](https://travis-ci.org/noirscape/retaggr) 3 | [![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2Fnoirscape%2Fretaggr.svg?type=shield)](https://app.fossa.io/projects/git%2Bgithub.com%2Fnoirscape%2Fretaggr?ref=badge_shield) 4 | 5 | Library to reverse search various boorus. 6 | 7 | See the documentation for details. 8 | 9 | ## Example 10 | 11 | ```py 12 | from retaggr import ReverseSearch, ReverseSearchConfig 13 | config = ReverseSearchConfig(min_score=80.0) 14 | rsearch = ReverseSearch(config) 15 | result = asyncio.run(rsearch.reverse_search("https://danbooru.donmai.us/data/__tsukumo_benben_touhou_drawn_by_elise_piclic__6e6da59922b923391f02ba1ce78f9b42.jpg")) 16 | ``` 17 | 18 | ## Licensing 19 | 20 | This project is under the GNU LGPLv3 license. 21 | 22 | In addition, this project contains a local copy of the aiohttp_requests package (this is to resolve a minor dependency pinning problem aiohttp_requests on pip has). This library is under the MIT. Check the header of the init file for the license. 
23 | 24 | [![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2Fnoirscape%2Fretaggr.svg?type=large)](https://app.fossa.io/projects/git%2Bgithub.com%2Fnoirscape%2Fretaggr?ref=badge_large) -------------------------------------------------------------------------------- /src/retaggr/engines/danbooru.py: -------------------------------------------------------------------------------- 1 | from retaggr.engines.base import Engine, ImageResult 2 | 3 | # External modules 4 | import asyncio 5 | import requests as fuck_aiohttp 6 | import functools 7 | 8 | class Danbooru(Engine): 9 | """Reverse searches https://danbooru.donmai.us for images. 10 | 11 | This booru does not required images to be downloaded before searching. 12 | 13 | :param username: The danbooru username you wish to use. 14 | :type username: str 15 | :param api_key: A danbooru API key. You can obtain one from your profile. 16 | :type api_key: str 17 | :param min_score: Minimum search match percentage needed. 18 | :type min_score: float 19 | """ 20 | host = "https://danbooru.donmai.us" 21 | download_required = False 22 | 23 | def __init__(self, username, api_key, min_score): 24 | self.min_score = min_score 25 | self.username = username 26 | self.api_key = api_key 27 | 28 | async def search_image(self, url): 29 | tags = [] 30 | source = [] 31 | rating = None 32 | 33 | iqdb_url = self.host + "/iqdb_queries.json" 34 | loop = asyncio.get_event_loop() 35 | r = await loop.run_in_executor(None, functools.partial(fuck_aiohttp.get, iqdb_url, params={"url":url}, auth=(self.username, self.api_key))) 36 | json = r.json() 37 | if 'success' in json: 38 | if not json['success']: # pragma: no cover 39 | return ImageResult(tags, source, rating) 40 | 41 | if len(json) > 0: 42 | if json[0]['score'] > self.min_score: 43 | tags = json[0]["post"]["tag_string"].split() 44 | source.append(json[0]["post"]["source"]) 45 | rating = json[0]["post"]["rating"] 46 | return ImageResult(tags, source, rating) -------------------------------------------------------------------------------- /src/retaggr/engines/saucenao/handlers/e621.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import functools 3 | import time 4 | 5 | import requests as fuck_aiohttp 6 | 7 | from .base import SauceNaoHandler 8 | from retaggr.aiohttp_requests import requests 9 | 10 | class E621Handler(SauceNaoHandler): 11 | engine_id = 29 12 | """""" 13 | 14 | tag_capable = True 15 | """""" 16 | 17 | source_capable = True 18 | """""" 19 | 20 | 21 | last_request = None 22 | 23 | def __init__(self, username, app_name, version): 24 | self.user_agent = {"User-Agent": f"{app_name}/{version} (by {username} on e621)"} 25 | 26 | async def rate_limit_wait(self): 27 | if self.last_request is not None: # pragma: no cover 28 | current_time = time.time() 29 | if current_time == self.last_request: 30 | asyncio.sleep(1) 31 | 32 | async def get_tag_data(self, data): 33 | await self.rate_limit_wait() 34 | r = await requests.get("https://e621.net/posts.json", headers=self.user_agent, params={"tags": "id:" + str(data["e621_id"])}) 35 | j = await r.json() 36 | self.last_request = time.time() 37 | 38 | tags = set() 39 | for category in j["posts"][0]["tags"]: 40 | for tag in j["posts"][0]["tags"][category]: 41 | tags.add(tag) 42 | 43 | return tags 44 | 45 | async def get_source_data(self, data): 46 | await self.rate_limit_wait() 47 | r = await requests.get("https://e621.net/posts.json", headers=self.user_agent, params={"tags": "id:" + 
str(data["e621_id"])}) 48 | j = await r.json() 49 | self.last_request = time.time() 50 | 51 | sources = set() 52 | for source in j["posts"][0]["sources"]: 53 | sources.add(source) 54 | 55 | return sources 56 | -------------------------------------------------------------------------------- /src/retaggr/engines/base.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import asyncio 3 | from collections import namedtuple 4 | 5 | ImageResult = namedtuple("ImageResult", ["tags", "source", "rating"]) 6 | """The response from the engine. 7 | 8 | .. py:attribute:: tags 9 | 10 | The tags the engine has located. 11 | 12 | .. py:attribute:: source 13 | 14 | The sources that have been found for the image. 15 | 16 | .. py:attribute:: rating 17 | 18 | The rating on the image. 19 | 20 | """ 21 | 22 | 23 | class Engine: 24 | """Base class for an engine. 25 | 26 | All the engine classes must derive from this class. 27 | 28 | :ivar host: The base URL for the reverse image domain. This is not an API endpoint, but can be a link to IQDB or something similar. 29 | :vartype host: str 30 | :ivar download_required: Determines if the ``search_image()`` function will download the image to be searched beforehand or not. 31 | :vartype download_required: bool 32 | :ivar datetime.datetime ~.last_request: Optional. Instance variable that can be set to make use of :meth:`Booru.sleep_until_ratelimit`. Update after making a request. If you just use the library, this should be done by the engine so there's no need to manually set it. 33 | """ 34 | host = None 35 | download_required = False 36 | 37 | def __init__(self): # pragma: no cover 38 | self.last_request = None 39 | raise NotImplementedError("Expand this method to include all needed keys.") 40 | 41 | async def search_tag(self, tag): # pragma: no cover 42 | """Reverse search the booru for tag data. 43 | """ 44 | raise NotImplementedError("Expand this method to include the logic needed to reverse search.") 45 | 46 | async def search_image(self, url): # pragma: no cover 47 | """Reverse search the engine for ``url``. 48 | 49 | :param str url: URL to search 50 | :rtype: ImageResult 51 | """ 52 | raise NotImplementedError("Expand this method to include the logic needed to reverse search.") -------------------------------------------------------------------------------- /docs/dev-handler.rst: -------------------------------------------------------------------------------- 1 | SauceNao Handler Guidelines 2 | ============================= 3 | 4 | This document details the rough process of adding new handlers to SauceNao 5 | 6 | Step 1: Locate the engine ID 7 | ------------------------------ 8 | 9 | A list of all the engine IDs can be located at https://saucenao.com/status.html . Make sure to note it down. 10 | 11 | Step 2: Copy the base class 12 | ----------------------------- 13 | 14 | A base interface class can be found in the handlers folder (`base.py`). 15 | Copy it and rename the file and class to your new engine. 16 | The rule is that each handler is suffixed with Handler and the name before that must be descriptive of the handler. 17 | 18 | Step 3: Implement the methods 19 | ------------------------------- 20 | 21 | Assuming your handler does not need additional API keys or such, simply implement the :meth:`get_tag_data` and :meth:`get_source_data` methods. 22 | 23 | These methods receive the subset of the result data that is relevant. 
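As a sketch only, a new handler modelled on the existing ones might look like the block below (``ExampleBooruHandler``, the engine ID ``999``, the ``https://booru.example`` URL and the ``examplebooru_id`` key are hypothetical placeholders, not a real SauceNao index):

.. code-block:: python

    import asyncio
    import functools

    import requests

    from .base import SauceNaoHandler

    class ExampleBooruHandler(SauceNaoHandler):
        engine_id = 999  # placeholder; use the real index ID from https://saucenao.com/status.html
        tag_capable = True
        source_capable = False

        async def get_tag_data(self, data):
            # Fetch the matched post by the ID SauceNao returned and collect its tags.
            loop = asyncio.get_event_loop()
            r = await loop.run_in_executor(
                None,
                functools.partial(
                    requests.get,
                    "https://booru.example/post.json",
                    params={"tags": "id:" + str(data["examplebooru_id"])},
                ),
            )
            return set(r.json()[0]["tags"].split())
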
Usually it is possible to locate an ID of some sorts that can be used to retrieve additional information. 24 | 25 | Step 3a: Additional API keys 26 | ------------------------------ 27 | 28 | If you need additional API keys, follow the instructions on how to change the config in the adding a new engine section of the documentation. The config entry does not have to be changed for this to function. 29 | 30 | Step 4: Writing tests 31 | ----------------------- 32 | 33 | Write a single test for your handler. Mock the relevant input data that your handler needs to function. 34 | 35 | Step 5: Add it to the SauceNao engine 36 | --------------------------------------- 37 | 38 | As written. If you don't need extra API keys, just add an instance to the :attr:`handlers` dictionary that is created at instantiation time. The key is the engine ID. 39 | 40 | Step 5a: Additional API keys 41 | ------------------------------- 42 | 43 | If you have additional API keys, don't add an instance to the :attr:`handlers` attribute, but instead write an additional method that adds the instance to the dictionary. Then modify the core to use this activation method. 44 | 45 | Step 6: PR the changes 46 | ------------------------ 47 | 48 | As it says on the tin. Specify any new environment variables that are required for CI to work in your PR. 49 | -------------------------------------------------------------------------------- /tests/test_engine.py: -------------------------------------------------------------------------------- 1 | import retaggr 2 | import retaggr.engines as engines 3 | import os 4 | import pytest 5 | import time 6 | 7 | # Logging 8 | import logging 9 | logging.basicConfig(level=logging.DEBUG) 10 | 11 | # Grab the relevant keys from the environment 12 | danbooru_username = os.environ.get('DANBOORU_USERNAME', None) 13 | danbooru_api_key = os.environ.get('DANBOORU_API_KEY', None) 14 | e621_username = os.environ.get('E621_USERNAME', None) 15 | app_name = os.environ.get('APP_NAME', None) 16 | version = os.environ.get('APP_VERSION', None) 17 | if not all([danbooru_username, danbooru_api_key, e621_username, app_name, version]): 18 | raise ValueError("Missing Environment variables") 19 | 20 | @pytest.mark.asyncio 21 | async def test_danbooru(): 22 | engine = engines.danbooru.Danbooru(danbooru_username, danbooru_api_key, 80.0) 23 | result = await engine.search_image("https://danbooru.donmai.us/data/__tsukumo_benben_touhou_drawn_by_elise_piclic__6e6da59922b923391f02ba1ce78f9b42.jpg") 24 | assert 'tsukumo_benben' in result.tags 25 | 26 | @pytest.mark.asyncio 27 | async def test_iqdb(): 28 | engine = engines.iqdb.Iqdb(80.0) 29 | result = await engine.search_image("https://danbooru.donmai.us/data/__tsukumo_benben_touhou_drawn_by_elise_piclic__6e6da59922b923391f02ba1ce78f9b42.jpg") 30 | if not result.tags: 31 | pytest.xfail("IQDB retrieved no tag data. 
This is likely not a code failure, since IQDB relies on HTML parsing which is wonky.") 32 | assert 'biwa_lute' in result.tags 33 | 34 | @pytest.mark.asyncio 35 | async def test_iqdb_tag(): 36 | engine = engines.iqdb.Iqdb(80.0) 37 | with pytest.raises(retaggr.NotAvailableSearchException): 38 | await engine.search_tag("doesnt matter") 39 | 40 | @pytest.mark.asyncio 41 | async def test_paheal(): 42 | engine = engines.paheal.Paheal() 43 | result = await engine.search_image("https://iris.paheal.net/_images/f0a277f7c4e80330b843f8002daf627e/1876780%20-%20Dancer_of_the_Boreal_Valley%20Dark_Souls%20Dark_Souls_3%20Sinensian.jpg") 44 | assert 'dancer_of_the_boreal_valley' in result.tags 45 | 46 | @pytest.mark.asyncio 47 | async def test_paheal_tag(): 48 | engine = engines.paheal.Paheal() 49 | with pytest.raises(retaggr.NotAvailableSearchException): 50 | await engine.search_tag("doesnt matter") 51 | -------------------------------------------------------------------------------- /docs/dev-engine.rst: -------------------------------------------------------------------------------- 1 | Engine Guidelines 2 | ====================== 3 | 4 | This document details the rough process of adding a new engine to retaggr. 5 | 6 | Step 1: Making the engine class 7 | -------------------------------- 8 | 9 | - Start by creating a new file in the engines folder. 10 | - In this file, import the base :class:`Engine` class and subclass it. This class will be used as a base for the engine class. 11 | - Implement the search logic in the :meth:`search_image` method. This method accepts one parameter, which is the image URL. 12 | - If your engine needs an API key or some other user defined variable, add it to the :meth:`__init__` method. 13 | - Define if the engine needs to download the image locally in order to search it by setting the :attr:`Engine.download_required` attribute. 14 | - Define the :attr:`Engine.host` attribute. This should be a human-visitable URL that links to the engine itself. 15 | 16 | Step 2: Expanding the config object. 17 | -------------------------------------- 18 | 19 | This is purely documentation. Add your new parameters to :class:`ReverseSearchConfig`, where their names should be prefixed with 20 | ``filename_``, where ``filename`` is the file you made in step 1. 21 | 22 | 23 | Step 3: Adding it to core 24 | --------------------------- 25 | 26 | - Add the filename you just created to :attr:`ReverseSearch._all_engines`. 27 | - In the :meth:`__init__`, verify that the variables you need are in the config object and if they are, 28 | instantiate the class you just made in the :meth:`__init__` and assign it to :attr:`ReverseSearch.accessible_engines` 29 | where the key is the variable you added to :attr:`ReverseSearch._all_engines` and the value the class. 30 | 31 | Step 4: Writing tests 32 | ----------------------- 33 | 34 | You only need to write one test. This test should search an image using the class you just created, directly instantiating it's underlying 35 | object (so not through ReverseSearch) by calling the :meth:`search_image` method and then asserting the results with an existing list. 36 | 37 | If your engine needs an API key or a user defined variable, add it to an environment variable and load it in (reference the start of `test_engine.py`). 38 | 39 | Step 5: PR the changes 40 | ------------------------ 41 | 42 | As it says on the tin. Specify any new environment variables that are required for CI to work in your PR. 
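Putting steps 1 through 3 together, a bare-bones engine might look roughly like the sketch below. The ``ExampleBooru`` class, its URL and the JSON fields it reads are illustrative assumptions only, not an engine that ships with retaggr:

.. code-block:: python

    # src/retaggr/engines/examplebooru.py (hypothetical)
    from retaggr.engines.base import Engine, ImageResult
    from retaggr.aiohttp_requests import requests

    class ExampleBooru(Engine):
        """Reverse searches a hypothetical booru for images."""
        host = "https://booru.example"
        download_required = False

        def __init__(self, api_key, min_score):
            self.api_key = api_key
            self.min_score = min_score

        async def search_image(self, url):
            tags = []
            source = []
            # Assumed API shape: a JSON list of matches with score/tags/source fields.
            r = await requests.get(self.host + "/api/search.json", params={"url": url, "key": self.api_key})
            j = await r.json()
            if j and j[0]["score"] > self.min_score:
                tags = j[0]["tags"].split()
                source.append(j[0]["source"])
            return ImageResult(tags, source, None)
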
43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | db.sqlite3-journal 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # IPython 80 | profile_default/ 81 | ipython_config.py 82 | 83 | # pyenv 84 | .python-version 85 | 86 | # pipenv 87 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 88 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 89 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 90 | # install all needed dependencies. 
91 | #Pipfile.lock 92 | 93 | # celery beat schedule file 94 | celerybeat-schedule 95 | 96 | # SageMath parsed files 97 | *.sage.py 98 | 99 | # Environments 100 | #.env 101 | .venv 102 | env/ 103 | venv/ 104 | ENV/ 105 | env.bak/ 106 | venv.bak/ 107 | 108 | # Spyder project settings 109 | .spyderproject 110 | .spyproject 111 | 112 | # Rope project settings 113 | .ropeproject 114 | 115 | # mkdocs documentation 116 | /site 117 | 118 | # mypy 119 | .mypy_cache/ 120 | .dmypy.json 121 | dmypy.json 122 | 123 | # Pyre type checker 124 | .pyre/ 125 | 126 | # filled out test file 127 | test.sh -------------------------------------------------------------------------------- /tests/test_saucenao.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | import retaggr 4 | import retaggr.engines.saucenao.handlers as handlers 5 | from retaggr.engines import SauceNao 6 | 7 | e621_username = os.environ.get('E621_USERNAME', None) 8 | app_name = os.environ.get('APP_NAME', None) 9 | version = os.environ.get('APP_VERSION', None) 10 | if not all([e621_username, app_name, version]): 11 | raise ValueError("Missing Environment variables") 12 | 13 | @pytest.mark.asyncio 14 | async def test_saucenao(): 15 | engine = SauceNao(None, True) 16 | engine.enable_e621(e621_username, app_name, version) 17 | answer = await engine.search_image(None) 18 | assert "touhou" in answer.tags 19 | 20 | @pytest.mark.asyncio 21 | async def test_danbooru(): 22 | handler = handlers.DanbooruHandler() 23 | data = {"danbooru_id": 3820633} 24 | answer = await handler.get_tag_data(data) 25 | assert "persona" in answer 26 | 27 | @pytest.mark.asyncio 28 | async def test_gelbooru_tags(): 29 | handler = handlers.GelbooruHandler() 30 | data = {"gelbooru_id": 5572304} 31 | answer = await handler.get_tag_data(data) 32 | assert "helltaker" in answer 33 | 34 | @pytest.mark.asyncio 35 | async def test_gelbooru_source(): 36 | handler = handlers.GelbooruHandler() 37 | data = {"gelbooru_id": 5572304} 38 | answer = await handler.get_source_data(data) 39 | assert "https://twitter.com/mugenjin/status/1309972603910586369" in answer 40 | 41 | @pytest.mark.asyncio 42 | async def test_e621_tags(): 43 | handler = handlers.E621Handler(e621_username, app_name, version) 44 | data = {"e621_id": 2174881} 45 | answer = await handler.get_tag_data(data) 46 | assert "hornet_(hollow_knight)" in answer 47 | 48 | @pytest.mark.asyncio 49 | async def test_e621_source(): 50 | handler = handlers.E621Handler(e621_username, app_name, version) 51 | data = {"e621_id": 2174881} 52 | answer = await handler.get_source_data(data) 53 | assert "https://twitter.com/kililewd/status/1237068755521462273?s=19" in answer 54 | 55 | @pytest.mark.asyncio 56 | async def test_konachan(): 57 | handler = handlers.KonachanHandler() 58 | data = {"konachan_id": 303016} 59 | answer = await handler.get_tag_data(data) 60 | assert "no-kan" in answer 61 | 62 | @pytest.mark.asyncio 63 | async def test_yandere(): 64 | handler = handlers.YandereHandler() 65 | data = {"yandere_id": 618735} 66 | answer = await handler.get_tag_data(data) 67 | assert "pantsu" in answer 68 | 69 | @pytest.mark.asyncio 70 | async def test_saucenao_tag(): 71 | engine = SauceNao(None, True) 72 | with pytest.raises(retaggr.NotAvailableSearchException): 73 | await engine.search_tag("doesnt matter") 74 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 
| # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | sys.path.insert(0, os.path.abspath("../src")) 16 | 17 | # -- Project information ----------------------------------------------------- 18 | 19 | project = 'retaggr' 20 | copyright = '2019-2020, noirscape' 21 | author = 'noirscape' 22 | 23 | # The full version, including alpha/beta/rc tags 24 | from retaggr import version_info 25 | def get_tag(releaselevel): 26 | d = { 27 | "alpha": "a", 28 | "beta": "b", 29 | "candidate": "rc", 30 | "final": "" 31 | } 32 | return d[releaselevel] 33 | 34 | release = "{}.{}.{}{}".format(version_info.major, version_info.minor, version_info.micro, get_tag(version_info.releaselevel)) 35 | 36 | 37 | # -- General configuration --------------------------------------------------- 38 | 39 | # Add any Sphinx extension module names here, as strings. They can be 40 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 41 | # ones. 42 | extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.coverage', 'sphinx.ext.intersphinx', 'sphinx.ext.viewcode' 43 | ] 44 | 45 | # Add any paths that contain templates here, relative to this directory. 46 | templates_path = ['_templates'] 47 | 48 | # List of patterns, relative to source directory, that match files and 49 | # directories to ignore when looking for source files. 50 | # This pattern also affects html_static_path and html_extra_path. 51 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 52 | 53 | # Set the master doc 54 | master_doc = "index" 55 | 56 | # -- Options for HTML output ------------------------------------------------- 57 | 58 | # The theme to use for HTML and HTML Help pages. See the documentation for 59 | # a list of builtin themes. 60 | # 61 | html_theme = 'alabaster' 62 | 63 | # Add any paths that contain custom static files (such as style sheets) here, 64 | # relative to this directory. They are copied after the builtin static files, 65 | # so a file named "default.css" will overwrite the builtin "default.css". 66 | html_static_path = ['_static'] 67 | 68 | # Intersphinx stuff 69 | intersphinx_mapping = {'py': ('https://docs.python.org/3', None)} -------------------------------------------------------------------------------- /src/retaggr/engines/iqdb.py: -------------------------------------------------------------------------------- 1 | from retaggr.engines.base import Engine, ImageResult 2 | from retaggr.errors import NotAvailableSearchException 3 | 4 | # External imports 5 | import asyncio 6 | import functools 7 | import requests as fuck_aiohttp 8 | from fake_useragent import UserAgent 9 | from lxml import html 10 | 11 | class Iqdb(Engine): 12 | """Reverse searches https://iqdb.org for images. 13 | 14 | This booru does not required images to be downloaded before searching. 15 | 16 | This method may have unexpected failures relating to HTML parsing. 
This is because IQDB does not officially offer an API and the raw HTML 17 | parsing is not functional if the page is not fully loaded. 18 | 19 | :param min_score: Minimum search match percentage needed. 20 | :type min_score: float 21 | """ 22 | host = "https://iqdb.org" 23 | download_required = False 24 | 25 | def __init__(self, min_score): 26 | self.min_score = min_score 27 | self.ua = UserAgent() 28 | 29 | async def search_image(self, url): 30 | tags = [] 31 | 32 | params = {"url" : url} 33 | loop = asyncio.get_event_loop() 34 | r = await loop.run_in_executor(None, functools.partial(fuck_aiohttp.post, self.host, headers={"User-Agent": self.ua.random}, params=params)) 35 | 36 | doc = html.fromstring(r.text) 37 | tables = doc.xpath("//div[@id='pages']/div/table/tr/td") 38 | 39 | row = 6 40 | while row < len(tables): 41 | # Percent similair 42 | if (tables[row].text) is None: # pragma: no cover 43 | row = row + 6 44 | continue 45 | try: 46 | percent = float(str.split(tables[row].text)[0][:-1]) 47 | # Create tags list 48 | tags_str = tables[row-2].xpath("//a/img") 49 | temp_tags = tags_str[0].get('alt').split("Tags: ", 1)[1] 50 | # IQDBs tag responses leave... something to be desired. To be clear, 51 | # Given the tag string "a,b, c, D" 52 | # These two lines should sanitize the tags to 53 | # ["a", "b", "c", "d"] 54 | tags = [x.lower().replace(",", " ").split() for x in temp_tags.split()] 55 | tags = [x for y in tags for x in y] 56 | 57 | if percent > self.min_score: 58 | tags.extend(tags) 59 | except: # pragma: no cover 60 | pass 61 | row = row + 6 62 | 63 | return ImageResult(tags, [], None) 64 | 65 | async def search_tag(self, tag): 66 | """Reverse search the booru for tag data. 67 | """ 68 | raise NotAvailableSearchException("This engine cannot search tags.") -------------------------------------------------------------------------------- /tests/test_core.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import retaggr 3 | import os 4 | 5 | # Logging 6 | import logging 7 | logging.basicConfig(level=logging.DEBUG) 8 | 9 | # Grab the relevant keys from the environment 10 | danbooru_username = os.environ.get('DANBOORU_USERNAME', None) 11 | danbooru_api_key = os.environ.get('DANBOORU_API_KEY', None) 12 | e621_username = os.environ.get('E621_USERNAME', None) 13 | app_name = os.environ.get('APP_NAME', None) 14 | version = os.environ.get('APP_VERSION', None) 15 | if not all([danbooru_username, danbooru_api_key, e621_username, app_name, version]): 16 | raise ValueError("Missing Environment variables") 17 | config = retaggr.ReverseSearchConfig(danbooru_username=danbooru_username, danbooru_api_key=danbooru_api_key, e621_username=e621_username, app_name=app_name, version=version, min_score=80.0) 18 | 19 | def test_core_creation(): 20 | core = retaggr.ReverseSearch(config) 21 | assert core.config == config 22 | 23 | def test_core_creation_with_no_iqdb(): 24 | special_config = retaggr.ReverseSearchConfig(skip_iqdb=True, min_score=80.0) # Minimal IQDB config 25 | core = retaggr.ReverseSearch(special_config) 26 | assert core.config == special_config 27 | 28 | @pytest.mark.asyncio 29 | async def test_core_search_image_not_a_booru(): 30 | core = retaggr.ReverseSearch(config) 31 | with pytest.raises(retaggr.NotAValidEngineException): 32 | await core.search_image("nO", "irrelevant") 33 | 34 | @pytest.mark.asyncio 35 | async def test_core_search_no_download(): 36 | core = retaggr.ReverseSearch(config) 37 | result = await 
core.reverse_search("https://danbooru.donmai.us/data/__tsukumo_benben_touhou_drawn_by_elise_piclic__6e6da59922b923391f02ba1ce78f9b42.jpg") 38 | assert 'tsukumo_benben' in result.tags 39 | 40 | @pytest.mark.asyncio 41 | async def test_core_search_image_not_all_api_keys(): 42 | core = retaggr.ReverseSearch(retaggr.ReverseSearchConfig()) # Since we need a core without the config for this 43 | with pytest.raises(retaggr.MissingAPIKeysException): 44 | await core.search_image("danbooru", "irrelevant") 45 | 46 | @pytest.mark.asyncio 47 | async def test_image_core(): 48 | core = retaggr.ReverseSearch(config) 49 | result = await core.search_image("paheal", "https://iris.paheal.net/_images/f0a277f7c4e80330b843f8002daf627e/1876780%20-%20Dancer_of_the_Boreal_Valley%20Dark_Souls%20Dark_Souls_3%20Sinensian.jpg") 50 | assert 'dancer_of_the_boreal_valley' in result.tags 51 | 52 | @pytest.mark.asyncio 53 | async def test_reverse_search(): 54 | core = retaggr.ReverseSearch(config, True) 55 | result = await core.reverse_search("https://iris.paheal.net/_images/f0a277f7c4e80330b843f8002daf627e/1876780%20-%20Dancer_of_the_Boreal_Valley%20Dark_Souls%20Dark_Souls_3%20Sinensian.jpg", download=True) 56 | assert 'dancer_of_the_boreal_valley' in result.tags 57 | 58 | @pytest.mark.asyncio 59 | async def test_reverse_search_callback(): 60 | core = retaggr.ReverseSearch(config) 61 | calls = 0 62 | async def callback(engine, rresult): 63 | nonlocal calls 64 | calls += 1 65 | await core.reverse_search("https://iris.paheal.net/_images/f0a277f7c4e80330b843f8002daf627e/1876780%20-%20Dancer_of_the_Boreal_Valley%20Dark_Souls%20Dark_Souls_3%20Sinensian.jpg", callback=callback, download=True) 66 | assert calls > 0 67 | -------------------------------------------------------------------------------- /docs/user.rst: -------------------------------------------------------------------------------- 1 | .. currentmodule:: retaggr 2 | .. _user: 3 | 4 | Getting Started 5 | ================= 6 | 7 | Using retaggr is extremely easy. All functions are provided through the :class:`ReverseSearch` class. 8 | 9 | The :class:`ReverseSearch` should be instantiated with the :class:`ReverseSearchConfig` class. 10 | 11 | :class:`ReverseSearchConfig` technically doesn't need any parameters, but it's highly recommended to at 12 | least pass ``min_score``. 13 | 14 | An example of how to do so can be found below. 15 | 16 | .. code-block:: python 17 | 18 | # Relevant imports 19 | from retaggr import ReverseSearch, ReverseSearchConfig 20 | 21 | # Technically the config object doesn't need any parameters to work. 22 | # That said, the only option available at that point is Paheal. 23 | # min_score is required to search IQDB, whilst other engines 24 | # will require their own API keys. 25 | # See the API reference for relevant keys and values. 26 | config = ReverseSearchConfig(min_score=80.0) 27 | 28 | # Next we instantiate the object 29 | rsearch = ReverseSearch(config) 30 | 31 | After that it's possible to search any properly instantiated engine from an asynchronous context. 32 | 33 | .. code-block:: python 34 | 35 | # Searching IQDB using our previous object. 36 | result = await rsearch.search_image("iqdb", "https://danbooru.donmai.us/data/__tsukumo_benben_touhou_drawn_by_elise_piclic__6e6da59922b923391f02ba1ce78f9b42.jpg") 37 | 38 | Acceptable parameters for :meth:`ReverseSearch.search_image` are filenames found in the ``engines`` subfolder. 
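Passing a name that does not match any engine raises :class:`NotAValidEngineException`, and searching an engine whose API keys were not supplied in the config raises :class:`MissingAPIKeysException`, so a defensive call can catch both (illustrative sketch):

.. code-block:: python

    from retaggr import MissingAPIKeysException, NotAValidEngineException

    try:
        result = await rsearch.search_image("danbooru", "https://danbooru.donmai.us/data/__tsukumo_benben_touhou_drawn_by_elise_piclic__6e6da59922b923391f02ba1ce78f9b42.jpg")
    except NotAValidEngineException:
        print("No engine with that name exists.")
    except MissingAPIKeysException:
        print("The config is missing the API keys this engine needs.")
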
39 | 
40 | It is also possible to search all instantiated engines through the :meth:`ReverseSearch.reverse_search` method.
41 | 
42 | .. code-block:: python
43 | 
44 |     # This only searches IQDB and Paheal, since we haven't instantiated anything else.
45 |     result = await rsearch.reverse_search("https://danbooru.donmai.us/data/__tsukumo_benben_touhou_drawn_by_elise_piclic__6e6da59922b923391f02ba1ce78f9b42.jpg")
46 | 
47 | Do note that this method returns Sets rather than Lists, unlike the ``search_image`` method. This is to remove duplicate findings.
48 | 
49 | About asyncio
50 | ---------------
51 | 
52 | This is an asynchronous library. This means that it can only be called from an asynchronous context.
53 | That said, it might be desirable to call the class methods from a synchronous context. This is possible by
54 | using ``asyncio.run()``.
55 | 
56 | See the example below.
57 | 
58 | .. code-block:: python
59 | 
60 |     # Instantiate the main object normally (instantiation is not asynchronous).
61 |     rsearch = ReverseSearch(ReverseSearchConfig())
62 | 
63 |     # Use asyncio.run() for executing the search methods.
64 |     result = asyncio.run(rsearch.reverse_search("https://danbooru.donmai.us/data/__tsukumo_benben_touhou_drawn_by_elise_piclic__6e6da59922b923391f02ba1ce78f9b42.jpg"))
65 | 
66 |     # result will have the reverse searched data.
67 | 
68 | For those using asyncio: at certain points this library runs blocking requests in threads using the event loop's ``run_in_executor``.
69 | 
70 | The reason for this is that the ``aiohttp`` library attempts to sanitize URLs, so this library
71 | falls back on using ``requests`` (run in an executor) when that becomes an issue.
72 | 
73 | The ``aiohttp`` developers have stated that this is caused by server misconfiguration,
74 | and that they do not intend to fix it.
--------------------------------------------------------------------------------
/src/retaggr/aiohttp_requests/__init__.py:
--------------------------------------------------------------------------------
1 | # MIT License
2 | #
3 | # Copyright (c) 2018 Max Zheng
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
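# NOTE: The module below is a vendored copy of the MIT-licensed `aiohttp_requests`
# wrapper by Max Zheng. Per the 2.2.0 changelog entry, it is kept in-tree rather
# than pulled in as a dependency so that aiohttp does not end up version pinned.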
22 | import aiohttp
23 | import functools
24 | 
25 | # Patch ClientResponse.read to release immediately after read so we don't need to worry about that / use context manager
26 | _read_only = aiohttp.client_reqrep.ClientResponse.read
27 | async def _read_and_release(self): # noqa
28 |     try:
29 |         data = await _read_only(self)
30 |     finally:
31 |         self.close()
32 | 
33 |     return data
34 | aiohttp.client_reqrep.ClientResponse.read = _read_and_release
35 | 
36 | 
37 | class Requests:
38 |     """ Thin wrapper for aiohttp.ClientSession with Requests simplicity """
39 |     def __init__(self, *args, **kwargs):
40 |         self._session_args = (args, kwargs)
41 |         self._session = None
42 | 
43 |     @property
44 |     def session(self):
45 |         """ An instance of aiohttp.ClientSession """
46 |         if not self._session or self._session.closed or self._session.loop.is_closed():
47 |             self._session = aiohttp.ClientSession(*self._session_args[0], **self._session_args[1])
48 |         return self._session
49 | 
50 |     def __getattr__(self, attr):
51 |         if attr.upper() in aiohttp.hdrs.METH_ALL:
52 |             @functools.wraps(self.session._request)
53 |             def session_request(*args, **kwargs):
54 |                 """
55 |                 This ensures `self.session` is always called where it can check the session/loop state, so we can't use
56 |                 functools.partials, as monkeypatch seems to do something weird where __getattr__ is only called once for
57 |                 each attribute after the patch is undone.
58 |                 """
59 |                 return self.session._request(attr.upper(), *args, **kwargs)
60 | 
61 |             return session_request
62 |         else:
63 |             return super().__getattribute__(attr)
64 | 
65 |     def close(self):
66 |         """
67 |         Close aiohttp.ClientSession.
68 | 
69 |         This is useful to call manually in tests when each test uses a new loop. After close, new
70 |         requests will automatically create a new session.
71 | 
72 |         Note: We need a sync version for `__del__`, and `aiohttp.ClientSession.close()` is async even though it doesn't
73 |         have to be.
74 |         """
75 |         if self._session:
76 |             if not self._session.closed:
77 |                 # Older aiohttp does not have _connector_owner
78 |                 if not hasattr(self._session, '_connector_owner') or self._session._connector_owner:
79 |                     self._session._connector.close()
80 |                 self._session._connector = None
81 |             self._session = None
82 | 
83 |     def __del__(self):
84 |         self.close()
85 | 
86 | 
87 | requests = Requests()
88 | 
--------------------------------------------------------------------------------
/src/retaggr/engines/saucenao/engine.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import asyncio
3 | import functools
4 | 
5 | from retaggr.engines.base import Engine, ImageResult
6 | from retaggr.engines.saucenao.handlers import DanbooruHandler, GelbooruHandler, E621Handler, KonachanHandler, YandereHandler
7 | from retaggr.errors import NotAvailableSearchException, EngineCooldownException
8 | import requests as fuck_aiohttp
9 | 
10 | class SauceNao(Engine):
11 |     """Reverse searches the SauceNao API and then does additional matching.
12 | 
13 |     This engine does not require images to be downloaded before searching.
14 | 
15 |     This API is subject to rate limits.
16 | 
17 |     :param api_key: SauceNao API key. You can get this by registering an account on saucenao.com
18 |     :type api_key: str
19 |     :param test_mode: Enable test mode. Test mode is unique in that it does not need an API key, but it only works on one URL.
20 |     """
21 |     host = "https://saucenao.com"
22 |     download_required = False
23 | 
24 |     def __init__(self, api_key, test_mode=False):
25 |         self.api_key = api_key
26 |         self.handlers = {
27 |             DanbooruHandler.engine_id : DanbooruHandler(),
28 |             GelbooruHandler.engine_id : GelbooruHandler(),
29 |             KonachanHandler.engine_id : KonachanHandler(),
30 |             YandereHandler.engine_id : YandereHandler(),
31 |         }
32 |         self.test_mode = test_mode
33 | 
34 |     def enable_e621(self, username, app_name, version):
35 |         """Enable the E621 parser. This allows for looking up tag information on E621.
36 | 
37 |         :param username: An E621 username.
38 |         :type username: str
39 |         :param app_name: The name of the application.
40 |         :type app_name: str
41 |         :param version: The version of the application.
42 |         :type version: str
43 |         """
44 |         self.handlers[E621Handler.engine_id] = E621Handler(username, app_name, version)
45 | 
46 |     async def search_image(self, url):
47 |         request_url = "https://saucenao.com/search.php"
48 |         params = {
49 |             "db": "999", # No clever bitmasking -> need help with how to do that.
50 |             "api_key": self.api_key,
51 |             "output_type": "2", # 2 is the JSON API
52 |             "url": url
53 |         }
54 | 
55 |         if self.test_mode:
56 |             params = {
57 |                 "db": "999",
58 |                 "output_type": "2",
59 |                 "testmode": "1",
60 |                 "numres": "16",
61 |                 "url": "http://saucenao.com/images/static/banner.gif"
62 |             }
63 | 
64 |         loop = asyncio.get_event_loop()
65 |         r = await loop.run_in_executor(None, functools.partial(fuck_aiohttp.get, request_url, params=params))
66 |         j = r.json()
67 |         if r.status_code == 200:
68 |             return await self.index_parser(j)
69 |         elif r.status_code == 429: # pragma: no cover
70 |             raise EngineCooldownException()
71 | 
72 |     async def search_tag(self, tag):
73 |         raise NotAvailableSearchException("This engine cannot search tags.")
74 | 
75 |     async def index_parser(self, json):
76 |         """Parse the output from a successful saucenao search to retrieve data from specific indexes.
77 | 
78 |         :param json: JSON output from the API.
79 |         :type json: dict
80 |         :return: An :class:`ImageResult` containing the tag and source data parsed from the matched indexes.
81 |         :rtype: ImageResult
82 |         """
83 |         base_similarity = json["header"]["minimum_similarity"] # Grab the minimum similarity saucenao advises; going lower generally gives false positives.
84 | 
85 |         # Below we cast the _entry_ similarity to a float since somehow it's stored as a str.
86 |         # Damn API inaccuracy
87 |         valid_results = [entry for entry in json["results"] if float(entry["header"]["similarity"]) > base_similarity]
88 | 
89 |         # Test mode similarity override
90 |         if self.test_mode:
91 |             valid_results = json["results"]
92 | 
93 |         # Kinda looks stupid, but whatever.
94 |         loop = asyncio.get_event_loop()
95 |         source = set()
96 |         tags = set()
97 |         for entry in valid_results:
98 |             if "ext_urls" in entry["data"]: # Some of these responses don't have ext_urls...
99 |                 for url in entry["data"]["ext_urls"]:
100 |                     source.add(url)
101 |             handler = self.handlers.get(entry["header"]["index_id"], None)
102 |             if handler:
103 |                 if handler.tag_capable:
104 |                     tags.update(await handler.get_tag_data(entry["data"]))
105 |                 if handler.source_capable:
106 |                     source.update(await handler.get_source_data(entry["data"]))
107 | 
108 |         return ImageResult(tags, source, None)
109 | 
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 | All notable changes to this project will be documented in this file.
3 | 
4 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
5 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6 | 
7 | ## [Unreleased]
8 | 
9 | ## [3.1.0] - 2020-10-19
10 | 
11 | - Added Gelbooru handler to saucenao.
12 | 
13 | ## [3.0.0] - 2020-03-29
14 | 
15 | - Removed E621 engine. (iqdb.harry.lu shut down)
16 | - Added E621 handler to saucenao.
17 | - Changed saucenao engine internally to make adding handlers easier.
18 | - Changed response data for engines to allow returning more than one valid source.
19 | 
20 | ## [2.3.1] - 2019-10-20
21 | 
22 | - Recover from organization takedown/repo removal.
23 | - Added makefile for testing/building simplification
24 | - Added aiohttp to repository dependencies, it went missing after removing aiohttp_requests
25 | - Added attribute to config to skip IQDB creation.
26 | - Added dedicated exception type to handle engines that are unavailable due to external reasons.
27 | 
28 | ## [2.3.0] - 2019-10-06
29 | 
30 | - Changed logging so it goes to the correct logger.
31 | - Removed ratelimiting for SauceNao. It just raises an exception instead, no matter what.
32 |   - Sleeping causes underflows. Unless there's a way to properly handle this, there's no way around it.
33 | - Changed asyncio tasking
34 |   - Somewhere around a 30% speed gain compared to how it was before.
35 | - Fixed IQDB tag names.
36 | 
37 | ## [2.2.0] - 2019-09-12
38 | 
39 | - Added logging to core.
40 | - Removed dependency on aiohttp_requests library
41 |   - Well, sorta. It localizes the library to an internal folder instead.
42 |   - This is to prevent aiohttp from being version pinned.
43 | - Fixed UnboundLocalError in E621.
44 | - Fixed Core bug where a source would be split up into individual characters (and then added to a set).
45 | 
46 | ## [2.1.4] - 2019-09-12
47 | 
48 | - Fixed e621 premature ratelimit call
49 | 
50 | ## [2.1.3] - 2019-09-12
51 | 
52 | - Fixed ratelimit underflow bug on low values.
53 | 
54 | ## [2.1.2] - 2019-09-12
55 | 
56 | - Added source code references to documentation.
57 | - Fixed the callback, it now returns ImageResult rather than ReverseResult.
58 | - Fixed premature reference assignment in Paheal engine (not sure how this slipped past testing).
59 | 
60 | ## [2.1.1] - 2019-09-11
61 | 
62 | - Fixed crucial endless loop bug due to incorrect ratelimit checking.
63 | - Fixed formatting error in documentation.
64 | 
65 | ## [2.1.0] - 2019-09-11 [YANKED]
66 | 
67 | - Changed search methods to be more async (request calls weren't run_in_executor).
68 | - Changed SauceNao ratelimit accounting to function better.
69 | 
70 | ## [2.0.0] - 2019-09-10
71 | 
72 | - Changed callback to be more comprehensive.
73 | - Changed SauceNao to account for ratelimits properly.
74 | - Changed E621 to account for ratelimits properly.
75 | - Changed responses to namedtuples. Namedtuples can be better documented and permit dotted access.
76 |   - Two new classes: ImageResult and ReverseResult.
77 |   - Classes share attributes but differ in types on said attributes.
78 | - Changed search_image in API classes:
79 |   - Removed deprecated search_image
80 |   - Renamed search_image_source to search_image.
81 |   - As a result of this, search_image_source is effectively removed.
82 | - Changed reverse_search in core class:
83 |   - Removed deprecated reverse_search
84 |   - Renamed search_image_source to reverse_search
85 |   - As a result of this, search_image_source is effectively removed.
86 | - Renamed all mentions of booru to engine.
87 |   - Renamed NotAValidBooruException to NotAValidEngineException
88 | - Added new helper method to base API to handle ratelimits.
89 | - Added VS Code Build and test tasks.
90 | - Added new package variables: `__version__` and `version_info` to track versioning.
91 | - Added sample `test.sh` for testing purposes.
92 | - Fixed reverse_search to skip saucenao if on a long ratelimit.
93 | 
94 | ## [1.2.0] - 2019-08-29
95 | 
96 | - Expanded base engine class to also permit searching for sources.
97 | - Changed base engine class to have default behavior for searching for sources.
98 | - Added SauceNao parser.
99 | - Added source searching to Danbooru.
100 | - Added source searching to E621.
101 | - Added source searching to paheal.
102 | - Removed code cruft.
103 | - Added dedicated exception for options an engine isn't capable of.
104 | 
105 | ## [1.1.1] - 2019-08-20
106 | 
107 | - Added PyPi dependencies.
108 | 
109 | ## [1.1.0] - 2019-08-20
110 | 
111 | - Changed output from lists to sets to remove duplicitous items.
112 | - Project released on PyPi.
113 | 
114 | ## [1.0.0] - 2019-08-20
115 | 
116 | - Added base booru class
117 | - Added Danbooru engine
118 | - Added IQDB engine
119 | - Added E621 engine
120 | - Added paheal engine
121 | - Added documentation
122 | - Licensed project to LGPLv3
123 | - Added core class
124 | - Added config class
125 | 
126 | [Unreleased]: https://github.com/noirscape/retaggr/compare/3.1.0...HEAD
127 | [3.1.0]: https://github.com/noirscape/retaggr/compare/3.0.0...3.1.0
128 | [3.0.0]: https://github.com/noirscape/retaggr/compare/2.3.1...3.0.0
129 | [2.3.1]: https://github.com/noirscape/retaggr/compare/2.3.0...2.3.1
130 | [2.3.0]: https://github.com/noirscape/retaggr/compare/2.2.0...2.3.0
131 | [2.2.0]: https://github.com/noirscape/retaggr/compare/2.1.4...2.2.0
132 | [2.1.4]: https://github.com/noirscape/retaggr/compare/2.1.3...2.1.4
133 | [2.1.3]: https://github.com/noirscape/retaggr/compare/2.1.2...2.1.3
134 | [2.1.2]: https://github.com/noirscape/retaggr/compare/2.1.1...2.1.2
135 | [2.1.1]: https://github.com/noirscape/retaggr/compare/2.1.0...2.1.1
136 | [2.1.0]: https://github.com/noirscape/retaggr/compare/2.0.0...2.1.0
137 | [2.0.0]: https://github.com/noirscape/retaggr/compare/1.2.0...2.0.0
138 | [1.2.0]: https://github.com/noirscape/retaggr/compare/1.1.1...1.2.0
139 | [1.1.1]: https://github.com/noirscape/retaggr/compare/1.1.0...1.1.1
140 | [1.1.0]: https://github.com/noirscape/retaggr/compare/1.0.0...1.1.0
141 | [1.0.0]: https://github.com/noirscape/retaggr/releases/tag/1.0.0
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |                    GNU LESSER GENERAL PUBLIC LICENSE
2 |                        Version 3, 29 June 2007
3 | 
4 |  Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 
80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 
150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | -------------------------------------------------------------------------------- /src/retaggr/core.py: -------------------------------------------------------------------------------- 1 | # stdlib 2 | import asyncio 3 | from collections import namedtuple 4 | import logging 5 | import traceback 6 | 7 | # Config 8 | from retaggr.config import ReverseSearchConfig 9 | 10 | # Engines 11 | from retaggr.engines.base import ImageResult 12 | from retaggr.engines.danbooru import Danbooru 13 | from retaggr.engines.iqdb import Iqdb 14 | from retaggr.engines.paheal import Paheal 15 | from retaggr.engines.saucenao import SauceNao 16 | from retaggr.engines.dummy import Dummy 17 | 18 | # Exceptions 19 | from retaggr.errors import MissingAPIKeysException, NotAValidEngineException, EngineCooldownException 20 | 21 | ReverseResult = namedtuple("ReverseResult", ["tags", "source", "rating"]) 22 | """The response from a reverse image search. All attributes are Sets. 23 | 24 | .. py:attribute:: tags 25 | 26 | The tags the engine has located. 27 | 28 | .. py:attribute:: source 29 | 30 | All the sources that have been found for the image. 31 | 32 | .. py:attribute:: rating 33 | 34 | The rating on the image. 35 | 36 | """ 37 | 38 | # Set up logger 39 | logger = logging.getLogger(__name__) 40 | 41 | class ReverseSearch: 42 | r"""Core class used for Reverse Searching. 43 | 44 | This class can only be instantiated with a :class:`ReverseSearchConfig` instance. 45 | 46 | All listed methods can only be ran from an asynchronous context. 47 | 48 | :ivar accessible_engines: The accessible boorus from the passed in configuration object. 49 | :param config: The config object. 50 | :type config: ReverseSearchConfig 51 | :param test_mode: Enable test mode. Test mode adds two dummy engines that can fail as well as return some very basic values. 52 | :type test_mode: bool 53 | """ 54 | _all_engines = [ 55 | "danbooru", 56 | "e621", 57 | "iqdb", 58 | "paheal", 59 | "saucenao" 60 | ] 61 | 62 | def __init__(self, config, test_mode=False): 63 | self.config = config 64 | self.accessible_engines = {} 65 | 66 | if hasattr(self.config, "min_score"): 67 | if hasattr(self.config, "danbooru_username") and hasattr(self.config, "danbooru_api_key"): 68 | self.accessible_engines["danbooru"] = Danbooru(self.config.danbooru_username, self.config.danbooru_api_key, self.config.min_score) 69 | logger.info("Created Danbooru engine") 70 | 71 | # IQDB stuff -> we do the check _first_ since someone might not specify this at all, in which case we do still instantiate it. 
72 |             if hasattr(self.config, "skip_iqdb") and self.config.skip_iqdb:
73 |                 skip_iqdb = True
74 |             else:
75 |                 skip_iqdb = False
76 |             if not skip_iqdb:
77 |                 self.accessible_engines["iqdb"] = Iqdb(self.config.min_score)
78 |                 logger.info("Created IQDB engine")
79 | 
80 |         if hasattr(self.config, "saucenao_api_key"):
81 |             self.accessible_engines["saucenao"] = SauceNao(self.config.saucenao_api_key)
82 |             logger.info("Created SauceNao engine")
83 |             if hasattr(self.config, "e621_username") and hasattr(self.config, "app_name") and hasattr(self.config, "version"):
84 |                 self.accessible_engines["saucenao"].enable_e621(self.config.e621_username, self.config.app_name, self.config.version)
85 |                 logger.info("Activated E621 capabilities on saucenao.")
86 | 
87 |         self.accessible_engines["paheal"] = Paheal()
88 | 
89 |         if test_mode:
90 |             self.accessible_engines["dummy"] = Dummy(False)
91 |             self.accessible_engines["fail_dummy"] = Dummy(True)
92 |             self._all_engines.append("dummy")
93 |             self._all_engines.append("fail_dummy")
94 | 
95 |     async def reverse_search(self, url, callback=None, download=False):
96 |         """
97 |         Reverse searches all accessible engines for ``url``.
98 | 
99 |         .. note::
100 |             ``callback`` is an optional coroutine that can be passed in. It can be used to keep track of
101 |             progress as the individual engine searches finish.
102 | 
103 |             .. code-block:: python
104 |                 :linenos:
105 | 
106 |                 async def callback(engine, rresult):
107 |                     print("This engine was searched: %s" % engine)
108 |                     print("These tags were found: %s" % rresult.tags)
109 |                     print("This source was found: %s" % rresult.source)
110 |                     print("This rating was found: %s" % rresult.rating)
111 | 
112 |                 # Callback will be called each time a search finishes.
113 |                 await rs.reverse_search(url, callback)
114 | 
115 |             After each engine search finishes, the callback is called with the name of the engine that was just searched and its result.
116 | 
117 |         :param url: The URL to search.
118 |         :type url: str
119 |         :param callback: Callback function.
120 |         :type callback: Optional[function]
121 |         :param download: Run searches on engines that require a file download. Defaults to False.
122 |         :type download: Optional[bool]
123 |         :return: A :class:`ReverseResult` instance containing your data.
124 |         :rtype: ReverseResult
125 |         """
126 |         tags = set()
127 |         source = set()
128 |         rating = set()
129 |         tasks = []
130 |         logger.info("Creating reverse search engine tasks.")
131 |         for engine in self.accessible_engines:
132 |             if self.accessible_engines[engine].download_required:
133 |                 if not download:
134 |                     logger.info("[%s] Downloading files has been disabled. Skipping [%s]", url, engine)
135 |                     continue
136 |             tasks.append(self._gather_reverse_task(engine, url, callback))
137 |         results = await asyncio.gather(*tasks)
138 |         for result in results:
139 |             if isinstance(result, Exception): # pragma: no cover
140 |                 logger.warning("[%s] An engine has failed!", url)
141 |                 logger.warning("[%s] This may or may not be an issue. Report it on the issue tracker: https://github.com/noirscape/retaggr/issues if the issue persists.", url)
142 |                 traceback.print_exc()
143 |                 continue
144 |             if result.tags:
145 |                 tags.update(result.tags)
146 |             if result.source:
147 |                 source.update(result.source)
148 |             if result.rating:
149 |                 rating.add(result.rating)
150 |         return ReverseResult(tags, source, rating)
151 | 
152 |     async def _gather_reverse_task(self, engine, url, callback) -> ImageResult:
153 |         """Underlying method used to run reverse_search more asynchronously."""
154 |         logger.info("[%s] Starting search in [%s] engine", url, engine)
155 |         try:
156 |             result = await self.search_image(engine, url)
157 |         except: # pragma: no cover
158 |             # reverse_search just can't raise; it's meant to keep trucking no matter what.
159 |             return ImageResult([], None, None)
160 |         else:
161 |             logger.info("[%s][%s] Found tags: %s", url, engine, result.tags)
162 |             logger.info("[%s][%s] Found source: %s", url, engine, result.source)
163 |             logger.info("[%s][%s] Found rating: %s", url, engine, result.rating)
164 |             if callback:
165 |                 logger.info("[%s][%s] Executing callback", url, engine)
166 |                 await callback(engine, result)
167 |             logger.info("[%s] Finished searching [%s]", url, engine)
168 |             return result
169 | 
170 |     async def search_image(self, booru, url):
171 |         r"""Reverse search an engine for ``url``.
172 | 
173 |         :param booru: Engine to search; this must match a filename in the engines folder.
174 |         :type booru: str
175 |         :param url: The URL to search.
176 |         :type url: str
177 |         :raises MissingAPIKeysException: Required keys in config object missing.
178 |         :raises NotAValidEngineException: The passed in booru is not a valid engine.
179 |         :return: An :class:`ImageResult` instance containing your data.
180 |         :rtype: ImageResult
181 |         """
182 |         if booru not in self._all_engines:
183 |             raise NotAValidEngineException("%s is not a valid engine" % booru)
184 |         if booru not in self.accessible_engines:
185 |             raise MissingAPIKeysException("%s is missing one or more needed API keys. Check the documentation." % booru)
186 |         return await self.accessible_engines[booru].search_image(url)
187 | 
--------------------------------------------------------------------------------