├── .gitignore
├── 00.jpg
├── LICENSE.txt
├── MANIFEST.in
├── README.md
├── nonebot_plugin_imgexploration
├── __init__.py
└── imgexploration.py
├── pyproject.toml
├── requirements.txt
└── setup.py
/.gitignore:
--------------------------------------------------------------------------------
1 | build/
2 | dist/
3 | nonebot_plugin_imgexploration.egg-info/
4 | *.html
5 | *.json
6 |
--------------------------------------------------------------------------------
/00.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cpuopt/nonebot_plugin_imgexploration/23a6bba5f747092ea3bea7fb391b8f64bf281913/00.jpg
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) [year] [fullname]
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include pyproject.toml
2 |
3 | # Include the README
4 | include *.md
5 |
6 | # Include the license file
7 | include LICENSE.txt
8 |
9 | # Include the data files
10 | #recursive-include data *
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |

3 |
4 | # nonebot_plugin_imgexploration
5 |
6 | _✨ [Nonebot2](https://github.com/nonebot/nonebot2) 插件,Google、Yandex和基于PicImageSearch的saucenao、ascii2d搜图 ✨_
7 |
8 |
9 |
10 |
11 |
12 | **需要能稳定访问Google等网站的代理**
13 | ## 一.**安装**
14 | ### 1.使用nb-cli安装
15 |
16 | ```
17 | nb plugin install nonebot-plugin-imgexploration
18 | ```
19 |
20 | 或其他任意加载方式
21 | ### 2.需要字体
22 | ```
23 | HarmonyOS_Sans_SC_Regular.ttf
24 | HarmonyOS_Sans_SC_Bold.ttf
25 | HarmonyOS_Sans_SC_Light.ttf
26 | ```
27 | https://developer.harmonyos.com/cn/docs/design/des-guides/font-0000001157868583
28 | 安装到系统字体即可
29 |
30 | ### 3.依赖 (nb-cli或pip安装无需配置依赖)
31 |
32 | 展开/收起
33 |
34 | ```
35 | pip install -r requirements.txt
36 | ```
37 |
38 |
39 |
40 | ## 二.**配置**
41 | ### 1.env中的配置
42 | ```
43 | #代理端口号(不使用本地代理可缺省,例如:使用软路由透明代理、程序运行在境外)
44 | proxy_port=7890
45 |
46 | #saucenao apikey 在https://saucenao.com/user.php?page=search-api注册获取
47 | saucenao_apikey=xxxxx
48 |
49 | #Google Cookies json文件路径,建议使用Chrome插件Cookie-Editor复制导出json并手动存入文件
50 | google_cookies_filepath="C:\Users\...\google_cookies.json"
51 |
52 | #等待用户回复的超时时间(可选)
53 | #注意,nonebot2新版本该配置项格式有变化!请根据nonebot版本参照文档 https://nonebot.dev/docs/api/config#Config-session-expire-timeout
54 | #SESSION_EXPIRE_TIMEOUT=180
55 | #SESSION_EXPIRE_TIMEOUT=PT3M
56 | ```
57 | ## 三.**使用**
58 | ###
59 | ```
60 | /搜图
61 | /搜图 <图片>
62 | ```
63 | ### **使用示例**
64 |
65 |
66 |

67 |
68 |
69 | ### 搜图结果
70 |
71 |
72 |

73 |
74 |
--------------------------------------------------------------------------------
/nonebot_plugin_imgexploration/__init__.py:
--------------------------------------------------------------------------------
1 | import nonebot
2 | import httpx
3 | from typing import Union
4 | from nonebot import on_command, require
5 | from nonebot.log import logger
6 | from nonebot.typing import T_State
7 | from nonebot.params import Arg, CommandArg
8 | from nonebot.plugin import PluginMetadata
9 | from nonebot.adapters.onebot.v11 import (
10 | Bot,
11 | Message,
12 | MessageSegment,
13 | GroupMessageEvent,
14 | PrivateMessageEvent,
15 | )
16 | from .imgexploration import Cookies, Imgexploration
17 |
18 |
# Plugin metadata shown by NoneBot's plugin store / loader.
__plugin_meta__ = PluginMetadata(
    name="查找图片出处",
    description="通过saucenao、ascii2d、Google、Yandx查询图片出处",
    usage="command:搜图",
    type="application",
    homepage="https://github.com/cpuopt/nonebot_plugin_imgexploration",
    supported_adapters={"~onebot.v11"},
    extra={},
)

# Configuration read from the NoneBot driver config (.env):
#   proxy_port              -- local HTTP proxy port; None/absent disables the proxy
#   saucenao_apikey         -- API key for saucenao.com (empty string if unset)
#   google_cookies_filepath -- path to a Cookie-Editor style JSON export of Google cookies
proxy_port = getattr(nonebot.get_driver().config, "proxy_port", None)
saucenao_apikey = getattr(nonebot.get_driver().config, "saucenao_apikey", "")
google_cookies_filepath = getattr(nonebot.get_driver().config, "google_cookies_filepath", "")


# Proxy URL handed to httpx and PicImageSearch; None means a direct connection.
proxies = f"http://127.0.0.1:{proxy_port}" if proxy_port else None
35 |
36 |
def numspilt(args: str, max: int):
    """Extract result indices in the range 1..max from a whitespace-separated reply.

    A numeric token that falls within range is taken as-is; a numeric token
    larger than ``max`` is split into its individual digits and each in-range
    digit is kept (so "12" with max=5 yields [1, 2]).  Non-numeric tokens are
    ignored.
    """
    selected = []
    for token in args.split():
        if not token.isnumeric():
            continue
        number = int(token)
        if 1 <= number <= max:
            selected.append(number)
        elif number >= max:
            # Token exceeds the result count: treat each digit as its own index.
            for digit in token:
                if digit.isnumeric() and 1 <= int(digit) <= max:
                    selected.append(int(digit))
    return selected
48 |
49 |
# Matcher for the "/搜图" command; block=True stops further event propagation.
imgexploration = on_command(cmd="搜图", priority=1, block=True)
51 |
52 |
@imgexploration.handle()
async def cmd_receive(
    event: Union[GroupMessageEvent, PrivateMessageEvent],
    state: T_State,
    pic: Message = CommandArg(),
):
    # If the command message already carried an image, stash it in state so the
    # `got("Message_pic")` step below is satisfied and the prompt is skipped.
    if pic:
        state["Message_pic"] = pic
61 |
62 |
@imgexploration.got("Message_pic", prompt="请发送要搜索的图片")
async def get_pic(
    bot: Bot,
    event: Union[GroupMessageEvent, PrivateMessageEvent],
    state: T_State,
    msgpic: Message = Arg("Message_pic"),
):
    # Find the first image segment in the reply and run the full search on it.
    for segment in msgpic:
        if segment.type == "image":
            pic_url: str = segment.data["url"]  # image URL provided by the OneBot adapter
            logger.success(f"获取到图片: {pic_url}")
            # NOTE(review): `proxy=` is the httpx>=0.26 keyword; the __main__ demo in
            # imgexploration.py uses the older `proxies=` — confirm against the pinned
            # httpx version that this spelling is accepted.
            async with httpx.AsyncClient(
                proxy=proxies,
                headers={
                    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
                },
            ) as client:
                search = Imgexploration(
                    pic_url=pic_url,
                    client=client,
                    proxy=proxies,
                    saucenao_apikey=saucenao_apikey,
                    google_cookies=Cookies(google_cookies_filepath),
                )
                # Acknowledge before the (slow) multi-engine search starts.
                await imgexploration.send(
                    message=Message(MessageSegment.text("搜索进行中……")),
                    reply_message=True,
                )
                await search.doSearch()
                result_dict = search.getResultDict()
                # Keep the results in state so get_num() can resolve indices to URLs.
                state["result_dict"] = result_dict
                await imgexploration.send(
                    message=Message(
                        MessageSegment.image(file=result_dict["pic"]) + MessageSegment.text("请在180s内发送序号以获得对应结果的链接,一次可以发送多个序号,例如:1 5 6"),
                    ),
                    reply_message=True,
                )
            break

    else:
        # for/else: the loop completed without `break`, i.e. no image segment
        # was present in the message — re-prompt the user.
        await imgexploration.reject("你发送的不是图片,请以“图片”形式发送!")
104 |
105 |
@imgexploration.got("need_num")
async def get_num(
    bot: Bot,
    event: Union[GroupMessageEvent, PrivateMessageEvent],
    state: T_State,
    nummsg: Message = Arg("need_num"),
):
    # Translate the user's reply ("1 5 6", "0" to quit, anything non-numeric to
    # quit) into result URLs from the search stored by get_pic().
    try:
        # Raises ValueError on any non-numeric token, IndexError on empty input;
        # both are treated below as "end the session".
        args = list(map(int, str(nummsg).split()))
        if args[0] == 0:
            await imgexploration.finish(message=Message(MessageSegment.reply(event.message_id) + MessageSegment.text("搜图结束")))
        msg = MessageSegment.text("")
        res_len = len(state["result_dict"]["info"])
        # Re-parse with numspilt so out-of-range multi-digit tokens fall back to
        # per-digit indices (e.g. "12" -> 1, 2 when there are fewer results).
        args = numspilt(str(nummsg), res_len)
        for no in args:
            url = state["result_dict"]["info"][no - 1]["url"]
            msg += MessageSegment.text(f"{no} - {url}\n")
        await bot.send(
            event,
            message=Message(msg + "你还有机会发送序号以获取链接\n发送非数字消息或0以结束搜图"),
            reply_message=True,
        )
        # Loop: wait for another batch of indices from the same user.
        await imgexploration.reject()
    except (IndexError, ValueError):
        logger.error("参数错误,没有发送序号,搜图结束")
        await imgexploration.finish(
            message=Message(MessageSegment.text(f"你没有发送序号,搜图结束!")),
            reply_message=True,
        )
135 |
--------------------------------------------------------------------------------
/nonebot_plugin_imgexploration/imgexploration.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import base64
3 | import re
4 | import traceback
5 | from PIL import Image, ImageDraw, ImageFont, ImageFilter
6 | from PicImageSearch import Ascii2D, Network, SauceNAO
7 | from lxml import etree
8 | import httpx, json
9 | from io import BytesIO
10 | from loguru import logger
11 | import PIL
12 | import urllib
13 |
14 |
class Imgexploration:
    """Aggregate reverse-image search across saucenao, ascii2d, Google Lens and
    Yandex, then render the merged results into one summary picture.

    Typical use: construct, ``await doSearch()``, then ``getResultDict()``.
    """

    def __init__(self, pic_url, client: httpx.AsyncClient, proxy, saucenao_apikey, google_cookies):
        """
        Parameters
        ----------
        * pic_url : URL of the image to search for
        * client : shared httpx.AsyncClient used for all plain HTTP requests
        * proxy : proxy URL handed to PicImageSearch's Network (or None for direct)
        * saucenao_apikey : saucenao API key
        * google_cookies : Cookies wrapper holding Google account cookies
        """
        self.client = client
        self.__proxy = proxy
        self.__pic_url = pic_url
        self.setFront(big_size=25, nomal_size=20, small_size=15)

        # Browser-like headers used for the Yandex request.
        general_header = {
            "sec-ch-ua": '"Chromium";v="104", " Not A;Brand";v="99", "Google Chrome";v="104"',
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
        }

        self.setHeadersCookieApikey(saucenao_apikey=saucenao_apikey, general_header=general_header, google_cookies=google_cookies)

    def setHeadersCookieApikey(self, saucenao_apikey, general_header, google_cookies):
        """Store the saucenao API key, shared request headers and Google cookies.

        Parameters
        ----------
        * saucenao_apikey (str) : saucenao API key
        * general_header (dict) : browser-like headers for scraping requests
        * google_cookies (Cookies) : cookie jar wrapper for Google requests
        """
        self.__saucenao_apikey = saucenao_apikey
        self.__generalHeader = general_header
        self.__google_cookies = google_cookies

    async def __getImgbytes(self):
        # Download the source image and shrink anything over 2000px on a side
        # (search engines reject/slow down on very large uploads).
        # Best-effort: on any failure the error is logged and __pic_bytes may be
        # left unset, which will surface later in __uploadToImgops.
        try:
            self.__pic_bytes = (await self.client.get(url=self.__pic_url, timeout=10)).content
            img = Image.open(BytesIO(self.__pic_bytes))
            img = img.convert("RGB")
            width = img.width
            height = img.height
            if width > 2000 or height > 2000:
                # Integer downscale factor derived from the longer edge.
                radius = width // 1000 if width > height else height // 1000
                img = img.resize((int(width / radius), int(height / radius)))
                res = BytesIO()
                img.save(res, format="JPEG")
                self.__pic_bytes = res.getvalue()
        except Exception as e:
            logger.error(e)

    async def __uploadToImgops(self):
        # Re-host the image on imgops.com so every engine receives a stable,
        # publicly reachable URL; falls back to the original URL on failure.
        logger.info("图片上传到Imgops")
        try:
            files = {"photo": self.__pic_bytes}
            data = {"isAjax": "true"}
            headers = {
                "sec-ch-ua": '"Google Chrome";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36",
                "sec-ch-ua-platform": '"Windows"',
                "Origin": "https://imgops.com",
                "Referer": "https://imgops.com/",
                "Accept-Encoding": "gzip, deflate, br",
                "Accept-Language": "zh-CN,zh;q=0.9",
            }
            post = await self.client.post("https://imgops.com/store", files=files, data=data, headers=headers, timeout=10)

            # Response body apparently starts with "/host/..." so a single slash
            # is prepended here — TODO confirm against imgops' current response.
            self.__imgopsUrl = "https:/" + post.text
        except Exception as e:
            self.__imgopsUrl = self.__pic_url
            logger.error(e)

    def setFront(self, big_size: int, nomal_size: int, small_size: int):
        """Load the three HarmonyOS Sans faces used when drawing results.

        Parameters
        ----------
        * big_size : point size of the large font
        * nomal_size : point size of the medium font
        * small_size : point size of the small font
        """
        self.__font_b_size = big_size
        self.__font_b = ImageFont.truetype("HarmonyOS_Sans_SC_Regular", big_size)
        self.__font_n = ImageFont.truetype("HarmonyOS_Sans_SC_Bold", nomal_size)
        self.__font_s = ImageFont.truetype("HarmonyOS_Sans_SC_Light", small_size)

    @staticmethod
    async def ImageBatchDownload(urls: list, client: httpx.AsyncClient) -> list[bytes]:
        """Download all *urls* concurrently and return their bodies in order."""
        # create_task starts every request immediately; the loop then awaits
        # them in order, so the returned list lines up with `urls`.
        tasks = [asyncio.create_task(client.get(url)) for url in urls]
        return [((await task).content) for task in tasks]

    async def __draw(self) -> bytes:
        """Render self.__result_info into a single PNG (one 200px row per result)."""
        try:
            font_size = self.__font_b_size
            font = self.__font_b
            font2 = self.__font_n
            font3 = self.__font_s
            num = len(self.__result_info)
            width = 900
            height = 200  # per-result row height
            # Minimal 10px canvas when there are no results at all.
            total_height = height * num if num != 0 else 10
            line_width = 2
            line_fill = (200, 200, 200)
            text_x = 300  # x where the text column starts; thumbnails sit left of it
            logger.info(f"Drawing... total:{num}")
            img = Image.new(mode="RGB", size=(width, total_height), color=(255, 255, 255))

            draw = ImageDraw.Draw(img)
            margin = 20
            # Horizontal separator between consecutive rows.
            for i in range(1, num):
                draw.line(
                    (margin, i * height, width - margin, i * height),
                    fill=line_fill,
                    width=line_width,
                )

            vernier = 0  # y offset of the current row
            seat = 0  # 1-based result number shown to the user

            for single in self.__result_info:
                seat += 1

                if "thumbnail_bytes" in single:
                    thumbnail = single["thumbnail_bytes"]
                    try:
                        thumbnail = Image.open(fp=BytesIO(thumbnail)).convert("RGB")
                    except PIL.UnidentifiedImageError:
                        # Undecodable bytes: fall back to a white placeholder.
                        thumbnail = Image.new(mode="RGB", size=(200, 200), color=(255, 255, 255))

                    # Scale to the row height, preserving aspect ratio.
                    thumbnail = thumbnail.resize((int((height - 2 * margin) * thumbnail.width / thumbnail.height), height - 2 * margin))
                    if single["source"] == "ascii2d":
                        # Blur ascii2d thumbnails (often NSFW-adjacent previews).
                        thumbnail = thumbnail.filter(ImageFilter.GaussianBlur(radius=3))

                    if thumbnail.width > text_x - 2 * margin:
                        # Too wide: crop to the thumbnail column, left-aligned.
                        thumbnail = thumbnail.crop((0, 0, text_x - 2 * margin, thumbnail.height))
                        img.paste(im=thumbnail, box=(margin, vernier + margin))
                    else:
                        # Narrow: right-align against the text column.
                        img.paste(im=thumbnail, box=(text_x - thumbnail.width - margin, vernier + margin))

                text_ver = 2 * margin  # y offset of the next text line within the row
                # "NO.n from <engine>" tag, right-aligned in the row's top corner.
                draw.text(
                    xy=(width - margin, vernier + 10),
                    text=f"NO.{seat} from {single['source']}",
                    fill=(150, 150, 150),
                    font=font2,
                    anchor="ra",
                )

                if single["title"]:
                    text = single["title"].replace("\n", "")
                    # Truncate proportionally so the rendered width stays < 450px.
                    lw = font.getlength(text)
                    text = text if lw < 450 else f"{text[:int(len(text)*450/lw)]}..."
                    draw.text(xy=(text_x, vernier + text_ver), text="Title: ", fill=(160, 160, 160), font=font, anchor="la")
                    draw.text(xy=(text_x + 60, vernier + text_ver), text=text, fill=(0, 0, 0), font=font, anchor="la")
                    text_ver = text_ver + font_size + margin / 2

                if ("similarity" in single) and single["similarity"]:  # saucenao only
                    text = single["similarity"]
                    draw.text(xy=(text_x, vernier + text_ver), text="similarity: ", fill=(160, 160, 160), font=font, anchor="la")
                    draw.text(xy=(text_x + 115, vernier + text_ver), text=f"{text}", fill=(0, 0, 0), font=font, anchor="la")
                    text_ver = text_ver + font_size + margin / 2

                if ("description" in single) and single["description"]:
                    text = single["description"]
                    lw = font.getlength(text)
                    text = text if lw < 520 else f"{text[:int(len(text)*520/lw)]}..."
                    draw.text(xy=(text_x, vernier + text_ver), text=text, fill=(0, 0, 0), font=font, anchor="la")
                    text_ver = text_ver + font_size + margin / 2

                if ("domain" in single) and single["domain"]:  # Yandex only
                    text = single["domain"]
                    draw.text(xy=(text_x, vernier + text_ver), text="Source: ", fill=(160, 160, 160), font=font, anchor="la")
                    draw.text(xy=(text_x + 86, vernier + text_ver), text=f"{text}", fill=(0, 0, 0), font=font, anchor="la")
                    text_ver = text_ver + font_size + margin / 2

                if single["url"]:
                    url = single["url"]
                    lw = font3.getlength(url)
                    url = url if lw < 560 else f"{url[:int(len(url)*560/lw)]}..."
                    draw.text(xy=(text_x, vernier + text_ver), text=url, fill=(100, 100, 100), font=font3, anchor="la")
                vernier += height

            save = BytesIO()
            img.save(save, format="PNG", quality=100)
            return save.getvalue()
        except Exception as e:
            raise e

    async def __saucenao_build_result(self, result_num=10, minsim=60, max_num=5) -> list:
        """Query saucenao via PicImageSearch and return normalized result dicts.

        Parameters
        ----------
        * result_num : number of results requested from saucenao
        * minsim : minimum similarity (%) a hit must have to be kept
        * max_num : unused — TODO confirm whether a cap was intended here
        """
        resList = []
        logger.info("saucenao searching...")
        try:
            async with Network(proxies=self.__proxy, timeout=20) as client:
                saucenao = SauceNAO(client=client, api_key=self.__saucenao_apikey, numres=result_num)
                saucenao_result = await saucenao.search(url=self.__imgopsUrl)

                # First pass: collect thumbnail URLs of hits passing the filter...
                thumbnail_urls = []
                for single in saucenao_result.raw:
                    if single.similarity < minsim or single.url == "" or single.thumbnail == "":
                        continue
                    thumbnail_urls.append(single.thumbnail)
                thumbnail_bytes = await self.ImageBatchDownload(thumbnail_urls, self.client)
                # ...second pass with the same filter, so index i lines up with
                # the downloaded thumbnail list.
                i = 0
                for single in saucenao_result.raw:
                    if single.similarity < minsim or single.url == "" or single.thumbnail == "":
                        continue
                    sin_di = {
                        "title": single.title,  # result title
                        "thumbnail": single.thumbnail,  # thumbnail URL
                        "url": urllib.parse.unquote(single.url),
                        "similarity": single.similarity,
                        "source": "saucenao",
                        "thumbnail_bytes": thumbnail_bytes[i],
                    }
                    i += 1
                    resList.append(sin_di)
                return resList
        except IndexError as e:
            logger.error(e)
            return []
        finally:
            logger.success(f"saucenao result:{len(resList)}")
            # NOTE(review): `return` in `finally` overrides the returns above and
            # swallows any in-flight exception; partial resList is still returned.
            return resList

    async def __google_build_result(self, result_num=5) -> list:
        """Search Google Lens by URL and scrape the "exact matches" page.

        result_num is currently unused — all scraped items are returned.
        """
        google_header = {
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            "accept-encoding": "gzip, deflate, br, zstd:gzip, deflate, br, zstd",
            "accept-language": "zh-CN,zh-HK;q=0.9,zh;q=0.8,en-US;q=0.7,en;q=0.6:zh-CN,zh-HK;q=0.9,zh;q=0.8,en-US;q=0.7,en;q=0.6",
            "cache-control": "no-cache:no-cache",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36:Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
        }
        resList = []
        logger.info("google searching...")
        try:
            params = {
                "url": self.__imgopsUrl,
            }
            # Step 1: upload-by-url; Google answers with a redirect to the results page.
            google_lens = await self.client.get(f"https://lens.google.com/uploadbyurl", params=params, headers=google_header, timeout=10, cookies=self.__google_cookies.cookies)
            self.__google_cookies.update(google_lens.headers)
            redirect_url = google_lens.headers.get("location")
            google_header["referer"] = str(google_lens.url)
            # Step 2: follow the redirect manually, keeping cookies fresh.
            google_lens = await self.client.get(redirect_url, headers=google_header, timeout=10, cookies=self.__google_cookies.cookies)
            self.__google_cookies.update(google_lens.headers)
            google_lens_text = google_lens.text
            main_page = etree.HTML(google_lens_text)
            # Locate the link whose label is "see exact matches" (Chinese UI text).
            elements = main_page.xpath("//span[text()='查看完全匹配的结果']/ancestor::a[1]")
            if elements:
                href = "https://www.google.com" + elements[0].get("href")
                # Step 3: fetch the exact-match page and pull inline thumbnails.
                google_lens = await self.client.get(href, headers=google_header, timeout=10, cookies=self.__google_cookies.cookies)
                full_match_page = etree.HTML(google_lens.text)
                id_base64_mapping = parseBase64Image(full_match_page)
                # NOTE(review): debug dump written to the working directory on
                # every successful search — consider removing or gating this.
                with open("Googlelens_test.html", "w+", encoding="utf-8") as file:
                    file.write(google_lens.text)
                res_items = full_match_page.xpath("//div[@id='search']/div/div/div")
                for item in res_items:
                    link = item.xpath(".//a")[0].get("href")
                    img_id = item.xpath(".//a//img")[0].get("id")
                    title = item.xpath(".//a/div/div[2]/div[1]/text()")[0]
                    # Thumbnails are inlined as base64 in scripts, keyed by img id.
                    img_base64 = id_base64_mapping[img_id] if img_id in id_base64_mapping.keys() else None
                    img_bytes = base64.b64decode(img_base64) if img_base64 else None
                    if img_bytes != None:
                        sin_di = {
                            "title": title,
                            "thumbnail_bytes": img_bytes,
                            "url": link,
                            "source": "Google",
                        }
                        resList.append(sin_di)
            else:
                # No "exact matches" section on the page: return empty.
                pass

            return resList

        except Exception as e:
            logger.error(traceback.format_exc())
            # NOTE(review): google_lens_text is unbound (NameError) if the failure
            # happened before step 2 completed — confirm and guard if needed.
            with open("Googlelens_error_page.html", "w+", encoding="utf-8") as file:
                file.write(google_lens_text)
            raise e

        finally:
            logger.success(f"google result:{len(resList)}")
            # NOTE(review): `return` in `finally` swallows the `raise e` above.
            return resList

    def __ascii2d_get_external_url(self, rawhtml):
        """Return the first manually-registered external source link, or False."""
        rawhtml = str(rawhtml)
        external_url_li = etree.HTML(rawhtml).xpath('//div[@class="external"]/a[1]/@href')
        if external_url_li:
            return external_url_li[0]  # possible manually-registered source link
        else:
            return False

    async def __ascii2d_build_result(self, sh_num: int = 2, tz_num: int = 3) -> list:
        """Run both ascii2d modes and return normalized result dicts.

        Parameters
        ----------
        * sh_num : number of results to take from the colour search
        * tz_num : number of results to take from the feature (bovw) search
        """
        logger.info("ascii2d searching...")
        result_li = []
        try:
            async with Network(proxies=self.__proxy, timeout=20) as client:
                ascii2d_sh = Ascii2D(client=client, bovw=False)

                ascii2d_tz = Ascii2D(client=client, bovw=True)

                # The two searches run sequentially: ascii2d redirects the colour
                # search to a hash URL that the feature search reuses server-side.
                ascii2d_sh_result = await asyncio.create_task(ascii2d_sh.search(url=self.__imgopsUrl))
                ascii2d_tz_result = await asyncio.create_task(ascii2d_tz.search(url=self.__imgopsUrl))

                thumbnail_urls = []
                for single in ascii2d_tz_result.raw[0:tz_num] + ascii2d_sh_result.raw[0:sh_num]:
                    # Prefer the parsed URL; fall back to a manually-registered
                    # external link; skip hits with neither.
                    external_url_li = self.__ascii2d_get_external_url(single.origin)
                    if not external_url_li and not single.url:
                        continue
                    elif single.url:
                        url = single.url
                    else:
                        url = external_url_li
                    sin_di = {
                        "title": single.title,
                        "thumbnail": single.thumbnail,
                        "url": urllib.parse.unquote(url),
                        "source": "ascii2d",
                    }
                    thumbnail_urls.append(single.thumbnail)
                    result_li.append(sin_di)
                thumbnail_bytes = await self.ImageBatchDownload(thumbnail_urls, self.client)
                i = 0
                for single in result_li:
                    single["thumbnail_bytes"] = thumbnail_bytes[i]
                    i += 1
        except Exception as e:
            logger.error(e)
            return []
        finally:
            logger.success(f"ascii2d result:{len(result_li)}")
            # NOTE(review): `return` in `finally` overrides the `return []` above,
            # so partial results survive errors.
            return result_li

    async def __yandex_build_result(self, result_num=5) -> list:
        """Scrape the Yandex reverse-image "sites" section.

        Parameter:
        ---------
        * result_num : number of results wanted
        """
        logger.info("yandex searching...")
        try:
            yandexurl = f"https://yandex.com/images/search"
            data = {
                "rpt": "imageview",
                "url": self.__imgopsUrl,
            }
            result_li = []

            yandexPage = await self.client.get(url=yandexurl, params=data, headers=self.__generalHeader, timeout=20)
            yandexHtml = etree.HTML(yandexPage.text)
            # The results live as JSON inside a data-state attribute.
            InfoJSON = yandexHtml.xpath('//*[@class="cbir-section cbir-section_name_sites"]/div/@data-state')[0]
            result_dict = json.loads(InfoJSON)
            thumbnail_urls = []
            for single in result_dict["sites"][:result_num]:
                thumbnail_urls.append("https:" + single["thumb"]["url"])
            thumbnail_bytes = await self.ImageBatchDownload(thumbnail_urls, self.client)
            i = 0
            for single in result_dict["sites"][:result_num]:
                sin_di = {
                    "source": "Yandex",
                    "title": single["title"],  # result title
                    "thumbnail": "https:" + single["thumb"]["url"],  # thumbnail URL
                    "url": urllib.parse.unquote(single["url"]),  # source page URL
                    "description": single["description"],  # description text
                    "domain": single["domain"],  # source site domain
                    "thumbnail_bytes": thumbnail_bytes[i],
                }
                i += 1
                result_li.append(sin_di)
            logger.success(f"yandex result:{len(result_li)}")
            return result_li
        except Exception as e:
            raise e
            # NOTE(review): unreachable — follows `raise`.
            logger.error(e)
        finally:
            # NOTE(review): `return` in `finally` swallows the re-raise above;
            # result_li may also be a partial list at this point.
            return result_li

    async def doSearch(self) -> None:
        """Run the full pipeline: fetch + rehost the image, query all four
        engines concurrently, and render the summary picture."""
        await self.__getImgbytes()
        await self.__uploadToImgops()
        # All four engine queries run concurrently; results are concatenated in
        # a fixed order: saucenao, ascii2d, Google, Yandex.
        task_saucenao = asyncio.create_task(self.__saucenao_build_result())
        task_ascii2d = asyncio.create_task(self.__ascii2d_build_result())
        task_google = asyncio.create_task(self.__google_build_result())
        task_yandex = asyncio.create_task(self.__yandex_build_result())

        self.__result_info = (await task_saucenao) + (await task_ascii2d) + (await task_google) + (await task_yandex)
        result_pic = await self.__draw()

        self.__picNinfo = {
            "pic": result_pic,
            "info": self.__result_info,
        }

    def getResultDict(self):
        """
        Returns
        ----------
        {
            "pic": bytes,
            "info": list,
        }
        """
        return self.__picNinfo
425 |
426 |
class Cookies:
    """An httpx cookie jar backed by a Cookie-Editor style JSON file.

    The file holds a list of objects with at least ``name``, ``value`` and
    ``domain`` keys.  ``update()`` merges ``Set-Cookie`` response headers into
    the jar and persists the merged state back to disk via ``save()``.
    """

    # Instance attributes (set in __init__):
    #   filepath     -- path of the backing JSON file
    #   cookies      -- httpx.Cookies jar built from that file
    #   cookies_json -- parsed JSON list, kept in sync with the jar for save()

    def __init__(self, filepath):
        """Load cookies from *filepath* into a fresh instance-level jar."""
        self.filepath = filepath
        # Bug fix: these were class-level attributes before, so every instance
        # shared (and kept mutating) one jar and one JSON list.
        self.cookies = httpx.Cookies()
        with open(filepath, "r", encoding="utf-8") as f:
            self.cookies_json = json.load(f)

        for cookie in self.cookies_json:
            self.cookies.set(cookie["name"], cookie["value"], cookie["domain"])

    def update(self, response_headers: httpx.Headers):
        """Merge any Set-Cookie headers from *response_headers* into the jar
        and write the result to disk.  Returns the jar when nothing changed."""
        set_cookies = response_headers.get_list("set-cookie")

        if not set_cookies:
            return self.cookies

        # Rebuild the jar so replaced cookies don't accumulate duplicates.
        new_cookies = httpx.Cookies()
        for cookie in self.cookies.jar:
            new_cookies.set(cookie.name, cookie.value, cookie.domain)

        for cookie_str in set_cookies:
            cookie_parts = cookie_str.split(";")
            if not cookie_parts:
                continue

            name_value = cookie_parts[0].strip().split("=", 1)
            if len(name_value) != 2:
                continue

            name, value = name_value

            # Bug fix: default the domain per cookie.  Previously `domain` was
            # unbound on the first header (NameError) or silently carried over
            # the previous header's value when no Domain attribute was present.
            domain = ""
            for part in cookie_parts[1:]:
                if part.strip().lower().startswith("domain="):
                    domain = part.split("=", 1)[1].strip()
                    break

            new_cookies.set(name, value, domain)

            # Bug fix: mutate the JSON mirror in place.  The old code rebound
            # the loop variable (`cookie = {...}`), which never touched
            # cookies_json, so save() always wrote stale data; new cookies
            # were never persisted at all.
            for entry in self.cookies_json:
                if entry["name"] == name:
                    entry["value"] = value
                    if domain:
                        entry["domain"] = domain
                    break
            else:
                self.cookies_json.append({"domain": domain, "name": name, "value": value})

        self.cookies = new_cookies
        self.save()

    def save(self):
        """Write the JSON mirror of the jar back to the backing file."""
        with open(self.filepath, "w", encoding="utf-8") as f:
            json.dump(self.cookies_json, f, ensure_ascii=False, indent=4)
479 |
def parseBase64Image(document: etree.Element) -> dict[str, str]:
    """Extract element ids and their base64 image payloads from a Google page.

    Google Lens inlines thumbnails as ``data:image/jpeg;base64,...`` strings
    assigned to element ids inside ``<script nonce=...>`` blocks.

    Returns a mapping of element id -> base64 string, with the escaped
    padding ``\\x3d`` restored to ``=``.
    """
    res_dic: dict[str, str] = {}
    for script in document.xpath("//script[@nonce]"):
        func = script.xpath("./text()")
        func_text: str = func[0] if func else ""

        # The first ['...'] literal in the script names the target element(s).
        id_match = re.search(r"\['(.*?)'\]", func_text)
        img_id = id_match.group(1) if id_match else None  # renamed: `id` shadowed the builtin

        base64_match = re.search(r"data:image/jpeg;base64,(.*?)'", func_text)
        b64 = base64_match.group(1).replace(r"\x3d", "=") if base64_match else None

        if img_id is not None and b64 is not None:
            # Several ids may share one image: "['a','b']" captures "a','b".
            if "','" in img_id:
                for dimg in img_id.split("','"):
                    res_dic[dimg] = b64
            else:
                res_dic[img_id] = b64

    return res_dic
503 |
504 |
if __name__ == "__main__":
    # Manual smoke test: search a hard-coded image through a local proxy and
    # save the rendered summary to 00.jpg.  Requires googleaCookies.json and a
    # proxy listening on 7890; not exercised in normal plugin operation.
    url = r"https://p.inari.site/usr/369/67821385a0fe3.jpg"
    google_cookies = Cookies("googleaCookies.json")
    proxy_port = 7890

    async def main():
        # NOTE(review): `proxies=` is the pre-0.26 httpx keyword; the plugin's
        # __init__.py uses `proxy=` — confirm which the pinned httpx accepts.
        async with httpx.AsyncClient(
            proxies=f"http://127.0.0.1:{proxy_port}",
            headers={
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
            },
        ) as client:

            aa = Imgexploration(
                pic_url=url,
                client=client,
                proxy=f"http://127.0.0.1:{proxy_port}",
                saucenao_apikey="",
                google_cookies=google_cookies,
            )
            await aa.doSearch()
            img = Image.open(BytesIO(aa.getResultDict()["pic"]))
            img.show()
            img.save("00.jpg", format="JPEG", quality=100)

    asyncio.run(main())
531 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "nonebot-plugin-imgexploration"
3 | version = "0.4.4"
4 | description = "Google、Yandex和基于PicImageSearch的saucenao、ascii2d搜图"
5 | authors = ["cpufan"]
6 | readme = "README.md"
7 | packages = [{include = "nonebot_plugin_imgexploration"}]
8 |
9 | [tool.poetry.dependencies]
10 | python = "^3.10"
11 | nonebot2 = "^2.0.0rc2"
12 | nonebot-adapter-onebot = "^2.1.5"
13 | brotli = "^1.1.0"
14 | PicImageSearch = "^3.9.0"
15 | httpx = "^0.23.0"
16 | lxml = "^4.9.0"
17 | loguru = "^0.6.0"
18 | pillow = "^9.0.0"
19 |
20 | [build-system]
21 | requires = ["poetry-core"]
22 | build-backend = "poetry.core.masonry.api"
23 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | httpx>=0.25.2
2 | loguru>=0.7.0
3 | lxml>=4.9.4
4 | Pillow>=10.0.1
5 | nonebot2>=2.0.0rc2
6 | nonebot-adapter-onebot>=2.1.5
7 | PicImageSearch>=3.9.7
8 | brotli>=1.1.0
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | """A setuptools based setup module.
2 | See:
3 | https://packaging.python.org/guides/distributing-packages-using-setuptools/
4 | https://github.com/pypa/sampleproject
5 | """
6 | import setuptools
7 | import os
8 |
# Read the README so PyPI renders it as the long description.
CUR_DIR = os.path.abspath(os.path.dirname(__file__))
README = os.path.join(CUR_DIR, "README.md")
# NOTE(review): README (the absolute path) is computed but the open() below
# uses the relative "README.md" — confirm which was intended.
with open("README.md", "r", encoding="utf-8") as fd:
    long_description = fd.read()
13 |
14 | # Arguments marked as "Required" below must be included for upload to PyPI.
15 | # Fields marked as "Optional" may be commented out.
16 |
17 | setuptools.setup(
18 | # This is the name of your project. The first time you publish this
19 | # package, this name will be registered for you. It will determine how
20 | # users can install this project, e.g.:
21 | #
22 | # $ pip install sampleproject
23 | #
24 | # And where it will live on PyPI: https://pypi.org/project/sampleproject/
25 | #
26 | # There are some restrictions on what makes a valid project name
27 | # specification here:
28 | # https://packaging.python.org/specifications/core-metadata/#name
29 | # Required
30 | name="nonebot-plugin-imgexploration",
31 | # Versions should comply with PEP 440:
32 | # https://www.python.org/dev/peps/pep-0440/
33 | #
34 | # For a discussion on single-sourcing the version across setup.py and the
35 | # project code, see
36 | # https://packaging.python.org/en/latest/single_source_version.html
37 | # Required
38 | version="0.4.4",
39 | # This is a one-line description or tagline of what your project does. This
40 | # corresponds to the "Summary" metadata field:
41 | # https://packaging.python.org/specifications/core-metadata/#summary
42 | # Optional
43 | description="Nonebot2 插件,Google、Yandx和基于PicImageSearch的saucenao、ascii2d搜图",
44 | # This is an optional longer description of your project that represents
45 | # the body of text which users will see when they visit PyPI.
46 | #
47 | # Often, this is the same as your README, so you can just read it in from
48 | # that file directly (as we have already done above)
49 | #
50 | # This field corresponds to the "Description" metadata field:
51 | # https://packaging.python.org/specifications/core-metadata/#description-optional
52 | # Optional
53 | long_description=long_description,
54 | # Denotes that our long_description is in Markdown; valid values are
55 | # text/plain, text/x-rst, and text/markdown
56 | #
57 | # Optional if long_description is written in reStructuredText (rst) but
58 | # required for plain-text or Markdown; if unspecified, "applications should
59 | # attempt to render [the long_description] as text/x-rst; charset=UTF-8 and
60 | # fall back to text/plain if it is not valid rst" (see link below)
61 | #
62 | # This field corresponds to the "Description-Content-Type" metadata field:
63 | # https://packaging.python.org/specifications/core-metadata/#description-content-type-optional
64 | # Optional
65 | long_description_content_type="text/markdown",
66 | # This should be a valid link to your project's main homepage.
67 | #
68 | # This field corresponds to the "Home-Page" metadata field:
69 | # https://packaging.python.org/specifications/core-metadata/#home-page-optional
70 | # Optional
71 | url="https://github.com/cpuopt/nonebot_plugin_imgexploration",
72 | # This should be your name or the name of the organization which owns the
73 | # project.
74 | # Optional
75 | author="cpufan",
76 | # This should be a valid email address corresponding to the author listed
77 | # above.
78 | # Optional
79 | author_email="cpufan2001@gmail.com",
80 | # You can just specify package directories manually here if your project is
81 | # simple. Or you can use find_packages().
82 | #
83 | # Alternatively, if you just want to distribute a single Python file, use
84 | # the `py_modules` argument instead as follows, which will expect a file
85 | # called `my_module.py` to exist:
86 | #
87 | # py_modules=["my_module"],
88 | #
89 | # Required
90 | # packages = [""],
91 | packages=setuptools.find_packages(),
92 | # This field lists other packages that your project depends on to run.
93 | # Any package you put here will be installed by pip when your project is
94 | # installed, so they must be valid existing projects.
95 | #
96 | # For an analysis of "install_requires" vs pip's requirements files see:
97 | # https://packaging.python.org/en/latest/requirements.html
98 | # Optional
99 | install_requires=[
100 | "colorama>=0.4.2",
101 | "nonebot2 >= 2.0.0rc2",
102 | "nonebot-adapter-onebot >= 2.1.5",
103 | "brotli >= 1.0.0",
104 | "PicImageSearch >= 3.9.0",
105 | "httpx >= 0.23.0",
106 | "lxml >= 4.9.0",
107 | "loguru >= 0.6.0",
108 | "pillow >= 9.0.0"
109 | ],
110 | # To provide executable scripts, use entry points in preference to the
111 | # "scripts" keyword. Entry points provide cross-platform support and allow
112 | # `pip` to create the appropriate form of executable for the target
113 | # platform.
114 | #
115 | # For example, the following would provide a command called `sample` which
116 | # executes the function `main` from this package when invoked:
117 | # Optional
118 | entry_points={},
119 | # Specify which Python versions you support. In contrast to the
120 | # 'Programming Language' classifiers above, 'pip install' will check this
121 | # and refuse to install the project if the version does not match. If you
122 | # do not support Python 2, you can simplify this to '>=3.5' or similar, see
123 | # https://packaging.python.org/guides/distributing-packages-using-setuptools/#python-requires
124 | # Optional
125 | # python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4',
126 | # If there are data files included in your packages that need to be
127 | # installed, specify them here.
128 | #
129 | # If using Python 2.6 or earlier, then these have to be included in
130 | # MANIFEST.in as well.
131 | # package_data={ # Optional
132 | # 'sample': ['package_data.dat'],
133 | # },
    # Although 'package_data' is the preferred approach, in some cases you may
135 | # need to place data files outside of your packages. See:
136 | # http://docs.python.org/3.4/distutils/setupscript.html#installing-additional-files
137 | #
138 | # In this case, 'data_file' will be installed into '/my_data'
139 | # Optional
140 | # data_files=[('my_data', ['data/data_file'])],
141 | # Classifiers help users find your project by categorizing it.
142 | #
143 | # For a list of valid classifiers, see https://pypi.org/classifiers/
144 | # Optional
145 | classifiers=(
146 | # How mature is this project? Common values are
147 | # 3 - Alpha
148 | # 4 - Beta
149 | # 5 - Production/Stable
150 | "Development Status :: 3 - Alpha",
151 | # Indicate who your project is intended for
152 | "Intended Audience :: Developers",
153 | "Topic :: Software Development :: Build Tools",
154 | # Pick your license as you wish
155 | "License :: OSI Approved :: MIT License",
156 | # Specify the Python versions you support here. In particular, ensure
157 | # that you indicate whether you support Python 2, Python 3 or both.
        # These classifiers are *not* checked by 'pip install'. See instead
        # the 'python_requires' argument (currently commented out above).
160 | "Programming Language :: Python :: 3.10",
161 | "Programming Language :: Python :: 3.11",
162 | "Programming Language :: Python",
163 | ),
164 | # This field adds keywords for your project which will appear on the
165 | # project page. What does your project relate to?
166 | #
167 | # Note that this is a string of words separated by whitespace, not a list.
168 | # Optional
169 | keywords="nonebot nonebot-plugin imagesearch",
170 | # When your source code is in a subdirectory under the project root, e.g.
171 | # `src/`, it is necessary to specify the `package_dir` argument.
172 | # Optional
173 | # package_dir={'': 'src'},
174 | # List additional URLs that are relevant to your project as a dict.
175 | #
176 | # This field corresponds to the "Project-URL" metadata fields:
177 | # https://packaging.python.org/specifications/core-metadata/#project-url-multiple-use
178 | #
179 | # Examples listed include a pattern for specifying where the package tracks
180 | # issues, where the source is hosted, where to say thanks to the package
181 | # maintainers, and where to support the project financially. The key is
182 | # what's used to render the link text on PyPI.
183 | # project_urls={ # Optional
184 | # 'Bug Reports': 'https://github.com/pypa/sampleproject/issues',
185 | # 'Funding': 'https://donate.pypi.org',
186 | # 'Say Thanks!': 'http://saythanks.io/to/example',
187 | # 'Source': 'https://github.com/pypa/sampleproject/',
188 | # },
189 | )
190 |
--------------------------------------------------------------------------------