├── .gitignore ├── LICENSE ├── app ├── common.py ├── gt3 │ ├── nine │ │ ├── nine.py │ │ ├── nine4jsapp.py │ │ └── ninejs.py │ └── word │ │ ├── word.py │ │ ├── wordjs.py │ │ └── wordjsapp.py ├── gt4 │ └── iconmi │ │ └── iconmi.py ├── handleprocess.py ├── loadmodel.py ├── utils.py └── uvicorn_config.json ├── assets ├── icon4 │ ├── 0a5573.png │ ├── 0ab207.png │ ├── 0ac1df.png │ ├── 0b236d.png │ ├── 0b41a3.png │ ├── 0c2974.png │ ├── 3f9cdf.png │ ├── 94cb8d.png │ ├── c59e7a.png │ ├── imgs_00141_20081.png │ ├── imgs_00142_59845.png │ ├── imgs_00145_79210.png │ ├── imgs_00146_99736.png │ └── imgs_00150_46045.png ├── nine3 │ ├── img_00000_37458.png │ ├── img_00001_54480.png │ ├── img_00002_59670.png │ ├── img_00003_47146.png │ ├── img_00004_50080.png │ ├── img_00005_67809.png │ ├── img_00006_25480.png │ ├── ques_00000_37458.png │ ├── ques_00001_54480.png │ ├── ques_00002_59670.png │ ├── ques_00003_47146.png │ ├── ques_00004_50080.png │ ├── ques_00005_67809.png │ └── ques_00006_25480.png ├── temp1_output.png ├── temp2_output.png ├── temp3_output.png └── word3 │ ├── pic_00355_54552.png │ ├── pic_00355_67108.png │ ├── pic_00355_91218.png │ ├── pic_00356_20119.png │ ├── pic_00356_24524.png │ ├── pic_00356_46593.png │ ├── pic_00356_73261.png │ └── pic_00357_13273.png ├── conf ├── config.py └── config.yaml ├── demo_geetest3nine.py ├── demo_geetest3word.py ├── demo_geetest4icon4mi.py ├── imgsrc ├── a.jpg └── w.png ├── jsdemo_g3word_1.py ├── jsdemo_g3word_2.py ├── jsdemo_g4icon.py ├── jsdemo_nine3.py ├── model └── sha256.txt ├── package.json ├── readme.md ├── requirements.txt ├── service.py ├── src ├── method │ ├── GTClick.py │ └── GTnine.py └── utils │ ├── MakeCharImage.py │ ├── SiameseOnnx.py │ ├── YoloOnnx.py │ ├── nine.py │ ├── outdata.py │ ├── siamese.py │ ├── simsun.ttc │ ├── utils.py │ └── yoloclass.py └── webjs ├── icon4 ├── demo_g4icon.js └── tools.py ├── nine3 ├── demo.js └── utils.py └── word3 ├── f1 ├── demo.js └── tools.py └── f2 ├── biblg3word.js ├── loadmodel.py └── tools.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.onnx 3 | *.pt 4 | model/** 5 | *.log 6 | temp 7 | temp1/ 8 | temp2/ 9 | temp3/ 10 | .DS_Store 11 | node_modules 12 | package-lock.json 13 | log.txt 14 | !model/sha256.txt -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 caisxg 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /app/common.py: -------------------------------------------------------------------------------- 1 | """ 2 | 一些公共的类 3 | """ 4 | from enum import Enum 5 | from typing import Optional, Union 6 | from typing import List, Optional 7 | from pydantic import BaseModel, Field, validator 8 | 9 | 10 | 11 | class Input(BaseModel): 12 | """ 13 | 输入的数据类 14 | """ 15 | dataType: int = Field(..., description="数据类型, 1: url, 2: 文件流", example=1) # 16 | imageSource: list[str] = Field(..., description="base64的图片", example="[base64编码的图片数据1]") 17 | extraicon: Optional[list] = Field(None, description="base64的icon图片", example="[base64编码的图片数据1, base64编码的图片数据2]") 18 | imageID: Optional[str] = Field(None, description="图片的id", example="string") 19 | token: Optional[str] = Field(None, description="token", example="string") 20 | 21 | # # 验证器 22 | @validator('dataType') 23 | def dataType_must_be_int(cls, v): 24 | assert v in [1, 2], "dataType must be 1 or 2" 25 | return v 26 | 27 | 28 | class InputChangeIdword3(BaseModel): 29 | """ 30 | 输入的数据类 31 | """ 32 | challenge: str = Field(..., description="challenge参数", example="string") 33 | gt: str = Field(..., description="gt参数", example="string") 34 | key: str = Field(..., description="key参数(授权参数)", example="string") 35 | referer: Optional[str] = Field(None, description="referer 参数(可选)", example="string") 36 | ua: Optional[str] = Field(None, description="ua 参数(可选)", example="string") 37 | origin: Optional[str] = Field(None, description="origin 参数(可选)", example="string") 38 | pic: Optional[str] = Field(None, description="pic 参数(可选)", example="string") 39 | c: Optional[List[int]] = Field(None, description="c 参数(可选)", example=[1, 2, 3, 4]) 40 | s: Optional[str] = Field(None, description="s 参数(可选)", example="string") 41 | 42 | 43 | class InputChangeIdnine(BaseModel): 44 | """ 45 | 输入的数据类 46 | """ 47 | gt: str = Field(..., description="gt", example="string") 48 | key: str = Field(..., description="key", example="string") 49 | referer: Optional[str] = Field(None, description="referer", example="string") 50 | ua: Optional[str] = Field(None, description="ua", example="string") 51 | origin: Optional[str] = Field(None, description="origin", example="string") 52 | 53 | 54 | 55 | class Output(BaseModel): 56 | code: int = Field(..., description="state code(状态码, 如果是 200,证明这边提供的服务没有问题) ", example=200) #code 的值只能是 StatusCodeEnum 枚举中的值, 200 或 500 57 | msg: str = Field(..., description="state massage(状态的信息, 这边提供的服务简单的做一下筛选,然后转发给极验)", example="success") 58 | data: Union[list, dict, str] = Field(..., description="return data(返回的数据, 这是验证码识别的结果,原封不动的返回,不做任何修改)", example={"imageID": "string", "res": [[184, 0, 259, 67], [176,238, 244, 310], [63,70,132,142]]}) 59 | 60 | @validator('code') 61 | def code_must_be_int(cls, v): 62 | assert v in list(range(100, 6000)), "code must be 100 or 6000,放飞自我" 63 | return v 64 | 65 | responsesdict ={ 66 | 403: {"description": "Token Error"}, 67 | 422: {"description": "Input Error - Invalid DataType"}, 68 | 456: {"description": "算法未实现"}, 69 | 500: {"description": "Input Error"} 70 | } -------------------------------------------------------------------------------- /app/gt3/nine/nine.py: -------------------------------------------------------------------------------- 1 | """ 2 | 主要实现九宫格验证码的识别 3 | """ 4 | 5 | import asyncio 6 | 7 | from concurrent.futures import ThreadPoolExecutor 8 | 9 | from fastapi import APIRouter, Request 10 | 11 | from app.common import Input, Output, responsesdict 12 | from app.utils import get_res, token_validation 13 | 14 | gt3nine = APIRouter() 15 | 16 | 17 | # 这 @gt3 是一个路径操作装饰器, 18 | @gt3nine.post("/nine3", 19 | status_code=200, 20 | response_model=Output, 21 | summary = "三代九宫格", 22 | description = "这是一个三代九宫格", 23 | response_description = "返回json格式", 24 | responses=responsesdict 25 | ) 26 | async def nine3( 27 | input: Input, 28 | request: Request, 29 | ) -> Output: 30 | try: 31 | headers, input_data = token_validation(input, request) 32 | if headers is None and input_data is None: 33 | Output(code=403, msg="Token error", data={}) 34 | # data = get_res("gt3nine",input_data, headers = headers) 35 | loop = asyncio.get_event_loop() 36 | newexecutor = ThreadPoolExecutor(max_workers=3) 37 | data = await loop.run_in_executor(newexecutor, get_res, "gt3nine", input_data, headers) 38 | return Output(code=200, msg="success", data=data) 39 | except: 40 | return Output(code=500, msg="Server Error", data={}) 41 | -------------------------------------------------------------------------------- /app/gt3/nine/nine4jsapp.py: -------------------------------------------------------------------------------- 1 | """ 2 | 主要实现九宫格验证码的识别 3 | """ 4 | 5 | import asyncio 6 | 7 | from concurrent.futures import ThreadPoolExecutor 8 | 9 | from fastapi import APIRouter, Request 10 | 11 | from app.common import Input, Output, responsesdict, InputChangeIdnine 12 | from app.utils import token_validation 13 | from app.gt3.nine.ninejs import get_resjs 14 | 15 | gt4ninejs = APIRouter() 16 | 17 | 18 | 19 | @gt4ninejs.post("/gt4nine", 20 | status_code=200, 21 | response_model=Output, 22 | summary = "si代九宫格js", 23 | description = "这是一个四代九宫格js", 24 | response_description = "返回json格式", 25 | responses=responsesdict 26 | ) 27 | async def gt4nine( 28 | input: InputChangeIdnine, 29 | request: Request, 30 | ) -> Output: 31 | try: 32 | headers, input_data = token_validation(input, request) 33 | if headers is None and input_data is None: 34 | Output(code=403, msg="Token error", data={}) 35 | # data = get_res("gt3nine",input_data, headers = headers) 36 | loop = asyncio.get_event_loop() 37 | newexecutor = ThreadPoolExecutor(max_workers=3) 38 | data = await loop.run_in_executor(newexecutor, get_resjs, input_data, headers) 39 | return Output(code=200, msg="success", data=data) 40 | except: 41 | return Output(code=500, msg="Server Error", data={}) 42 | -------------------------------------------------------------------------------- /app/gt3/nine/ninejs.py: -------------------------------------------------------------------------------- 1 |  2 | """ 3 | 主要实现九宫格验证码的识别 4 | """ 5 | import re, requests, time, uuid, execjs, json 6 | from jsonpath import jsonpath 7 | from pathlib import Path 8 | from webjs.word3.f2.loadmodel import gt3nine 9 | from webjs.nine3.utils import * 10 | 11 | 12 | headers = { 13 | 'authority': 'gt4.geetest.com', 14 | 'accept': '*/*', 15 | 'accept-language': 'zh-CN,zh;q=0.9', 16 | 'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3" 17 | } 18 | 19 | def get_resjson(captcha_id, headers=None): 20 | if headers is None: 21 | headers = headers 22 | 23 | 24 | session = requests.Session() 25 | params = { 26 | 'callback': f'geetest_{int(time.time() * 1000)}', 27 | 'captcha_id': captcha_id, 28 | 'challenge': str(uuid.uuid4()), 29 | 'client_type': 'web', 30 | 'risk_type': 'nine', 31 | 'lang': 'zh', 32 | } 33 | response = session.get('https://gcaptcha4.geetest.com/load', params=params, headers=headers) 34 | res = response.text 35 | resp_json = json.loads(res[res.find("(") + 1:res.rfind(")")]) 36 | captcha_type =jsonpath(resp_json, '$..captcha_type')[0] 37 | assert captcha_type == 'nine', "captcha_type should be nine" 38 | 39 | kk, ss = 1, 1 40 | imgs_dir = ques_dir = "temp_nine" 41 | Path(imgs_dir).mkdir(parents=True, exist_ok=True) 42 | imgs, imgs_count = generate_url(jsonpath(resp_json, '$..imgs')[0]) 43 | imgs_path = generate_paths(imgs_dir, f"img_{kk}_{ss}", imgs_count) 44 | download_img(imgs, imgs_path) 45 | 46 | ques, ques_count = generate_url(jsonpath(resp_json, '$..ques')[0]) 47 | ques_path = generate_paths(ques_dir, f"ques_{kk}_{ss}", ques_count) 48 | download_img(ques, ques_path) 49 | return resp_json, captcha_id, imgs_path, ques_path, session, headers 50 | 51 | 52 | def get_verify(session, captcha_id, resp_json, userresponse, headers): 53 | #下面传递的参数都是从resp_json中获取的 54 | lot_number = jsonpath(resp_json, '$..lot_number')[0] 55 | nine_nums = jsonpath(resp_json, '$..nine_nums')[0] 56 | payload = jsonpath(resp_json, '$..payload')[0] 57 | payload_protocol = jsonpath(resp_json, '$..payload_protocol')[0] 58 | datetime = jsonpath(resp_json, '$..datetime')[0] 59 | process_token = jsonpath(resp_json, '$..process_token')[0] 60 | with open("webjs/nine3/demo.js", "r") as f: 61 | jscode = f.read() 62 | ctx = execjs.compile(jscode) 63 | w = ctx.call("get_w", captcha_id, lot_number, datetime, userresponse) 64 | 65 | params = { 66 | 'callback': f'geetest_{int(time.time() * 1000)}', 67 | 'captcha_id': captcha_id, 68 | 'client_type': 'web', 69 | 'lot_number': lot_number, 70 | 'risk_type': 'nine', 71 | 'payload': payload, 72 | 'process_token': process_token, 73 | 'payload_protocol': '1', 74 | 'pt': '1', 75 | 'w': w, 76 | } 77 | url3 = 'https://gcaptcha4.geetest.com/verify' 78 | response = session.get(url3, params=params, headers=headers) 79 | session.close() 80 | return response.text 81 | 82 | 83 | 84 | def get_resjs(input_dict: dict, headers: dict, 85 | get_resjson = get_resjson, get_verify = get_verify): 86 | ''' 87 | 四代九宫格js, 传入验证码id, 返回验证结果, 88 | ''' 89 | try: 90 | captcha_id = input_dict.get("gt", None) 91 | assert captcha_id is not None, "captcha_id is None" 92 | except: 93 | return {"code": 400, "msg": "captcha_id is None", "data": {}} 94 | if headers is None: 95 | headers = headers 96 | 97 | resp_json, captcha_id, imgs_path, ques_path, session, headers = get_resjson(captcha_id, headers) 98 | out = gt3nine.run(imgs_path[0], ques_path) 99 | userresponse = out.nine_rowcol 100 | resp = get_verify(session, captcha_id, resp_json, userresponse, headers) 101 | try: 102 | resp1 = resp.json() 103 | except: 104 | try: 105 | resp1 = json.loads(resp[resp.find("(") + 1:resp.rfind(")")]) 106 | except: 107 | resp1 = resp.text 108 | if not resp1: 109 | return {"code": 500, "msg": "Server Error", "data": {}} 110 | return resp1 111 | 112 | if __name__ == "__main__": 113 | pass -------------------------------------------------------------------------------- /app/gt3/word/word.py: -------------------------------------------------------------------------------- 1 | """ 2 | 主要实现文字点选验证码的识别 3 | """ 4 | 5 | import asyncio 6 | from concurrent.futures import ThreadPoolExecutor 7 | 8 | from fastapi import APIRouter, Request 9 | 10 | from app.common import Input, Output, responsesdict 11 | from app.utils import get_res, token_validation 12 | 13 | gt3word = APIRouter() 14 | 15 | 16 | # 这 @gt3 是一个路径操作装饰器, 17 | @gt3word.post("/word3", 18 | status_code=200, 19 | response_model=Output, 20 | summary = "三代文字点选 ", 21 | description = "这是一个三代的文字点选", 22 | response_description = "返回json格式", 23 | responses=responsesdict 24 | ) 25 | async def word3( 26 | input: Input, 27 | request: Request, 28 | ) -> Output: 29 | try: 30 | headers, input_data = token_validation(input, request) 31 | if headers is None and input_data is None: 32 | Output(code=403, msg="Token error", data={}) 33 | # data = get_res("gt3word", input_data, headers = headers) 34 | loop = asyncio.get_event_loop() 35 | newexecutor = ThreadPoolExecutor(max_workers=8) 36 | data = await loop.run_in_executor(newexecutor, get_res, "gt3word", input_data, headers) 37 | return Output(code=200, msg="success", data=data) 38 | except: 39 | return Output(code=500, msg="Server Error", data={}) 40 | -------------------------------------------------------------------------------- /app/gt3/word/wordjs.py: -------------------------------------------------------------------------------- 1 |  2 | import requests, execjs, json, time, os 3 | from jsonpath import jsonpath 4 | from webjs.word3.f2.tools import download_img, headers, cookies, HD 5 | from webjs.word3.f2.loadmodel import gt3word 6 | import execjs 7 | import random 8 | from typing import Any 9 | from loguru import logger 10 | import asyncio 11 | import uuid 12 | ### 2. 获取点击类型 13 | def get_click_type(gt, challenge, headers=None): 14 | headers = headers if headers is not None else HD 15 | session = requests.Session() 16 | params = { 17 | 'gt': gt, 18 | 'challenge': challenge, 19 | 'lang': 'zh-cn', 20 | 'pt': '0', 21 | 'client_type': 'web', 22 | 'callback': f'geetest_{int(time.time() * 1000) - 1000}' 23 | } 24 | ## 获取点击类型 25 | 26 | response = session.get('https://api.geetest.com/ajax.php', headers=headers, params=params) 27 | restext = response.text 28 | result = json.loads(restext[restext.find("(") + 1:restext.rfind(")")]) 29 | click_type = jsonpath(result, '$..result')[0] 30 | # assert click_type == 'click', "点击类型不是 click" 31 | if click_type != 'click': 32 | logger.warning("点击类型不是 click") 33 | return click_type, session, headers 34 | 35 | 36 | ### 3. 获取 json 详细信息 37 | def get_gtresponse(gt, challenge, session,headers=None): 38 | headers = headers if headers is not None else HD 39 | params = { 40 | 'is_next': 'true', 41 | 'type': 'click', 42 | 'gt': gt, 43 | 'challenge': challenge, 44 | 'lang': 'zh-cn', 45 | 'https': 'false', 46 | 'protocol': 'https://', 47 | 'offline': 'false', 48 | 'product': 'embed', 49 | 'api_server': 'api.geetest.com', 50 | 'isPC': 'true', 51 | 'autoReset': 'true', 52 | 'width': '100%', 53 | 'callback': f'geetest_{int(time.time() * 1000) - 1000}', 54 | } 55 | response = session.get('https://api.geetest.com/get.php', params=params, headers=headers) 56 | restext = response.text 57 | result = json.loads(restext[restext.find("(") + 1:restext.rfind(")")]) 58 | myc = jsonpath(result, '$..c')[0] 59 | mys = jsonpath(result, '$..s')[0] 60 | pic = jsonpath(result, '$..pic')[0] 61 | return myc, mys, pic, session, headers, result 62 | 63 | 64 | 65 | 66 | def get_resjson(gt, challenge, session, headers=None): 67 | ''' 68 | 三代点选js, 传入验证码id, 返回验证结果, 69 | ''' 70 | headers = headers if headers is not None else HD 71 | ### 3. 获取 json 详细信息 72 | myc, mys, pic, session, headers, result = get_gtresponse(gt, challenge, session, headers) 73 | ### 4. 下载图片,获取坐标, 并转为极验需要的格式 74 | os.makedirs('temp', exist_ok=True) 75 | imgs_path = 'temp/a.jpg' 76 | download_img(pic, imgs_path) 77 | 78 | return myc, mys, pic, session, headers, result, imgs_path 79 | 80 | 81 | 82 | ### 4. 最后验证 83 | def validate(gt, challenge, w, session, headers: dict = None): 84 | headers = headers if headers is not None else HD 85 | params = { 86 | 'gt': gt, 87 | 'challenge': challenge, 88 | 'lang': 'zh-cn', 89 | 'pt': '0', 90 | 'client_type': 'web', 91 | "w": w, 92 | 'callback': f'geetest_{int(time.time() * 1000) + 1000}', 93 | } 94 | time.sleep(random.uniform(0.1, 0.3)) #### 休息一下,防止太快 95 | response = session.get('https://api.geetest.com/ajax.php', headers=headers, params=params) 96 | session.close() 97 | return response.text 98 | 99 | 100 | 101 | 102 | with open("./webjs/word3/f2/biblg3word.js", 'r', encoding='utf-8') as f: 103 | jscode = f.read() 104 | ctx = execjs.compile(jscode) 105 | 106 | def get_resjs(input_dict: dict, 107 | headers: dict, 108 | get_resjson = get_resjson, 109 | validate = validate, 110 | ctx = ctx, 111 | gt3word = gt3word, 112 | get_click_type = get_click_type 113 | ) -> dict[str, Any] | Any: 114 | ''' 115 | 三代点选js, 传入验证码id, 返回验证结果, 116 | ''' 117 | try: 118 | gt = input_dict.get("gt", None) 119 | challenge = input_dict.get("challenge", None) 120 | assert gt is not None, "captcha_id is None" 121 | assert challenge is not None, "challenge is None" 122 | except: 123 | return {"code": 400, "msg": "captcha_id is None", "data": {}} 124 | 125 | headers = headers if headers is not None else HD 126 | ### 2. 获取点击类型 127 | click_type, session, headers = get_click_type(gt, challenge, headers) 128 | if click_type != 'click': 129 | return {"code": 400, "msg": "click_type is not click", "data": {}} 130 | ### 3. 获取 json 详细信息 --- 下载图片 131 | myc, mys, pic, session, headers, result, imgs_path = get_resjson(gt, challenge, session, headers) 132 | ### 4.获取坐标 133 | out = gt3word.run(imgs_path) 134 | xyxy = out.targets_xyxy 135 | time.sleep(random.uniform(0.9, 1.3)) #### 休息一下,防止太快 136 | ### 4. 获取 w--- 已经自动转换为极验需要的格式 137 | w = ctx.call('get_w',xyxy , pic, gt, challenge, myc, mys ) 138 | 139 | ### 5. 最后验证 140 | resptext = validate(gt, challenge, w, session, headers) 141 | try: 142 | resp1 = json.loads(resptext[resptext.find("(") + 1:resptext.rfind(")")]) 143 | except: 144 | try: 145 | resp1 = json.loads(resptext) 146 | except: 147 | resp1 = resptext 148 | if not resp1: 149 | return {"code": 500, "msg": "Server Error", "data": {"w": w, "xyxy": xyxy, "pic": pic, "myc": myc, "mys": mys, "result": result}} 150 | return resp1 151 | 152 | 153 | 154 | async def aioget_resjs(input_dict: dict, 155 | headers: dict, 156 | get_resjson = get_resjson, 157 | validate = validate, 158 | ctx = ctx, 159 | gt3word = gt3word, 160 | get_click_type = get_click_type 161 | ) -> dict[str, Any] | Any: 162 | ''' 163 | 三代点选js, 传入验证码id, 返回验证结果, 164 | ''' 165 | try: 166 | gt = input_dict.get("gt", None) 167 | challenge = input_dict.get("challenge", None) 168 | assert gt is not None, "captcha_id is None" 169 | assert challenge is not None, "challenge is None" 170 | except: 171 | return {"code": 400, "msg": "captcha_id is None", "data": {}} 172 | 173 | headers = headers if headers is not None else HD 174 | ### 2. 获取点击类型 175 | click_type, session, headers = get_click_type(gt, challenge, headers) 176 | if click_type != 'click': 177 | return {"code": 400, "msg": "click_type is not click", "data": {}} 178 | ### 3. 获取 json 详细信息 --- 下载图片 179 | myc, mys, pic, session, headers, result, imgs_path = get_resjson(gt, challenge, session, headers) 180 | ### 4.获取坐标 181 | out = gt3word.run(imgs_path) 182 | xyxy = out.targets_xyxy 183 | # time.sleep(random.uniform(0.9, 1.3)) #### 休息一下,防止太快 184 | tt = random.uniform(1, 1.5) 185 | # 产生一个 uuid 186 | random_uuid = str(uuid.uuid4()) 187 | logger.warning(f"uuid: {random_uuid}, 休息时间: {tt}") 188 | await asyncio.sleep(tt) 189 | logger.warning(f"uuid: {random_uuid}, 休息结束") 190 | ### 4. 获取 w--- 已经自动转换为极验需要的格式 191 | w = ctx.call('get_w',xyxy , pic, gt, challenge, myc, mys ) 192 | 193 | ### 5. 最后验证 194 | resptext = validate(gt, challenge, w, session, headers) 195 | try: 196 | resp1 = json.loads(resptext[resptext.find("(") + 1:resptext.rfind(")")]) 197 | except: 198 | try: 199 | resp1 = json.loads(resptext) 200 | except: 201 | resp1 = resptext 202 | if not resp1: 203 | return {"code": 500, "msg": "Server Error", "data": {"w": w, "xyxy": xyxy, "pic": pic, "myc": myc, "mys": mys, "result": result}} 204 | return resp1 205 | 206 | 207 | if __name__ == "__main__": 208 | pass -------------------------------------------------------------------------------- /app/gt3/word/wordjsapp.py: -------------------------------------------------------------------------------- 1 | """ 2 | 主要实现三代文字点选字js的接口 3 | """ 4 | 5 | import asyncio 6 | 7 | from concurrent.futures import ThreadPoolExecutor 8 | 9 | from fastapi import APIRouter, Request 10 | 11 | from app.common import Input, Output, responsesdict, InputChangeIdword3 12 | from app.utils import token_validation 13 | from app.gt3.word.wordjs import get_resjs, aioget_resjs 14 | from loguru import logger 15 | gt3wordjs = APIRouter() 16 | 17 | 18 | @gt3wordjs.post("/gt3word", 19 | status_code=200, 20 | response_model=Output, 21 | summary = "三代文字点选(适合b站)", 22 | description = "三代文字点选(适合b站), 只需要传递 gt 和 challenge 两个参数即可,返回 json 格式数据识别结果, data 字段是极验返回的识别结果", 23 | response_description = "返回json格式, data 字段是极验返回的识别结果", 24 | responses=responsesdict 25 | ) 26 | async def gt3word( 27 | input: InputChangeIdword3, 28 | request: Request, 29 | ) -> Output: 30 | """ 31 | 参数: 32 | - input: 传入参数 33 | - request: 请求 34 | """ 35 | try: 36 | headers, input_data = token_validation(input, request) 37 | if headers is None and input_data is None: 38 | logger.info(f"ip地址:{client_host}, input_data: {input_data}, 1返回信息: Token error") 39 | Output(code=403, msg="Token error", data={}) 40 | except: 41 | logger.info(f"ip地址:{client_host}, input_data: {input_data}, 2返回信息: headers or input_data is None") 42 | return Output(code=403, msg="headers or input_data is None", data={"headers": headers, "input_data": input_data}) 43 | try: 44 | client_host = request.client.host 45 | 46 | # data = get_resjs(input_data, headers = headers) 47 | data = await asyncio.gather(aioget_resjs(input_data, headers)) 48 | # 如果 data 是 list 类型, 则返回第一个元素 49 | if isinstance(data, list): 50 | data = data[0] 51 | logger.info(f"ip地址:{client_host}, input_data: {input_data}, 3返回信息: {data}") 52 | return Output(code=200, msg="success", data=data) 53 | except: 54 | logger.info(f"ip地址:{client_host}, input_data: {input_data}, 4返回信息: Server Error") 55 | return Output(code=500, msg="Server Error", data={}) 56 | -------------------------------------------------------------------------------- /app/gt4/iconmi/iconmi.py: -------------------------------------------------------------------------------- 1 | """ 2 | 主要实现图标点选验证码的识别 3 | """ 4 | import asyncio 5 | from concurrent.futures import ThreadPoolExecutor 6 | from fastapi import APIRouter, Request 7 | from app.common import Input, Output, responsesdict 8 | from app.utils import get_res, token_validation 9 | 10 | 11 | gt4iconmi = APIRouter() 12 | @gt4iconmi.post("/icon4mi", 13 | status_code=200, 14 | response_model=Output, 15 | summary = "四代图标点选 ", 16 | description = "这是一个四代的文字点选", 17 | response_description = "返回json格式", 18 | responses=responsesdict 19 | ) 20 | async def icon4mi( 21 | input: Input, 22 | request: Request, # 请求对象,用来获取请求头,请求体等信息 23 | ) -> Output: 24 | try: 25 | headers, input_data = token_validation(input, request) 26 | if headers is None and input_data is None: 27 | Output(code=403, msg="Token error", data={}) 28 | # data = get_res("gt4icon", input_data, headers = headers) 29 | loop = asyncio.get_event_loop() 30 | newexecutor = ThreadPoolExecutor(max_workers=4) 31 | data = await loop.run_in_executor(newexecutor, get_res, "gt4icon", input_data, headers) 32 | return Output(code=200, msg="success", data=data) 33 | except: 34 | return Output(code=500, msg="Server Error", data={}) 35 | -------------------------------------------------------------------------------- /app/handleprocess.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import time 3 | 4 | def get_pids(port)-> list | list[int]: 5 | try: 6 | # 尝试使用sudo权限运行lsof命令 7 | output = subprocess.run(['sudo', 'lsof', '-i', f':{port}'], capture_output=True, text=True, check=True).stdout 8 | except: 9 | try: 10 | # 如果sudo失败,则尝试不使用sudo权限运行lsof命令 11 | output = subprocess.run(['lsof', '-i', f':{port}'], capture_output=True, text=True, check=True).stdout 12 | except subprocess.CalledProcessError: 13 | # 处理lsof命令执行失败的情况 14 | return [] 15 | 16 | # 使用列表推导式获取进程ID 17 | # print(output) # 输出 str 类型的进程信息, 18 | # output.splitlines() # 按将字符串按照分隔符\n、\r、\r\n进行分割。 返回一个列表。 keepends=False:不保留分隔符(默认值)。 19 | pids = [int(line.split()[1]) for line in output.splitlines()[1:] if line.strip()] 20 | return pids 21 | 22 | def kill_process(port) -> None: 23 | pids = get_pids(port) 24 | if not pids: 25 | return 26 | for pid in pids: 27 | subprocess.run(['sudo', 'kill', '-9', str(pid)]) 28 | time.sleep(2) # 等待进程结束,不然太快了,易出错 29 | print(f'====== Killed processes on port: {port} ======') 30 | return 31 | 32 | if __name__ == '__main__': 33 | port = 9100 34 | print(get_pids(port)) 35 | # kill_process(port) 36 | print(f'Killed processes on port {port}') 37 | -------------------------------------------------------------------------------- /app/loadmodel.py: -------------------------------------------------------------------------------- 1 | from src.method.GTClick import GTClick 2 | from src.method.GTnine import GTnine 3 | from conf.config import gtconf 4 | 5 | gt3word = GTClick( 6 | pdetect = gtconf['word']['pdetect'], 7 | per = gtconf['word']['per'], 8 | pclass = gtconf['word']['pclass'], 9 | pclasstags = gtconf['word']['pclasstags'], 10 | chars_issorted = False, 11 | rmalpha = True, 12 | ) 13 | 14 | gt3nine = GTnine(pclass=gtconf['nine']['pclass']) 15 | 16 | gt4icon = GTClick( 17 | pdetect = gtconf['icon4mi']['pdetect'], 18 | per = gtconf['icon4mi']['per'], 19 | pclass = gtconf['icon4mi']['pclass'], 20 | pclasstags = gtconf['icon4mi']['pclasstags'], 21 | chars_issorted = True, 22 | rmalpha = True, 23 | ) 24 | 25 | -------------------------------------------------------------------------------- /app/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | 函数通用接口 3 | """ 4 | import base64 5 | import hashlib 6 | from typing import Optional, Union 7 | import requests 8 | from app.loadmodel import gt4icon, gt3nine, gt3word 9 | from app.common import Input 10 | from src.utils.utils import open_image 11 | from fastapi import Request 12 | from app.gt3.nine.ninejs import get_resjson, get_verify 13 | 14 | token_list = ["abc", "abc1", "abcdQwm123dnine4", "abcdQwm123gtword3"] # abcdQwm123dnine4 这个 已出售给了客户, 请勿删除 15 | 16 | def token_validation(input: Input, request: Request): 17 | # 请求对象,用来获取请求头,请求体等信息): 18 | ############################################################ 19 | ###### 用于 js逆向, 这里用不到, 可以删除 ################ 20 | ua = request.headers.get("user-agent", "") 21 | origin = request.headers.get("origin", "") 22 | referer = request.headers.get("referer", "") 23 | headers = { 24 | "user-agent": ua, 25 | "origin": origin, 26 | "referer": referer 27 | } 28 | ############################################################ 29 | ############################################################ 30 | #################### 简单的 token 验证 ####################### 31 | input_data = input.model_dump() 32 | token = input_data.get("token", "") 33 | key = input_data.get("key", "") 34 | 35 | if token not in token_list and key not in token_list: 36 | return None, None 37 | 38 | input_data.pop("token", None) 39 | input_data.pop("key", None) 40 | 41 | return headers, input_data 42 | ############################################################ 43 | 44 | 45 | 46 | def set_imageSource(data: dict, headers = None) -> Optional[bytes]: 47 | """ 48 | 把传入的图片数据保存到本地,并返回图片的二进制数据 49 | :param data: 传入的图片数据,是一个字典 50 | 如果 dataType 为 1, 则 imageSource 是一个 url, 则直接下载图片,保存图片(丢弃) 51 | 如果 dataType 为 2, 则 imageSource 是一个 base64 编码的字符串, 则解码后保存图片 52 | :return: 返回图片的二进制数据 53 | """ 54 | if data.get('dataType', None) == 1: 55 | rep = requests.get( 56 | data['imageSource'], 57 | verify=False, 58 | headers=headers 59 | ) 60 | imageSource = rep.content 61 | img = [imageSource] 62 | extraicon = None 63 | elif data.get('dataType', None) == 2: 64 | img = data.get('imageSource', None) 65 | extraicon = data.get('extraicon', None) 66 | assert img is not None, "imageSource is None" 67 | assert isinstance(img, list), "imageSource must be a list" 68 | else: 69 | assert False, "dataType is not 1 or 2" 70 | 71 | imageSource_list = [open_image(i, rmalpha=True) for i in img] 72 | extraicon_list = [open_image(i, rmalpha=True) for i in extraicon] if extraicon else None 73 | return imageSource_list, extraicon_list 74 | 75 | maplist = { 76 | "gt4icon": gt4icon.run, 77 | "gt3nine": gt3nine.run, 78 | "gt3word": gt3word.run 79 | } 80 | def get_res(obj_name: str, input_dict: dict, headers: dict): 81 | imageID = input_dict.get("imageID", "") 82 | imageSource_list, extraicon_list = set_imageSource(input_dict, headers=headers) 83 | # 获取对象 84 | res = maplist.get(obj_name)(imageSource_list[0], extraicon_list) # 调用对象的 run 方法 85 | targets_xyxy = res.targets_xyxy 86 | data = {"imageID": imageID, "res": targets_xyxy} 87 | return data 88 | 89 | 90 | 91 | 92 | -------------------------------------------------------------------------------- /app/uvicorn_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": 1, 3 | "disable_existing_loggers": false, 4 | "formatters": { 5 | "default": { 6 | "()": "uvicorn.logging.DefaultFormatter", 7 | "fmt": "%(asctime)s - %(levelprefix)s %(message)s", 8 | "use_colors": null 9 | }, 10 | "access": { 11 | "()": "uvicorn.logging.AccessFormatter", 12 | "fmt": "%(asctime)s - %(levelprefix)s %(client_addr)s - \"%(request_line)s\" %(status_code)s", 13 | "use_colors": true 14 | } 15 | }, 16 | "handlers": { 17 | "default": { 18 | "formatter": "default", 19 | "class": "logging.FileHandler", 20 | "filename": "log/uvicorn_default.log" 21 | }, 22 | "access": { 23 | "formatter": "access", 24 | "class": "logging.FileHandler", 25 | "filename": "log/uvicorn_access.log" 26 | } 27 | }, 28 | "loggers": { 29 | "uvicorn": { 30 | "handlers": [ 31 | "default" 32 | ], 33 | "level": "INFO" 34 | }, 35 | "uvicorn.error": { 36 | "level": "INFO" 37 | }, 38 | "uvicorn.access": { 39 | "handlers": [ 40 | "access" 41 | ], 42 | "level": "INFO", 43 | "propagate": false 44 | } 45 | } 46 | } -------------------------------------------------------------------------------- /assets/icon4/0a5573.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/icon4/0a5573.png -------------------------------------------------------------------------------- /assets/icon4/0ab207.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/icon4/0ab207.png -------------------------------------------------------------------------------- /assets/icon4/0ac1df.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/icon4/0ac1df.png -------------------------------------------------------------------------------- /assets/icon4/0b236d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/icon4/0b236d.png -------------------------------------------------------------------------------- /assets/icon4/0b41a3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/icon4/0b41a3.png -------------------------------------------------------------------------------- /assets/icon4/0c2974.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/icon4/0c2974.png -------------------------------------------------------------------------------- /assets/icon4/3f9cdf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/icon4/3f9cdf.png -------------------------------------------------------------------------------- /assets/icon4/94cb8d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/icon4/94cb8d.png -------------------------------------------------------------------------------- /assets/icon4/c59e7a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/icon4/c59e7a.png -------------------------------------------------------------------------------- /assets/icon4/imgs_00141_20081.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/icon4/imgs_00141_20081.png -------------------------------------------------------------------------------- /assets/icon4/imgs_00142_59845.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/icon4/imgs_00142_59845.png -------------------------------------------------------------------------------- /assets/icon4/imgs_00145_79210.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/icon4/imgs_00145_79210.png -------------------------------------------------------------------------------- /assets/icon4/imgs_00146_99736.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/icon4/imgs_00146_99736.png -------------------------------------------------------------------------------- /assets/icon4/imgs_00150_46045.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/icon4/imgs_00150_46045.png -------------------------------------------------------------------------------- /assets/nine3/img_00000_37458.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/nine3/img_00000_37458.png -------------------------------------------------------------------------------- /assets/nine3/img_00001_54480.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/nine3/img_00001_54480.png -------------------------------------------------------------------------------- /assets/nine3/img_00002_59670.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/nine3/img_00002_59670.png -------------------------------------------------------------------------------- /assets/nine3/img_00003_47146.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/nine3/img_00003_47146.png -------------------------------------------------------------------------------- /assets/nine3/img_00004_50080.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/nine3/img_00004_50080.png -------------------------------------------------------------------------------- /assets/nine3/img_00005_67809.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/nine3/img_00005_67809.png -------------------------------------------------------------------------------- /assets/nine3/img_00006_25480.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/nine3/img_00006_25480.png -------------------------------------------------------------------------------- /assets/nine3/ques_00000_37458.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/nine3/ques_00000_37458.png -------------------------------------------------------------------------------- /assets/nine3/ques_00001_54480.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/nine3/ques_00001_54480.png -------------------------------------------------------------------------------- /assets/nine3/ques_00002_59670.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/nine3/ques_00002_59670.png -------------------------------------------------------------------------------- /assets/nine3/ques_00003_47146.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/nine3/ques_00003_47146.png -------------------------------------------------------------------------------- /assets/nine3/ques_00004_50080.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/nine3/ques_00004_50080.png -------------------------------------------------------------------------------- /assets/nine3/ques_00005_67809.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/nine3/ques_00005_67809.png -------------------------------------------------------------------------------- /assets/nine3/ques_00006_25480.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/nine3/ques_00006_25480.png -------------------------------------------------------------------------------- /assets/temp1_output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/temp1_output.png -------------------------------------------------------------------------------- /assets/temp2_output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/temp2_output.png -------------------------------------------------------------------------------- /assets/temp3_output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/temp3_output.png -------------------------------------------------------------------------------- /assets/word3/pic_00355_54552.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/word3/pic_00355_54552.png -------------------------------------------------------------------------------- /assets/word3/pic_00355_67108.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/word3/pic_00355_67108.png -------------------------------------------------------------------------------- /assets/word3/pic_00355_91218.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/word3/pic_00355_91218.png -------------------------------------------------------------------------------- /assets/word3/pic_00356_20119.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/word3/pic_00356_20119.png -------------------------------------------------------------------------------- /assets/word3/pic_00356_24524.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/word3/pic_00356_24524.png -------------------------------------------------------------------------------- /assets/word3/pic_00356_46593.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/word3/pic_00356_46593.png -------------------------------------------------------------------------------- /assets/word3/pic_00356_73261.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/word3/pic_00356_73261.png -------------------------------------------------------------------------------- /assets/word3/pic_00357_13273.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/word3/pic_00357_13273.png -------------------------------------------------------------------------------- /conf/config.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | 3 | # 假设你的 YAML 配置文件名为 config.yaml 4 | yaml_file_path = 'conf/config.yaml' 5 | 6 | # 读取 YAML 文件 7 | with open(yaml_file_path, 'r') as file: 8 | config = yaml.safe_load(file) 9 | 10 | gtconf = config['gt']['click'] 11 | 12 | 13 | -------------------------------------------------------------------------------- /conf/config.yaml: -------------------------------------------------------------------------------- 1 | gt: 2 | click: 3 | word: 4 | # yolo 检测模型路径 5 | pdetect: model/g3word6300/detect.pt 6 | #孪生神经网络模型路径 ,如果没有则用 detect 模型 7 | per: "model/g3word6300/simvgg19.onnx" 8 | # yolo 分类模型(一定要填写下面的标签) 9 | # 会根据这个类别来进行分类, 最多支持两个类别, 如果是两个,则第一个是具有顺序的字符类别,第二个是目标类别 10 | # 当有两个标签的时候,具有顺序的一定要放在第一个 11 | # 一个 char 标签, 一个 target 标签 12 | pclass: "model/g3word6300/muti.pt" 13 | pclasstags: ["char", "target"] 14 | nine: 15 | # 可以直接根据九宫格把图片进行划分,因此只需要一个分类模型即可 16 | pclass: model/nine3/best.pt 17 | icon4mi: 18 | pdetect: model/icon4mi800/detect.pt 19 | per: "model/icon4mi800/simvgg19.onnx" 20 | pclass: model/icon4mi800/muti.pt 21 | pclasstags: ["target"] 22 | -------------------------------------------------------------------------------- /demo_geetest3nine.py: -------------------------------------------------------------------------------- 1 | from src.utils.nine import crop_nine 2 | from src.method.GTnine import GTnine 3 | from src.utils.outdata import Outfile 4 | from pathlib import Path 5 | from conf.config import gtconf 6 | 7 | 8 | 9 | 10 | if __name__ == "__main__": 11 | gt = GTnine(pclass=gtconf["nine"]["pclass"]) 12 | charimg = ["assets/nine3/ques_00001_54480.png"] 13 | background = "assets/nine3/img_00000_37458.png" 14 | 15 | outdir = "example/temp3" 16 | Path(outdir).mkdir(parents=True, exist_ok=True) 17 | Path(outdir).mkdir(exist_ok=True) 18 | test_img = crop_nine(background) 19 | for index, i in enumerate(test_img): 20 | i.save(f"{outdir}/{index}.png") 21 | 22 | out = gt.run(background, charimg) 23 | # 高 * 宽 24 | Outfile.to_labelme(background, out, size = (261, 300), output_dir = outdir ) 25 | Outfile.draw_image(background, 26 | chars_xyxy= out.get_value("charsImage"), 27 | targets_xyxy = out.get_value("targets_xyxy"), 28 | out_path=f"{outdir}/output.png" 29 | ) 30 | ## 结果在temp3/output.png 中 -------------------------------------------------------------------------------- /demo_geetest3word.py: -------------------------------------------------------------------------------- 1 | from src.method.GTClick import GTClick 2 | from src.utils.outdata import Outfile 3 | from pathlib import Path 4 | from conf.config import gtconf 5 | 6 | gtclick = GTClick( 7 | pdetect = gtconf["word"]['pdetect'], 8 | per = gtconf["word"]['per'], 9 | pclass = gtconf["word"]['pclass'], 10 | pclasstags =gtconf["word"]['pclasstags'], 11 | chars_issorted = False, 12 | rmalpha = True, 13 | ) 14 | 15 | 16 | if __name__ == '__main__': 17 | i = "assets/word3/pic_00356_20119.png" 18 | outdir = "example/temp1" 19 | Path(outdir).mkdir(parents=True, exist_ok=True) 20 | Path(outdir).mkdir(exist_ok=True) 21 | out = gtclick.run(i) 22 | # 高 * 宽 23 | Outfile.to_labelme(i, out, size=(384, 344), output_dir=outdir ) 24 | charsImage = out.charsImage 25 | targetsImage = out.targetsImage 26 | for index, temp in enumerate(charsImage): 27 | temp.save(f"{outdir}/char_{index}.png") 28 | for index, temp in enumerate(targetsImage): 29 | temp.save(f"{outdir}/target_{index}.png") 30 | 31 | Outfile.draw_image(i, out.chars_xyxy, out.targets_xyxy , f"{outdir}/output.png") 32 | exit() 33 | 34 | -------------------------------------------------------------------------------- /demo_geetest4icon4mi.py: -------------------------------------------------------------------------------- 1 | from src.method.GTClick import GTClick 2 | from src.utils.outdata import Outfile 3 | 4 | from pathlib import Path 5 | from conf.config import gtconf 6 | 7 | gtclick = GTClick( 8 | pdetect = gtconf['icon4mi']['pdetect'], 9 | per = gtconf['icon4mi']['per'], 10 | pclass = gtconf['icon4mi']['pclass'], 11 | pclasstags = gtconf['icon4mi']['pclasstags'], 12 | chars_issorted = True, 13 | rmalpha = True, 14 | ) 15 | if __name__ == '__main__': 16 | i = "assets/icon4/imgs_00142_59845.png" 17 | extraicon = ["assets/icon4/3f9cdf.png", "assets/icon4/c59e7a.png", "assets/icon4/94cb8d.png"] 18 | outdir = "example/temp2" 19 | Path(outdir).mkdir(parents=True, exist_ok=True) 20 | Path(outdir).parent.mkdir(exist_ok=True) 21 | out = gtclick.run(i, extraicon) 22 | # 高 * 宽 23 | Outfile.to_labelme(i, out, size = (200,300), output_dir = outdir ) 24 | 25 | targetsImage = out.targetsImage 26 | for index, temp in enumerate(targetsImage): 27 | temp.save(f"{outdir}/target_{index}.png") 28 | Outfile.draw_image(i, out.charsImage, out.targets_xyxy , f"{outdir}/output.png") 29 | exit() 30 | 31 | -------------------------------------------------------------------------------- /imgsrc/a.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/imgsrc/a.jpg -------------------------------------------------------------------------------- /imgsrc/w.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/imgsrc/w.png -------------------------------------------------------------------------------- /jsdemo_g3word_1.py: -------------------------------------------------------------------------------- 1 | import json, re, time, execjs, os, random, requests 2 | from typing import Literal, Union 3 | from webjs.word3.f1.tools import send_image2server, download_img 4 | 5 | 6 | URL = [ 7 | 'https://passport.bilibili.com/x/passport-login/captcha', # 初始化获取挑战 8 | 'https://api.geetest.com/gettype.php', # 初始化相关 9 | 'https://api.geetest.com/ajax.php', # 初始化相关 10 | 'https://api.geetest.com/get.php', # 获取图片 11 | ] 12 | 13 | Method = Literal['get', 'post', 'POST', 'GET'] 14 | pattern = re.compile(r'\((.*?)\)', re.S) 15 | 16 | 17 | class Gessts: 18 | # 设置请求session 19 | session = requests.Session() 20 | # 返回指定数据类型 21 | dataProcessors = { 22 | 'json': lambda resp: resp.json(), 23 | 'text': lambda resp: resp.text, 24 | 'contents': lambda resp: resp.content 25 | } 26 | # 请求方式 27 | methodProcessors = { 28 | 'get': session.get, 29 | 'post': session.post 30 | } 31 | 32 | def __init__(self): 33 | self.cookies = None 34 | 35 | self.headers = { 36 | 'authority': 'passport.bilibili.com', 37 | 'accept': '*/*', 38 | 'accept-language': 'zh-CN,zh;q=0.9', 39 | 'Referer': 'https://www.bilibili.com', 40 | 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36', 41 | } 42 | 43 | def ajax_requests( 44 | self, url: str, method: Method, headers: dict, 45 | cookies: dict, params: Union[dict, str, None], 46 | jsonData: Union[dict, None], retryTimes: int = 5, 47 | timeOut: int = 20 48 | ) -> requests.Response: 49 | # 初始化请求发送器以及数据获取器 50 | 51 | methodProcessor = self.methodProcessors[method] 52 | for _ in range(retryTimes): 53 | try: 54 | return methodProcessor( 55 | url=url, 56 | headers=headers, 57 | cookies=cookies, 58 | params=params, 59 | data=json.dumps(jsonData, ensure_ascii=False, separators=(',', ':')), 60 | timeout=timeOut 61 | ) 62 | except Exception as e: 63 | print( 64 | f"错误链接: {url}", 65 | f"请求出现错误, 正在重试: {_}/{retryTimes}", 66 | f"错误信息为: {e}", 67 | sep='\n' 68 | ) 69 | else: 70 | raise '重试5次后仍然无法获取数据,可能是加密参数错误或者ip风控' 71 | 72 | def init_challenge(self): 73 | """ 74 | 通过 B 站的接口获取验证码的 challenge 和 gt 75 | """ 76 | url = URL[0] 77 | params = { 78 | 'source': 'main-fe-header', 79 | 't': '0.26599063907171017', 80 | } 81 | resp: dict = self.ajax_requests( 82 | url=url, 83 | params=params, 84 | method='get', 85 | jsonData=None, 86 | cookies=self.cookies, 87 | headers=self.headers 88 | ).json() 89 | challenge, gt = resp['data']['geetest'].values() 90 | return challenge, gt 91 | 92 | def get_all_info(self, challenge: str, gt: str, header:dict = None, cookies:dict = None) -> tuple: 93 | """ 94 | 根据 gt 和 challenge 获取 c, s 以及图片的地址等详细参数 95 | 这个函数是获取c,s以及坐标信息,这里的坐标是未经过处理的 96 | 参数: 97 | gt: str: 98 | challenge: str: 99 | header: dict: 请求头, 建议越全面越好, 比如: 100 | { 101 | 'authority': ***, 102 | 'accept': ***, 103 | 'accept-language': ***, 104 | 'Referer': ***, 105 | 'user-agent': ***, 106 | } 107 | cookies: dict: cookies ,这两个参数传递给get_all_info, 不过一般情况下 cookies 不需要传递 108 | 返回: 109 | tuple: 返回图片的地址(带前缀), gt, challenge, c, s 110 | """ 111 | if header is not None and isinstance(header, dict): 112 | hd = header 113 | else: 114 | hd = self.headers 115 | if cookies is not None and isinstance(cookies, dict): 116 | ck = cookies 117 | else: 118 | ck = self.cookies 119 | # ck = None # 这里不需要cookies, 但是为了方便调试,先注释掉 120 | now_stamp = int(time.time() * 1000) 121 | self._now_stamp = now_stamp 122 | par = { 123 | 'gt': gt, 124 | 'callback': f'geetest_{self._now_stamp}', 125 | } 126 | self.ajax_requests(url=URL[1], headers=hd, cookies=ck, jsonData=None, method='get', params=par) 127 | par.update({ 128 | 'challenge': challenge, 129 | 'lang': 'zh-cn', 130 | 'pt': '0', 131 | 'client_type': 'web', 132 | 'w': '', 133 | }) 134 | self.ajax_requests(url=URL[3], method='get', headers=hd, cookies=ck, params=par, jsonData=None) 135 | self.ajax_requests(url=URL[2], method='get', headers=hd, cookies=ck, params=par, jsonData=None) 136 | par.update({ 137 | 'is_next': 'true', 138 | 'type': 'click', 139 | 'https': 'false', 140 | 'protocol': 'https://', 141 | 'offline': 'false', 142 | 'product': 'embed', 143 | 'api_server': 'api.geetest.com', 144 | 'isPC': 'true', 145 | 'autoReset': 'true', 146 | 'width': '100%', 147 | 'callback': f'geetest_{self._now_stamp}', 148 | }) 149 | resp = self.ajax_requests(url=URL[3], method='get', headers=hd, cookies=ck, params=par, jsonData=None) 150 | # 上述顺序不能打乱,必须严格相同 151 | result: dict = json.loads(pattern.findall(resp.text)[0])['data'] 152 | pic: str = 'https://static.geetest.com' + result['pic'] 153 | c = result['c'] 154 | s = result['s'] 155 | assert "word" in pic, "这不是点选验证码" 156 | return pic, gt, challenge, c, s 157 | 158 | def xyxy2gt(self, xyxy_list: list[list[float]] ) -> str: 159 | """ 160 | 将坐标转换为极验需要的格式 161 | """ 162 | assert isinstance(xyxy_list, list), "xyxy_list 应该是一个列表" 163 | assert all(len(i) == 4 for i in xyxy_list), "xyxy_list 中的每个元素应该是一个长度为4的列表" 164 | new = [] 165 | # 处理坐标,变为极验需要的样子 166 | for code in xyxy_list: 167 | x, y = (code[0] + code[2]) / 2, (code[1] + code[3]) / 2 168 | final_x = int(round(int(x) / 333.375 * 100 * 100, 0)) 169 | final_y = int(round(int(y) / 333.375 * 100 * 100, 0)) 170 | final = f'{final_x}_{final_y}' 171 | new.append(final) 172 | 173 | return ','.join(new) 174 | 175 | def do_verify(self, challenge: str, gt: str, pic_name:str="image.jpg", header:dict = None, cookies:dict = None) -> dict: 176 | """ 177 | 处理验证的主要函数 178 | 参数: 179 | gt: str: 180 | challenge: str: 181 | pic_name: str: 图片的保存路径,本地路径, 默认为 image.jpg 182 | header: dict: 请求头, 建议越全面越好, 比如: 183 | { 184 | 'authority': ***, 185 | 'accept': ***, 186 | 'accept-language': ***, 187 | 'Referer': ***, 188 | 'user-agent': ***, 189 | } 190 | cookies: dict: cookies ,这两个参数传递给get_all_info, 不过一般情况下 cookies 不需要传递 191 | 返回: 192 | dict: 返回验证的结果 193 | """ 194 | pic, gt, challenge, c, s = self.get_all_info(challenge, gt, header, cookies) 195 | # 获取坐标信息 196 | download_img(pic, pic_name) 197 | codes = send_image2server(pic_name) 198 | print(f"处理之前的坐标: {codes}") 199 | stringCodes = self.xyxy2gt(codes) 200 | print( 201 | f'处理后坐标: {stringCodes}', 202 | f'图片地址: {pic}', 203 | f'gt:{gt}, challenge:{challenge}', 204 | f'c: {c}, s: {s}', sep='\n' 205 | ) 206 | with open('./webjs/word3/f1/demo.js', 'r', encoding='utf-8') as f: 207 | jscode = f.read() 208 | ctx = execjs.compile(jscode) 209 | print(f"stringCodes: {stringCodes}") 210 | w = ctx.call('get_w', stringCodes, pic, gt, challenge, c, s ) 211 | 212 | ### 方法 2 213 | # with open('./webjs/word3/f2/biblg3word.js', 'r', encoding='utf-8') as f: 214 | # jscode = f.read() 215 | # ctx = execjs.compile(jscode) 216 | # w = ctx.call('get_w', stringCodes, pic, gt, challenge, c, s ) 217 | 218 | params = { 219 | "gt": gt, 220 | "challenge": challenge, 221 | "lang": "zh-cn", 222 | "pt": "0", 223 | "client_type": "web", 224 | "w": w, 225 | "callback": f"geetest_{self._now_stamp}", 226 | } 227 | # print(f"参数: {params}") 228 | # 避免出现点选过快的情况 229 | time.sleep(random.uniform(3, 6)) 230 | resp = self.ajax_requests( 231 | url='https://api.geetest.com/ajax.php', 232 | method='get', 233 | headers=self.headers, 234 | cookies=self.cookies, 235 | jsonData=None, 236 | params=params 237 | ) 238 | # 处理 jsonp 数据 239 | try: 240 | resp_json = resp.json() 241 | except: 242 | resp_json = self.headle_jsonp(resp.text) 243 | return resp_json 244 | 245 | def is_valid_jsonp(self, text: str) -> bool: 246 | """ 247 | 判断是否是 JSONP 格式的数据,以 'geetest_数字(' 开头. 当然可以换成其他的, 比如: re.compile(r'\((.*?)\)', re.S) 248 | 参数: 249 | text: str: 需要判断的文本 250 | 返回: 251 | bool: 返回是否是 JSONP 格式的数据 252 | """ 253 | if not isinstance(text, str): 254 | return False 255 | pattern = re.compile(r"^geetest_\d+\(") # 256 | match = pattern.match(text) 257 | # 如果匹配成功,返回 True,否则返回 False 258 | return bool(match) 259 | def headle_jsonp(self, text) -> dict: 260 | """ 261 | 处理 JSONP 格式的数据,去掉头尾的无用字符 262 | 参数: 263 | text: str: 需要处理的文本 264 | 返回: 265 | dict: 返回处理后的数据,如果不是 JSONP 格式的数据,则抛出异常 266 | """ 267 | 268 | if self.is_valid_jsonp(text): 269 | jsonppattern = re.compile(r'\((.*?)\)', re.S) 270 | return json.loads(jsonppattern.findall(text)[0]) 271 | else: 272 | assert False, '不是 JSONP 格式的数据' 273 | 274 | 275 | if __name__ == '__main__': 276 | bili = Gessts() 277 | challenge, gt = bili.init_challenge() # 调用 b 站的接口获取 challenge 和 gt 278 | print(f"challenge: {challenge}, gt: {gt}") 279 | pic_name = os.path.join("temp", f"pic_{challenge[0:5]}.jpg") 280 | os.makedirs(os.path.dirname(pic_name), exist_ok=True) 281 | resp = bili.do_verify(challenge, gt, pic_name) #传递参数获取验证结果 282 | print(resp) -------------------------------------------------------------------------------- /jsdemo_g3word_2.py: -------------------------------------------------------------------------------- 1 | import requests, execjs, json, time, os 2 | from jsonpath import jsonpath 3 | from webjs.word3.f2.tools import download_img, headers, cookies 4 | from webjs.word3.f2.loadmodel import gt3word 5 | 6 | 7 | session = requests.Session() 8 | 9 | ### 1. 获取challenge 和 gt 10 | def get_challengeid(): 11 | params = { 12 | 'source': 'main-fe-header', 13 | 't': '0.7758525919151655', 14 | } 15 | url1 = 'https://passport.bilibili.com/x/passport-login/captcha' 16 | response = session.get(url1, params=params, cookies=cookies, headers=headers) 17 | resjson = response.json() 18 | challenge = jsonpath(resjson, '$..challenge')[0] 19 | gt = jsonpath(resjson, '$..gt')[0] 20 | return challenge, gt 21 | 22 | ### 2. 获取点击类型 23 | def get_click_type(gt, challenge): 24 | params = { 25 | 'gt': gt, 26 | 'challenge': challenge, 27 | 'lang': 'zh-cn', 28 | 'pt': '0', 29 | 'client_type': 'web', 30 | 'callback': f'geetest_{int(time.time() * 1000)}' 31 | } 32 | ## 获取点击类型 33 | response = session.get( 34 | 'https://api.geetest.com/ajax.php', headers=headers, params=params 35 | ) 36 | restext = response.text 37 | result = json.loads(restext[restext.find("(") + 1:restext.rfind(")")]) 38 | click_type = jsonpath(result, '$..result')[0] 39 | assert click_type == 'click', "点击类型不是 click" 40 | return click_type 41 | 42 | ### 3. 获取 json 详细信息 43 | def get_gtresponse(gt, challenge): 44 | params = { 45 | 'is_next': 'true', 46 | 'type': 'click', 47 | 'gt': gt, 48 | 'challenge': challenge, 49 | 'lang': 'zh-cn', 50 | 'https': 'false', 51 | 'protocol': 'https://', 52 | 'offline': 'false', 53 | 'product': 'embed', 54 | 'api_server': 'api.geetest.com', 55 | 'isPC': 'true', 56 | 'autoReset': 'true', 57 | 'width': '100%', 58 | 'callback': f'geetest_{int(time.time() * 1000)}', 59 | } 60 | response = session.get('https://api.geetest.com/get.php', params=params, headers=headers) 61 | restext = response.text 62 | result = json.loads(restext[restext.find("(") + 1:restext.rfind(")")]) 63 | myc = jsonpath(result, '$..c')[0] 64 | mys = jsonpath(result, '$..s')[0] 65 | pic = jsonpath(result, '$..pic')[0] 66 | return myc, mys, pic 67 | 68 | 69 | ### 4. 最后验证 70 | def validate(gt, challenge, w): 71 | params = { 72 | 'gt': gt, 73 | 'challenge': challenge, 74 | 'lang': 'zh-cn', 75 | 'pt': '0', 76 | 'client_type': 'web', 77 | "w": w, 78 | 'callback': f'geetest_{int(time.time() * 1000)}', 79 | } 80 | 81 | response = session.get('https://api.geetest.com/ajax.php', headers=headers, params=params) 82 | return response.text 83 | 84 | 85 | 86 | if __name__ == "__main__": 87 | 88 | ### 1. 获取challenge 和 gt 89 | challenge, gt = get_challengeid() 90 | 91 | ### 2. 获取点击类型 92 | click_type = get_click_type(gt, challenge) 93 | 94 | ### 3. 获取 json 详细信息 95 | myc, mys, pic = get_gtresponse(gt, challenge) 96 | time.sleep(1) 97 | ### 4. 下载图片,获取坐标, 并转为极验需要的格式 98 | os.makedirs('temp', exist_ok=True) 99 | download_img(pic, 'temp/a.jpg') 100 | out = gt3word.run('temp/a.jpg') 101 | # xyxy = poses2geetest(out.targets_xyxy) 102 | 103 | ### 5. 处理 w 参数 104 | time.sleep(1) 105 | import execjs 106 | with open("./webjs/word3/f2/biblg3word.js", 'r', encoding='utf-8') as f: 107 | jscode = f.read() 108 | ctx = execjs.compile(jscode) 109 | 110 | w = ctx.call('get_w', out.targets_xyxy, pic, gt, challenge, myc, mys ) 111 | 112 | ### 6. 最后验证 113 | res = validate(gt, challenge, w) 114 | print(res) 115 | 116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /jsdemo_g4icon.py: -------------------------------------------------------------------------------- 1 | import requests, json, time, execjs, uuid, os 2 | from jsonpath import jsonpath 3 | from webjs.word3.f2.loadmodel import gt4icon 4 | from webjs.icon4.tools import xyxy2gtformat, download_img, headers, cookies, now_str 5 | """ 6 | 由于该网站比较严格,需要 header 和 cookies, 以及一些参数, 本代码只是一个示例, 不能直接运行 7 | """ 8 | params = { 9 | 'scene_type': '1', 10 | 'now': now_str, 11 | 'reason': 'user.mihoyo.com#/login/password', 12 | 'action_type': 'login_by_password', 13 | 'account': '19196951600', 14 | 't': now_str, 15 | } 16 | 17 | 18 | session = requests.session() 19 | for ii in range(10): 20 | try: 21 | url1 = 'https://webapi.account.mihoyo.com/Api/create_mmt' 22 | response = session.get(url1, params=params, cookies=cookies, headers=headers) 23 | gt1 = jsonpath(response.json(), '$..gt') 24 | mmt_key = jsonpath(response.json(), '$..mmt_key') 25 | if gt1: 26 | gt = gt1[0] 27 | break 28 | time.sleep(1) 29 | data = { 30 | 'account': '19194931000', 31 | 'password': 'MERS6bUrEYw9LMhf2mLL9j2CeWmdowp5Vgadu58jZeYN7LT1BdWIh8ASiD35xaFRoPKg3Uz5B4ka4P+QzQB6ViopvqRPUW3VOhcpZLM/RM8RIDvHOtRZzHwJjGyQfw/gbZf2YPbARE3kplpvbTYvcX/3SjSuLkqG0XJapIvfKFc=', 32 | 'is_crypto': 'true', 33 | 'mmt_key': mmt_key, 34 | 'source': 'user.mihoyo.com', 35 | 't': str(int(time.time() * 1000)), 36 | } 37 | url2 = 'https://webapi.account.mihoyo.com/Api/login_by_password' 38 | response = session.post(url2, cookies=cookies, headers=headers, data=data) 39 | time.sleep(1) 40 | gt2 = jsonpath(response.json(), '$..gt') 41 | if gt2: 42 | gt = gt2[0] 43 | break 44 | except: 45 | print(f"第{ii}次获取gt和mmt_key失败") 46 | continue 47 | 48 | 49 | params = { 50 | 'callback': f'geetest_{int(time.time() * 1000)}', 51 | 'captcha_id': gt, 52 | 'challenge': str(uuid.uuid4()), 53 | 'client_type': 'web', 54 | 'risk_type': 'icon', 55 | 'user_info': json.dumps({"mmt_key": mmt_key }, separators=(',', ':')), 56 | 'lang': 'zho', 57 | } 58 | 59 | 60 | response = session.get('https://gcaptcha4.geetest.com/load', params=params, cookies=cookies, headers=headers) 61 | res = response.text 62 | json_data = json.loads(res[res.index("(") + 1:res.rindex(")")]) 63 | os.makedirs("temp", exist_ok=True) 64 | 65 | # with open("temp/icon4.json", "w") as f: 66 | # json.dump(json_data, f, ensure_ascii=False, indent=2) 67 | 68 | imgs = jsonpath(json_data, '$..imgs')[0] 69 | download_img(imgs, "temp/a.png") 70 | 71 | ques = jsonpath(json_data, '$..ques')[0] 72 | ques_path = [f"temp/ques_{i}.png" for i in range(len(ques))] 73 | download_img(ques, ques_path) 74 | 75 | imgs_path = "temp/a.png" 76 | 77 | out = gt4icon.run(imgs_path, ques_path) 78 | xyxy = out.targets_xyxy 79 | 80 | userresponse = xyxy2gtformat(xyxy) 81 | 82 | with open("webjs/icon4/demo_g4icon.js", "r") as f: 83 | jscode = f.read() 84 | ctx = execjs.compile(jscode) 85 | 86 | 87 | lot_number = jsonpath(json_data, '$..lot_number')[0] 88 | pow_detail = jsonpath(json_data, '$..pow_detail')[0] 89 | detail_time = jsonpath(pow_detail, '$..datetime')[0] 90 | payload = jsonpath(json_data, '$..payload')[0] 91 | process_token = jsonpath(json_data, '$..process_token')[0] 92 | 93 | 94 | w = ctx.call("get_w2", gt, lot_number, detail_time, userresponse) 95 | 96 | 97 | 98 | params = { 99 | 'callback': f'geetest_{int(time.time() * 1000)}', 100 | 'captcha_id': gt, 101 | 'client_type': 'web', 102 | 'lot_number': lot_number, 103 | 'risk_type': 'icon', 104 | 'payload': payload, 105 | 'process_token': process_token, 106 | 'payload_protocol': '1', 107 | 'pt': '1', 108 | 'w': w, 109 | } 110 | 111 | response = session.get('https://gcaptcha4.geetest.com/verify', params=params, cookies=cookies, headers=headers) 112 | print(response.text) 113 | 114 | -------------------------------------------------------------------------------- /jsdemo_nine3.py: -------------------------------------------------------------------------------- 1 | import re, requests, time, uuid, execjs, json 2 | from lxml import etree 3 | from urllib.parse import urljoin 4 | from jsonpath import jsonpath 5 | from pathlib import Path 6 | from webjs.nine3.utils import * 7 | from webjs.word3.f2.loadmodel import gt3nine 8 | 9 | 10 | 11 | 12 | headers = { 13 | 'authority': 'gt4.geetest.com', 14 | 'accept': '*/*', 15 | 'accept-language': 'zh-CN,zh;q=0.9', 16 | 'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3" 17 | } 18 | 19 | def get_captchaId(): 20 | global headers 21 | session = requests.Session() 22 | response = session.get('https://gt4.geetest.com/', headers=headers) 23 | 24 | HTML = etree.HTML(response.text) 25 | js_url = HTML.xpath('//script[contains(@src, "/assets/index")]/@src')[0] 26 | 27 | res = session.get(urljoin("https://gt4.geetest.com", js_url), headers=headers).text 28 | captchaId = re.search('captcha_id:"([0-9a-z]+)"', res).group(1) 29 | return captchaId, session 30 | 31 | 32 | 33 | 34 | def get_resjson(captcha_id): 35 | params = { 36 | 'callback': f'geetest_{int(time.time() * 1000)}', 37 | 'captcha_id': captcha_id, 38 | 'challenge': str(uuid.uuid4()), 39 | 'client_type': 'web', 40 | 'risk_type': 'nine', 41 | 'lang': 'zh', 42 | } 43 | response = session.get('https://gcaptcha4.geetest.com/load', params=params, headers=headers) 44 | res = response.text 45 | resp_json = json.loads(res[res.find("(") + 1:res.rfind(")")]) 46 | captcha_type =jsonpath(resp_json, '$..captcha_type')[0] 47 | assert captcha_type == 'nine', "captcha_type should be nine" 48 | 49 | kk, ss = 1, 1 50 | imgs_dir = ques_dir = "temp" 51 | Path(imgs_dir).mkdir(parents=True, exist_ok=True) 52 | imgs, imgs_count = generate_url(jsonpath(resp_json, '$..imgs')[0]) 53 | imgs_path = generate_paths(imgs_dir, f"img_{kk}_{ss}", imgs_count) 54 | download_img(imgs, imgs_path) 55 | 56 | ques, ques_count = generate_url(jsonpath(resp_json, '$..ques')[0]) 57 | ques_path = generate_paths(ques_dir, f"ques_{kk}_{ss}", ques_count) 58 | download_img(ques, ques_path) 59 | return resp_json, captcha_id, imgs_path, ques_path 60 | 61 | 62 | 63 | if __name__ == "__main__": 64 | ## 九宫格 65 | captcha_id, session = get_captchaId() 66 | 67 | resp_json, captcha_id, imgs_path, ques_path = get_resjson(captcha_id) 68 | 69 | 70 | time.sleep(1) 71 | out = gt3nine.run(imgs_path[0], ques_path) 72 | userresponse = out.nine_rowcol # 九宫格的坐标 73 | 74 | #下面传递的参数都是从resp_json中获取的 75 | lot_number = jsonpath(resp_json, '$..lot_number')[0] 76 | nine_nums = jsonpath(resp_json, '$..nine_nums')[0] 77 | payload = jsonpath(resp_json, '$..payload')[0] 78 | payload_protocol = jsonpath(resp_json, '$..payload_protocol')[0] 79 | datetime = jsonpath(resp_json, '$..datetime')[0] 80 | process_token = jsonpath(resp_json, '$..process_token')[0] 81 | with open("webjs/nine3/demo.js", "r") as f: 82 | jscode = f.read() 83 | ctx = execjs.compile(jscode) 84 | w = ctx.call("get_w", captcha_id, lot_number, datetime, userresponse) 85 | 86 | params = { 87 | 'callback': f'geetest_{int(time.time() * 1000)}', 88 | 'captcha_id': captcha_id, 89 | 'client_type': 'web', 90 | 'lot_number': lot_number, 91 | 'risk_type': 'nine', 92 | 'payload': payload, 93 | 'process_token': process_token, 94 | 'payload_protocol': '1', 95 | 'pt': '1', 96 | 'w': w, 97 | } 98 | url3 = 'https://gcaptcha4.geetest.com/verify' 99 | response = requests.get(url3, params=params, headers=headers) 100 | 101 | print(response.text) -------------------------------------------------------------------------------- /model/sha256.txt: -------------------------------------------------------------------------------- 1 | 971bb3bcbc6fe55cc17d2d54a655830974bdb1e19f5303d6bf01de4c4b62e957 g3word6300/detect.onnx 2 | 749812ddae42483864282885418ea8b52254b543ade9655ed408d35ec2193324 g3word6300/detect.pt 3 | 9025a7cc1c77f22dd008e2c727a2b9260a62afc8b2ca4ed0fe3c9f9ab01596d4 g3word6300/muti.pt 4 | 046ac6bec58c1687c147d5e0a6caec2cb55ac32fcf1292aa1f08151d3af90ec1 g3word6300/simvgg19.onnx 5 | 92d5f3e1d32f574c07d8d48db46c015901d6e765ecc09c9610dfecb492e0078f icon4mi800/detect.pt 6 | 74709cc0aa96dca01c825de76d3c8427ec16f7bd43dff2f94c5178a160144fa1 icon4mi800/muti.pt 7 | bc1033eb99ee2055efd5bd1eb6ffdd8f77d37fa7516d1297e5a919fbda69efc4 icon4mi800/simvgg19.onnx 8 | 92cd98ce5c22fb538fd7eea41968f69d40714e2d9ef9424b4eb6fbefbde4bd04 nine3/best.pt 9 | 1054268fe09de13f8a547dbaf4ba881bc7a5774d907e8bd08526809e8bd04247 极验4点选图标类型第三种.zip 10 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "crypto-js": "^4.2.0", 4 | "node-rsa": "^1.1.1" 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | ## 验证码识别 2 | 3 | - [x] 文字点选 4 | 5 | - [x] 图标点选 6 | 7 | - [x] 九宫格(四代 and 三代) 8 | 9 | PS: 对于文字点选, 感觉可以当做一个简单的针对性的 OCR 识别, 因为文字分类有 1500+ 个. 10 | 11 | ## 免责声明 12 | 13 | 本项目旨在研究深度学习在验证码攻防上的应用。仅供学习交流使用,请勿用于非法用途,不得在任何商业使用,本人不承担任何法律责任。 14 | 15 | ## 运行环境 16 | 17 | ```bash 18 | conda create -n yzm python=3.10 -y 19 | conda activate yzm 20 | pip install -r requirements.txt 21 | ``` 22 | 23 | - 理论上支持 3.10 及以上的版本, 但是没有测试过, 请自行测试. 3.10 以下的版本, 也自行测试 24 | 25 | ## 使用方法 26 | 27 | ```bash 28 | ## 文字点选 ---- 本地直接调用模型来识别(输出在 example/temp1/output.png 文件夹下) 29 | python demo_geetest3word.py 30 | 31 | ## 图标点选 ---- 本地直接调用模型来识别(输出在 example/temp2/output.png 文件夹下) 32 | python demo_geetest4icon4mi.py 33 | 34 | ## 九宫格 ---- 本地直接调用模型来识别(结果在 example/temp3/output.png ) 35 | python demo_geetest3nine.py 36 | ``` 37 | 38 | - 当然也可以传入自己的模型, 按照案例中的格式传入即可 39 | 40 | ## 用空请作者喝杯咖啡吗? 41 | 42 | 如果这个项目对您有帮助, 就请作者喝杯咖啡吧, 您的支持是作者最大的动力. 给个 star 也是对作者的支持. 43 | 44 | | Wechat Pay | Ali Pay | 45 | | ----------------------------------------- | ----------------------------------------- | 46 | | | | 47 | 48 | ## 模型下载 49 | 50 | - 下载下来以后, 把模型放到项目的指定位置即可, 一般放到 model 目录下(直接替换即可) 51 | 52 | - [huggingface:](https://huggingface.co/zscmmm/yzm) 53 | 54 | ## 简单的案例展示 55 | 56 | - 文字点选 57 | 58 | ```bash 59 | python demo_geetest3word.py 60 | ``` 61 | 62 | | 原图 | 效果 | 63 | | ------------------------------------------------------------- | ---------------------------------------------------- | 64 | | | | 65 | 66 | - 图标点选. 67 | 68 | 需要传入额外的小图标且注意传入顺序, 即验证码右上角的小图标. 如果是透明的,需要把移除透明度设置为 `True` 69 | 70 | ```bash 71 | python demo_geetest4icon4mi.py 72 | ``` 73 | 74 | | 原图 | 效果 | 75 | | -------------------------------------------------------------- | ---------------------------------------------------- | 76 | | | | 77 | 78 | - 九宫格 79 | 80 | 同理,也需要传入额外的小图标. 即验证码右上角的小图标 81 | 82 | ```bash 83 | python demo_geetest3nine.py 84 | ``` 85 | 86 | | 原图 | 效果 | 87 | | ------------------------------------------------------------- | ---------------------------------------------------- | 88 | | | | 89 | 90 | ## app 服务 91 | 92 | - 采用 fastapi 框架, 一个简单的验证码识别服务. 93 | 94 | ```bash 95 | python service.py # 启动服务, 地址: 127.0.0.1:9100 96 | 97 | ``` 98 | 99 | 测试的输入结果 100 | 101 | ``` 102 | {'code': 200, 'msg': 'success', 'data': {'imageID': 'string', 'res': [[206.15, 19.79, 281.41, 89.9], [221.8, 234.02, 294.53, 307.69], [40.36, 76.1, 110.0, 145.15], [130.65, 88.2, 204.8, 160.11]]}} 103 | {'code': 200, 'msg': 'success', 'data': {'imageID': 'string', 'res': [[99, 0, 198, 86], [0, 86, 99, 172], [99, 86, 198, 172]]}} 104 | {'code': 200, 'msg': 'success', 'data': {'imageID': 'string', 'res': [[8.05, 53.09, 60.95, 107.45], [152.49, 75.74, 205.45, 128.27], [219.18, 51.93, 271.23, 106.0]]}} 105 | ``` 106 | 107 | ## 增加 js 验证功能 108 | 109 | - 仅测试是否通过验证, 这东西具有时效性,不一定还能用 110 | 111 | ```bash 112 | # npm install # 安装依赖 113 | # python service.py #建议先启动服务, 然后再启动 js 代码 114 | 115 | python jsdemo_g3word_1.py #文字点选的一种方式 116 | python jsdemo_g3word_2.py #文字点选的另一种方式 (需要启动接口服务) --推荐 117 | python jsdemo_g4icon.py 118 | python jsdemo_nine3.py 119 | 120 | ``` 121 | 122 | 具体的 api,可以参考: http://localhost:9100/docs 123 | 124 | ## 模型训练流程 125 | 126 | 模型不具有泛化性,需要根据具体的验证码进行训练,可以参考案例中的模型训练方法, 其实训练比较简单, 关键是标注数据, 以及数据的预处理 127 | 128 | - 文字点选大概用了 6300+ 张 129 | 130 | - 图标点选大概用了 800+ 张 131 | 132 | - 九宫格大概用了 800+ 张 (不记得了, 太久了) 133 | 134 | 模型的主要训练过程 135 | 136 | 1. 标注数据, 先用几百张数据, 进行标注 137 | 2. 数据预处理 138 | 3. 模型训练, yolov8 目标检测和分类模型, siamese 网络 139 | 4. 模型测试, 在一个更大的数据集上测试, 然后看模型的效果, 把预测不好的数据人工标注, 重新训练 140 | 5. 重复 1--4 步骤, 直到模型效果满意(好像有 3.2w+ 张不重复的数据, 预测效果不错, 感觉有 99.99%的样子, 具体没有统计过了. ) 141 | 142 | 后面的图标和九宫格就只训练了几百张数据, 如果有需要可以继续, 接口已写好,可以生成 lableme 格式的数据, 然后来回倒腾训练即可. 143 | 144 | - 比如, 利用 yolo 模型进行分类和孪生网络进行分类, 查看二者分的一不一致, 以及 yolo 分类会不会错误分类, 如果存在上述的情况,把错误的图片单独复制出来,以及对应的 lableme 格式的标注文件也复制出来, 然后就是手动标注了. 145 | 146 | - 对于 九宫格和图标点选, 原理一样,直接根据模型进行分类裁剪,然后人工查看. 147 | 148 | - 对应 YOLO 模型,采用的是 pt 文件, 当然只要你愿意,可以采用 onnx 文件 149 | 150 | ```python 151 | # 一个简单实用 onnxruntime 的例子来运行 YOLOv8 生成的 onnx 模型 152 | # pip install onnx-predict-yolov8 或者找其他库, 也可以,这里做一个简单的演示 153 | 154 | from src.utils.utils import open_image 155 | import onnxruntime as ort 156 | from opyv8 import Predictor 157 | 158 | i = "assets/word3/pic_00356_20119.png" 159 | model = "model/g3word6300/detect.onnx" 160 | session = ort.InferenceSession(model, providers=[ "CPUExecutionProvider"]) 161 | predictor = Predictor(session, ["char", "target"]) 162 | img = open_image(i) 163 | print(predictor.predict(img)) 164 | ``` 165 | 166 | ## 起因 167 | 168 | - 本项目的目的是为了学习, 请勿用于非法用途, 本人不承担任何法律责任. 169 | 170 | - 作为深度学习的入门者, 一直想找一个比较有挑战性的项目, 看见网上都没有彻底开源此类模型, 本着开源的精神, 把自己的一些心得体会分享出来, 也希望大家能够一起交流, 一起进步. 171 | 172 | - 深度学习感觉需要掌握的只是太多, 只能从案例直接入手, 一开始的时候, 也是一头雾水, 从数据的标注, 数据的预处理, 模型的训练, 模型的测试, 一步一步的走下来, 也是一种成长, 也是一种收获. 173 | 174 | - 关键点在于: 最近终于有资金购买显卡了(还是一个 4060), 才入坑深度学习, 之前一直有心无力, 也是一种机缘吧. 苦逼的我!!! 175 | 176 | - 本人代码水平有限,可能存在很多 bug, 177 | 178 | ## 参考 179 | 180 | - [https://github.com/ultralytics/ultralytics](https://github.com/ultralytics/ultralytics) 181 | 182 | - [https://github.com/bubbliiiing/Siamese-pytorch](https://github.com/bubbliiiing/Siamese-pytorch) 183 | 184 | - [https://github.com/MgArcher/Text_select_captcha](https://github.com/MgArcher/Text_select_captcha) 185 | 186 | - [https://github.com/sijiyo/projects](https://github.com/sijiyo/projects) 187 | 188 | ## Star History 189 | 190 | [![Star History Chart](https://api.star-history.com/svg?repos=zscmmm/yzm_captcha&type=Date)](https://www.star-history.com/#zscmmm/yzm_captcha&Date) 191 | 192 | 193 | 194 | 195 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ultralytics 2 | onnxruntime-gpu 3 | pillow==10.2.0 4 | pandas 5 | numpy 6 | jsonpath 7 | lxml 8 | PyExecJS2 9 | requests 10 | loguru 11 | killport 12 | email-validator 13 | fastapi==0.110.0 14 | fastapi-restful==0.5.0 15 | pydantic==2.6.3 16 | uvicorn==0.27.1 -------------------------------------------------------------------------------- /service.py: -------------------------------------------------------------------------------- 1 | """ 2 | 识别服务,fastapi实现 3 | """ 4 | import time 5 | from fastapi import FastAPI 6 | from loguru import logger 7 | logger.remove() 8 | logger.add("log/app.log", rotation="500 MB", retention="10 days") 9 | from app.gt3.word.word import gt3word 10 | from app.gt3.nine.nine import gt3nine 11 | from app.gt3.nine.nine4jsapp import gt4ninejs 12 | from app.gt3.word.wordjsapp import gt3wordjs 13 | from app.gt4.iconmi.iconmi import gt4iconmi 14 | 15 | title = "验证码识别服务" 16 | description = "验证码识别服务" 17 | version = "1.0.0" 18 | contact = {"name": "XXXX", "email": "XXXXXX@gmail.com"} 19 | 20 | app = FastAPI( 21 | title=title, 22 | description=description, 23 | version=version, 24 | contact=contact 25 | ) 26 | 27 | @app.get("/", summary="根路径", response_description="欢迎信息") 28 | async def root() -> dict[str, str]: 29 | """ 30 | 欢迎访问验证码识别服务, 请查看文档 31 | """ 32 | try: 33 | return {"message": "Hello World"} 34 | except Exception as e: 35 | return {"error": str(e)} 36 | # 利用路由的方式, 实现模块化 37 | app.include_router(gt3word, prefix="/gt3", tags=["三代文字点选"]) 38 | app.include_router(gt3nine, prefix="/gt3", tags=["三代九宫格"]) 39 | app.include_router(gt4iconmi, prefix="/gt4", tags=["四代图标点选"]) 40 | app.include_router(gt4ninejs, prefix="/gt4js", tags=["四代九宫格js"]) 41 | app.include_router(gt3wordjs, prefix="/gt3js", tags=["三代文字点选js"]) 42 | 43 | 44 | 45 | if __name__ == '__main__': 46 | import uvicorn 47 | port = 9100 48 | # from app.handleprocess import kill_process #自己写的 49 | # kill_process(port) 50 | # #别人写的,可以在终端直接运行,也提供了一个函数 51 | import killport 52 | killport.kill_ports(ports=[port], view_only=False) 53 | time.sleep(2) # 等待进程结束,不然太快了,易出错 54 | 55 | log_config = "app/uvicorn_config.json" 56 | uvicorn.run("service:app", host="0.0.0.0", port=port, reload=True, 57 | log_config=log_config, use_colors=True) 58 | 59 | # 或者直接在命令行: uvicorn service:app --port 9100 --reload 60 | -------------------------------------------------------------------------------- /src/method/GTClick.py: -------------------------------------------------------------------------------- 1 |  2 | from src.utils.SiameseOnnx import SiameseOnnx 3 | from src.utils.YoloOnnx import YoloD, YoloC 4 | from src.utils.utils import open_image, find_max_probability, process_similarity_matrix 5 | from typing import Optional, Union 6 | from PIL import Image 7 | from src.utils.outdata import Coordination 8 | import numpy as np 9 | from pathlib import Path 10 | 11 | 12 | class GModel(object): 13 | def __init__( 14 | self, 15 | pdetect: str, 16 | per: str, 17 | pclass: Optional[str] = None, 18 | pclasstags: list[str] = ["icon"], #会根据这个类别来进行分类, 最多支持两个类别, 如果是两个,则第一个是具有顺序的字符类别,第二个是目标类别 19 | chars_issorted: bool = False, # 当 chars_issorted 为 True 时, 表示手动输入chars 类别,并且具有顺序, 只有 pclasstags 为1时才有效 20 | rmalpha: bool = False, # 只有在 chars_issorted 为 True 时才有效 21 | conf=0.65, 22 | verbose=False, 23 | **kwargs 24 | ): 25 | """ 26 | 实现图像点选功能, 通过 yolo 检测模型找到目标,然后利用孪生神经网络对图片进行排序, 找出对应相似度最高的图片,最后利用 yolo 分类模型进行字符识别 27 | 参数: 28 | - pdetect: str, yolo 检测模型路径 29 | - per: str, 孪生神经网络模型路径 30 | - pclass: Optional[str], yolo 分类模型路径 31 | - pclasstags: list[str], 会根据这个类别来进行分类, 最多支持两个类别, 如果是两个,则第一个是具有顺序的字符类别,第二个是目标类别 32 | - chars_issorted: bool, 当 chars_issorted 为 True 时, 表示手动输入chars 类别,并且具有顺序, 只有 pclasstags 为1时才有效 33 | - rmalpha: bool, 只有在 chars_issorted 为 True 时才有效, 表示是否去除图片的透明度 34 | - verbose: bool, 是否打印详细信息 35 | """ 36 | self.pdetect = pdetect 37 | self.per = per 38 | self.pclass = pclass 39 | self.pclasstags = pclasstags 40 | self.conf = conf 41 | self.verbose = verbose 42 | self.rmalpha = rmalpha 43 | self.chars_issorted = chars_issorted 44 | self.modeltype = None 45 | if len(self.pclasstags) == 1 and self.chars_issorted: 46 | self._chars_issorted = True 47 | else: 48 | self._chars_issorted = False 49 | assert len(self.pclasstags) in [1, 2], f"pclasstags length is not in [1, 2], but {len(self.pclasstags)}" 50 | 51 | self.modelyolod = YoloD(self.pdetect, task="detect", verbose=self.verbose, **kwargs) 52 | # 检查输入的类别是否在模型中 53 | if not self.per and not self.pclass: 54 | assert False, f"per and pclass is None" 55 | elif not self.per and self.pclass: 56 | self.modeltype = 1 57 | self.modelyoloc = YoloC(self.pclass, task="classify", verbose=self.verbose, **kwargs) 58 | elif self.per and not self.pclass: 59 | self.modeltype = 2 60 | self.modelpre = SiameseOnnx(self.per, providers=['CPUExecutionProvider']) 61 | else: 62 | self.modeltype = 3 63 | self.modelyoloc = YoloC(self.pclass, task="classify", verbose=self.verbose, **kwargs) 64 | self.modelpre = SiameseOnnx(self.per, providers=['CPUExecutionProvider']) 65 | 66 | self._img = None 67 | self._image_path = None 68 | self.extraicon = None 69 | 70 | 71 | ### ### 1. 利用 yolo 检测模型进行检测找到目标并返回具有顺序的坐标 72 | def detect_objects(self, img, **kwargs) -> tuple: 73 | """ 74 | 利用 yolo 检测模型进行检测找到目标并返回具有顺序的坐标 75 | 参数: 76 | - img: PIL.Image.Image, 图片对象 77 | - kwargs: dict, 其他参数 78 | 返回: 79 | - tuple: (chars_xyxy, targets_xyxy) 80 | img: PIL.Image.Image, 图片对象 81 | chars_xyxy: list, 字符坐标, [[x1, y1, x2, y2], [x1, y1, x2, y2], ...] 82 | targets_xyxy: list, 目标坐标 [[x1, y1, x2, y2], [x1, y1, x2, y2], ...] 83 | """ 84 | assert isinstance(img, Image.Image), f"img type is not Image.Image, but {type(img)}" 85 | imgsz1 = kwargs.get("imgsz", None) 86 | imgsz = imgsz1 if imgsz1 else self.modelyolod.imgsz 87 | 88 | results = self.modelyolod.predict(img, imgsz = imgsz, device = self.modelyolod._device, **kwargs) 89 | xyxy, xywh, box_name, info = self.modelyolod.extract_info(results) 90 | 91 | ### 2. 根据目标按照坐标进行分类 (这里为验证模型, 直接采用模型预测的类别进行分类过滤) 92 | assert self.pclasstags[-1] in box_name, f"pclasstags[-1]: {self.pclasstags[-1]} not in box_name: {box_name}" 93 | targets_xyxy = [i.get("xyxy") for i in info if i.get("classes") == self.pclasstags[-1]] 94 | chars_xyxy= None 95 | if not self._chars_issorted: 96 | assert len(self.pclasstags) == 2, f"pclasstags length is not 2, but {len(self.pclasstags)}" 97 | assert self.pclasstags[0] in box_name, f"pclasstags[0]: {self.pclasstags[0]} not in box_name: {box_name}" 98 | chars_xyxy = [i.get("xyxy") for i in info if i.get("classes") == self.pclasstags[0]] 99 | chars_xyxy.sort(key=lambda x: x[0]) 100 | if len(chars_xyxy) != len(targets_xyxy): 101 | min_len = min(len(chars_xyxy), len(targets_xyxy)) 102 | chars_xyxy = chars_xyxy[:min_len] 103 | targets_xyxy = targets_xyxy[:min_len] 104 | return chars_xyxy, targets_xyxy 105 | 106 | def per_sortimages(self, 107 | charsImage: list[Image.Image], 108 | targetsImage: list[Image.Image], 109 | **kwargs 110 | ) -> list[tuple[int, int]]: 111 | """ 112 | 利用孪生神经网络对图片进行排序, 找出对应相似度最高的图片 113 | :param img: PIL.Image.Image, 图片对象 114 | """ 115 | imgsz1 = kwargs.get("imgsz", None) 116 | if isinstance(imgsz1, (int, float)): 117 | imgsz = [imgsz1, imgsz1] 118 | elif isinstance(imgsz1, (list, tuple)) and len(imgsz1) == 1: 119 | imgsz = [imgsz1[0], imgsz1[0]] 120 | else: 121 | imgsz = imgsz1 if imgsz1 else self.modelpre.imgsz 122 | 123 | indices = self.modelpre.predict_list(charsImage, targetsImage, *imgsz) 124 | # 返回的是图片对象 125 | return indices 126 | 127 | 128 | def yolo_classify(self, charsImage, targetsImage, **kwargs): 129 | imax_name_list = [] 130 | imax_prob_list = [] 131 | prob_matrix = np.zeros((len(charsImage), len(targetsImage))) 132 | for i in range(len(charsImage)): 133 | row_name = [] 134 | row_prob = [] 135 | for j in range(len(charsImage)): 136 | result_char = self.modelyoloc.predict( 137 | charsImage[i], 138 | conf=self.conf, 139 | imgsz=self.modelyoloc.imgsz, 140 | verbose=self.verbose, 141 | device = self.modelyoloc._device, 142 | **kwargs 143 | ) 144 | result_target = self.modelyoloc.predict( 145 | targetsImage[j], 146 | conf=self.conf, 147 | imgsz=self.modelyoloc.imgsz, 148 | device = self.modelyoloc._device, 149 | verbose=self.verbose, 150 | **kwargs 151 | ) 152 | _, _, ic_top5name, ic_top5conf = self.modelyoloc.extract_info(result_char) 153 | _, _, it_top5name, it_top5conf = self.modelyoloc.extract_info(result_target) 154 | imax_name, imax_prob = find_max_probability(ic_top5name, ic_top5conf, it_top5name, it_top5conf) 155 | row_name.append(imax_name) 156 | row_prob.append(imax_prob) 157 | imax_name_list.append(row_name) 158 | imax_prob_list.append(row_prob) 159 | prob_matrix[i] = row_prob 160 | 161 | final_indices = process_similarity_matrix(prob_matrix) 162 | char_name = [imax_name_list[i][j] for i, j in final_indices] 163 | target_name = [imax_name_list[i][j] for i, j in final_indices] 164 | return final_indices, char_name, target_name 165 | 166 | 167 | 168 | 169 | 170 | class GTClick(GModel): 171 | def __init__(self, *args, **kwargs): 172 | super().__init__(*args, **kwargs) 173 | 174 | def openimage(self, image_path: Union[str, Path, Image.Image]): 175 | if isinstance(image_path, str) or isinstance(image_path, Path): 176 | assert Path(image_path).exists(), f"image_path: {image_path} is not exists" 177 | self._image_path = image_path 178 | elif isinstance(image_path, Image.Image): 179 | self._image_path = None 180 | else: 181 | assert False, f"image_path type is not str or Image.Image, but {type(image_path)}" 182 | self._img = open_image(image_path) 183 | 184 | def reset_outdata(self): 185 | self.coordination = Coordination() 186 | 187 | def run(self, image_path: Union[str, Path, Image.Image], extraicon:list[str, Image.Image] = None, **kwargs) -> Coordination: 188 | """ 189 | 根据图片路径进行返回结果 190 | :param image_path: str, 图片路径 191 | :param extraicon: list[str], 额外的图片路径, 用于排序 192 | """ 193 | self.openimage(image_path) 194 | self.reset_outdata() 195 | 196 | self.coordination.set_value("extraicon", extraicon) 197 | ## 1. 利用 yolo 检测模型进行检测找到目标,并返回具有顺序的坐标 198 | chars_xyxy, targets_xyxy = self.detect_objects(self._img, conf=self.conf, verbose=self.verbose, **kwargs) 199 | 200 | self.coordination.set_value("chars_xyxy", chars_xyxy) # 返回的 chars_xyxy 是按照顺序排列的 201 | self.coordination.set_value("targets_xyxy", targets_xyxy) 202 | self.coordination.set_value("targetsImage", [self._img.crop(xyxy) for xyxy in targets_xyxy]) 203 | if self._chars_issorted: 204 | charsImage_temp = [open_image(i, rmalpha=self.rmalpha) for i in self.coordination.get_value("extraicon")] 205 | self.coordination.set_value("charsImage", charsImage_temp) 206 | else: 207 | self.coordination.set_value("charsImage", [self._img.crop(xyxy) for xyxy in chars_xyxy]) 208 | 209 | charsImage, targetsImage = self.coordination.get_value("charsImage"), self.coordination.get_value("targetsImage") 210 | if self.modeltype in [2]: 211 | indices = self.per_sortimages(charsImage, targetsImage) 212 | char_name= None 213 | target_name = None 214 | elif self.modeltype in [1]: 215 | indices, char_name, target_name = self.yolo_classify(charsImage, targetsImage) 216 | else: 217 | indices0 = self.per_sortimages(charsImage, targetsImage) 218 | indices, char_name, target_name = self.yolo_classify(charsImage, targetsImage) 219 | # 如果不一样,应该以谁为准呢? --- 可以用来判断这张图片是否需要人工干预 220 | if indices0 != indices: 221 | self.coordination.set_value("exist_error", True) 222 | print(f"image_path: {image_path} indices0: {indices0} indices: {indices}") 223 | self.coordination.set_value("chars_name", char_name) 224 | self.coordination.set_value("targets_name", target_name) 225 | self.coordination.set_value("indices", indices) 226 | self.coordination.rank() 227 | return self.coordination 228 | 229 | 230 | 231 | -------------------------------------------------------------------------------- /src/method/GTnine.py: -------------------------------------------------------------------------------- 1 | from src.utils.YoloOnnx import YoloC 2 | from src.utils.utils import open_image, find_max_probability 3 | from typing import Optional, Union 4 | from PIL import Image 5 | from src.utils.outdata import Coordination 6 | from src.utils.SiameseOnnx import SiameseOnnx 7 | from src.utils.nine import crop_nine, flatten 8 | import os 9 | import pandas as pd 10 | 11 | 12 | 13 | 14 | class GTnine(): 15 | def __init__( 16 | self, 17 | pclass: Optional[str] = None, 18 | per: Optional[str] = None, 19 | conf=0.65, 20 | rmalpha: bool = True, 21 | verbose=False, 22 | ) -> None: 23 | ''' 24 | 暂时实现 yolo 分类模型, 感觉孪生神经网络模型不太适合 25 | ''' 26 | assert pclass or per, "pclass and per is None" 27 | self.pclass = pclass 28 | self.per = per 29 | 30 | 31 | self.modeltype = None 32 | self.conf = conf 33 | self.verbose = verbose 34 | self.rmalpha = rmalpha 35 | 36 | if self.pclass and not self.per: 37 | self.modeltype = 1 38 | self.modelyoloc = YoloC(self.pclass, task="classify", verbose=self.verbose) 39 | elif self.per and not self.pclass: 40 | self.modeltype = 2 41 | self.modelpre = SiameseOnnx(self.per, providers=['CPUExecutionProvider']) 42 | else: 43 | self.modeltype = 3 44 | self.modelyoloc = YoloC(self.pclass, task="classify", verbose=self.verbose) 45 | self.modelpre = SiameseOnnx(self.per, providers=['CPUExecutionProvider']) 46 | 47 | 48 | 49 | def _preprocess(self, charimg: Union[list, str], background:str)-> tuple[list[Image.Image], list[Image.Image]]: 50 | if isinstance(charimg, str): 51 | # 如果是 str 则表面是路径,一定要存在 52 | assert os.path.exists(charimg), f"{charimg} not exists" 53 | charimg = [charimg] 54 | 55 | charimg = [open_image(i, rmalpha=self.rmalpha) for i in charimg] 56 | 57 | if isinstance(background, str): 58 | assert os.path.exists(background), f"{background} not exists" 59 | self._image_path = background 60 | elif isinstance(background, Image.Image): 61 | self._image_path = None 62 | else: 63 | assert False, "background type is not supported" 64 | 65 | self._img = open_image(background) 66 | self._bgsize = self._img.size 67 | crop_nine_list = crop_nine(background) 68 | 69 | return charimg, crop_nine_list 70 | def _get_similarity_byper(self, charimg: list, crop_nine_list: list, num: int = None): 71 | mat = [] 72 | for index, i in enumerate(charimg): 73 | for index_j, j in enumerate(crop_nine_list): 74 | prob = self.modelpre.predict(i, j) 75 | mat.append({ 76 | "index": index, 77 | "name": None, 78 | "conf": prob, 79 | "msilce": index_j 80 | }) 81 | # 从 mat 中找出 conf > self.conf 的元素 82 | df = pd.DataFrame(mat) 83 | # 按照 conf 降序排列 84 | df = df.sort_values(by="conf", ascending=False) 85 | if num: 86 | df = df.head(num) 87 | else: 88 | df = df[df["conf"] > self.conf] 89 | 90 | # 按照 index 分组, 91 | dfg = df.groupby("index") 92 | silece_list = [] 93 | for name, group in dfg: 94 | silece_list.append(group["msilce"].tolist()) 95 | return silece_list, None 96 | 97 | def _get_similarity_byyolo(self, charimg: list, crop_nine_list: list, num: int = None): 98 | mat = [] 99 | for index, i in enumerate(charimg): 100 | results_char = self.modelyoloc.predict(i, 101 | conf=self.conf, 102 | imgsz=self.modelyoloc.imgsz, 103 | verbose= self.verbose, 104 | device = self.modelyoloc._device 105 | ) 106 | 107 | ic_top1name, ic_top1conf, ic_top5name, ic_top5conf = self.modelyoloc.extract_info(results_char) 108 | for index_j, j in enumerate(crop_nine_list): 109 | results = self.modelyoloc.predict(j, 110 | conf=self.conf, 111 | imgsz=self.modelyoloc.imgsz, 112 | verbose= self.verbose, 113 | device = self.modelyoloc._device 114 | ) 115 | it_top1name, it_top1conf, it_top5name, it_top5conf = self.modelyoloc.extract_info(results) 116 | 117 | imax_name, imax_prob = find_max_probability([ic_top1name], [ic_top1conf] , it_top5name, it_top5conf) 118 | # 返回的概率至少都是大于 0.5 的. 119 | mat.append({ 120 | "index": index, 121 | "top1name": ic_top1name, 122 | "top1conf": ic_top1conf, 123 | "name": imax_name, 124 | "conf": imax_prob, 125 | "msilce": index_j 126 | }) 127 | 128 | 129 | # 从 mat 中找出 conf > self.conf 的元素 130 | df = pd.DataFrame(mat) 131 | # 按照 conf 降序排列 132 | df = df.sort_values(by="conf", ascending=False) 133 | if num: 134 | df = df.head(num) 135 | else: 136 | df = df[df["conf"] > self.conf] 137 | 138 | # 按照 index 分组, 139 | dfg = df.groupby("index") 140 | silece_list = [] 141 | name_list = [] 142 | for name, group in dfg: 143 | silece_list.append(group["msilce"].tolist()) 144 | name_list.append(group["name"].tolist()) 145 | return silece_list, name_list 146 | 147 | 148 | def reset_outdata(self): 149 | self.coordination = Coordination() 150 | 151 | 152 | def run(self, background: Union[ str , list, Image.Image] , charimg: Union[str, list]) -> Coordination: 153 | """ 154 | background: 背景图,即大图, 如果是 list,只支持一个元素 155 | charimg: 具有顺序的小图标 156 | """ 157 | assert background, "background is None" 158 | if isinstance(background, list) and len(background) > 1: 159 | print("Warning: background is list, only support one element") 160 | background = background[0] 161 | elif isinstance(background, list) and len(background) == 1: 162 | background = background[0] 163 | 164 | 165 | self.reset_outdata() 166 | charimg, crop_nine_list = self._preprocess(charimg, background) 167 | 168 | self.coordination.set_value("charsImage", flatten(charimg)) 169 | 170 | if self.modeltype == 1: 171 | indices, names = self._get_similarity_byyolo(charimg, crop_nine_list) 172 | elif self.modeltype == 2: 173 | indices, names = self._get_similarity_byper(charimg, crop_nine_list) 174 | else: 175 | indices1, names1 = self._get_similarity_byyolo(charimg, crop_nine_list) 176 | indices2, names2 = self._get_similarity_byper(charimg, crop_nine_list) 177 | # 可以用来查找两个模型的差异 178 | indices = indices1 179 | 180 | 181 | rowcol = self.get_rowcol(indices) 182 | xyxy = self.get_xyxy(indices, self._bgsize) 183 | self.coordination.set_value("nine_rowcol", rowcol) 184 | # 展平 185 | xyxy = flatten(xyxy) 186 | names = flatten(names) 187 | self.coordination.set_value("targets_xyxy", xyxy) 188 | self.coordination.set_value("targets_name", names) 189 | 190 | return self.coordination 191 | def get_rowcol(self, indices: list): 192 | res = [] 193 | maplist = { 194 | "0": [1,1], 195 | "1": [1,2], 196 | "2": [1,3], 197 | "3": [2,1], 198 | "4": [2,2], 199 | "5": [2,3], 200 | "6": [3,1], 201 | "7": [3,2], 202 | "8": [3,3], 203 | } 204 | res = [maplist[str(i)] for i in indices[0]] 205 | return res 206 | def get_xyxy(self, indices: list, size: tuple): 207 | res = [] 208 | width, height = size 209 | h = height // 3 - 1 210 | w = width // 3 - 1 211 | for ind in range(len(indices)): 212 | row = [] 213 | for j in indices[ind]: 214 | x = (j % 3) * w 215 | y = (j // 3) * h 216 | row.append([x, y, x+w, y+h]) 217 | res.append(row) 218 | return res 219 | 220 | 221 | 222 | if __name__ == "__main__": 223 | from conf.config import gtconf 224 | gt = GTnine(pclass=gtconf['nine']['pclass']) 225 | charimg = "assets/nine3/ques_00000_37458.png" 226 | background = "assets/nine3/img_00000_37458.png" 227 | 228 | out = gt.run(background, charimg) 229 | from src.utils.outdata import Outfile 230 | Outfile.draw_image(background, 231 | chars_xyxy= out.get_value("charsImage"), 232 | targets_xyxy = out.get_value("targets_xyxy"), 233 | out_path="temp3/temp1.png" 234 | ) 235 | 236 | 237 | 238 | 239 | -------------------------------------------------------------------------------- /src/utils/MakeCharImage.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, Optional, Union 2 | from PIL import Image, ImageDraw, ImageFont 3 | import os 4 | 5 | class MakeCharImage: 6 | 7 | def __init__( 8 | self, 9 | text: str, 10 | image_size: Tuple[int, int] = (120, 120), 11 | offset: Union[int, float] = 0, 12 | font_path: str = None, 13 | output_path: Union[str, None] = None 14 | ) -> None: 15 | assert len(text) == 1, "text must be a single character" 16 | self.text = text 17 | self.image_size = image_size 18 | self.offset = offset 19 | if not font_path: 20 | font_path = os.path.join(os.path.dirname(__file__), "simsun.ttc") 21 | self.font_path = font_path 22 | self.output_path = output_path 23 | self.generated_image = self.generate_charimage() 24 | if self.output_path: 25 | self.generated_image.save(self.output_path) 26 | 27 | @classmethod 28 | def load_font(cls, font_path: str, font_size: int) -> Optional[ImageFont.FreeTypeFont]: 29 | try: 30 | return ImageFont.truetype(font_path, font_size) 31 | except Exception as e: 32 | # print(f"Error loading font: {e}") 33 | return None 34 | 35 | def generate_charimage(self) -> Image.Image: 36 | """ 37 | generate_image generates an image with the given text and size. 38 | """ 39 | assert len(self.text) == 1, "text must be a single character" 40 | assert len(self.image_size) == 2, "image_size must be a tuple of 2 integers (width, height)" 41 | 42 | font = self.load_font(self.font_path, min(self.image_size)) 43 | image_width, image_height = self.image_size 44 | background_color = (255, 255, 255) # 白色背景 45 | text_color = (0, 0, 0) # 黑色文本 46 | image = Image.new("RGB", (image_width, image_height), background_color) 47 | draw = ImageDraw.Draw(image) 48 | 49 | # 绘制原始文本 50 | _, _, width, height = draw.textbbox((0, 0), text=self.text, font=font) 51 | x = (image_width - width) // 2 52 | y = (image_height - height) // 2 53 | draw.text((x, y), self.text, font=font, fill=text_color) 54 | 55 | # 绘制加粗文本(偏移一像素) 56 | if self.offset > 0: 57 | draw.text((x - self.offset, y - self.offset), self.text, font=font, fill=text_color) 58 | draw.text((x + self.offset, y - self.offset), self.text, font=font, fill=text_color) 59 | draw.text((x - self.offset, y + self.offset), self.text, font=font, fill=text_color) 60 | draw.text((x + self.offset, y + self.offset), self.text, font=font, fill=text_color) 61 | 62 | return image 63 | 64 | 65 | 66 | if __name__ == "__main__": 67 | text = "利" 68 | image_size = (120, 120) 69 | aa = MakeCharImage(text, image_size, 0.9, output_path="output.png") 70 | -------------------------------------------------------------------------------- /src/utils/SiameseOnnx.py: -------------------------------------------------------------------------------- 1 | import onnxruntime as ort 2 | from src.utils.siamese import detect_image, sigmoid 3 | from src.utils.utils import open_image, process_similarity_matrix 4 | 5 | class SiameseOnnx(): 6 | def __init__(self, model_path: str, providers: list[str] = ['CPUExecutionProvider']) -> None: 7 | options = ort.SessionOptions() 8 | options.enable_profiling=False 9 | options.add_session_config_entry('session.load_model_format', 'ONNX') 10 | siamese_model = ort.InferenceSession( 11 | model_path, 12 | sess_options = options, 13 | providers=providers 14 | ) 15 | self.model = siamese_model 16 | input_info = siamese_model.get_inputs() 17 | self.imgsz = input_info[0].shape[2:4] 18 | 19 | 20 | 21 | 22 | def getmodel_inputname(self): 23 | """ 24 | 获取模型的信息 25 | """ 26 | input_info = self.model.get_inputs() 27 | input_name = [input.name for input in input_info] 28 | # input_shape = [input.shape for input in input_info] 29 | return input_name 30 | 31 | 32 | 33 | 34 | def predict_list(self, 35 | img1: list, 36 | img2: list, 37 | image_width: int =None, 38 | image_height: int =None 39 | ) -> list[tuple[int, int]]: 40 | """ 41 | 输入图片, 对 img1 中的每张图片和 img2 中的每张图片进行比较,找出对应最高的相似度图片 42 | img1: 图片1的路径,是一个 list, 根据 img1 的顺序返回 43 | img2: 图片2的路径,是一个 list 44 | image_width: 图片的宽 45 | image_height: 图片的高 46 | indices: 是否返回索引 47 | return: 返回的是 img1 和 img2 的索引,比如: [(0, 1), (1, 0)], 长度为 img1 的长度 48 | """ 49 | # 检查 list 中的数据类型 50 | assert isinstance(img1, list), "img1 should be a list" 51 | assert isinstance(img2, list), "img2 should be a list" 52 | assert len(img1) <= len(img2), "img1 should be less than or equal to img2" 53 | # 补全 image_width 和 image_height 54 | if image_width is None: 55 | image_width = self.imgsz[1] 56 | if image_height is None: 57 | image_height = self.imgsz[0] 58 | 59 | img1 = [open_image(i) for i in img1] 60 | img2 = [open_image(i) for i in img2] 61 | 62 | if len(img1) == 0 and len(img2) == 0: 63 | assert False, "img1 or img2 should not be empty" 64 | elif len(img1) == 1 and len(img2) == 1: 65 | return [(0, 0)] 66 | 67 | similarity_matrix = [] 68 | for i in img1: 69 | sim_row = [] 70 | for j in img2: 71 | sim_row.append(self.predict(i, j, image_width, image_height)) 72 | similarity_matrix.append(sim_row) 73 | final_indices = process_similarity_matrix(similarity_matrix) 74 | return final_indices 75 | 76 | 77 | 78 | 79 | 80 | 81 | def predict(self, img1: str, img2: str, image_width: int = 60, image_height: int = 60)-> float: 82 | """ 83 | 输入图片的路径,做预处理, 然后预测两个图片的相似度 84 | img1: 图片1的路径 或者 图片1的二进制数据,或者 Image.Image 85 | img2: 图片2的路径 或者 图片2的二进制数据 或者 Image.Image 86 | image_width: 图片的宽 87 | image_height: 图片的高 88 | return: 相似度, 保留两位小数 89 | """ 90 | image_1 = open_image(img1) 91 | image_2 = open_image(img2) 92 | 93 | photo_1, photo_2 = detect_image(image_1, image_2, image_width, image_height) 94 | inputs_name = self.getmodel_inputname() 95 | inputs = { 96 | inputs_name[0]: photo_1, 97 | inputs_name[1]: photo_2 98 | } 99 | outs = self.model.run(None, inputs) 100 | 101 | prob = sigmoid(outs[0][0][0]) 102 | return round(prob, 2) 103 | 104 | 105 | 106 | -------------------------------------------------------------------------------- /src/utils/YoloOnnx.py: -------------------------------------------------------------------------------- 1 | from ultralytics import YOLO 2 | from src.utils.utils import get_onnx_shape 3 | from ultralytics.utils.checks import cuda_is_available 4 | 5 | 6 | class YoloD(YOLO): 7 | def __init__(self, *args, **kwargs): 8 | super().__init__(*args, **kwargs) 9 | 10 | tempmodel = self.model 11 | if isinstance(tempmodel, str) and tempmodel.endswith(".onnx"): 12 | self.imgsz = get_onnx_shape(tempmodel) 13 | else: 14 | self.imgsz = self.model.args["imgsz"] 15 | if isinstance(self.imgsz, (int, float)): 16 | self.imgsz = [self.imgsz, self.imgsz] 17 | elif isinstance(self.imgsz, (list, tuple)) and len(self.imgsz) == 1: 18 | self.imgsz = [self.imgsz[0], self.imgsz[0]] 19 | if cuda_is_available(): 20 | self._cuda = True 21 | self._device = 'cuda:0' 22 | else: 23 | self._cuda = None 24 | self._device = None 25 | 26 | 27 | def extract_info(self, results)-> tuple[list[list], list[list], list[str], list[dict]]: 28 | """ 29 | 只对单张图片进行检测处理 30 | 返回的是 xyxy, xywh, box_name, info 31 | xyxy: [[x1, y1, x2, y2], [x1, y1, x2, y2], ...] 32 | xywh: [[x, y, w, h], [x, y, w, h], ...] 33 | box_name: ["A", "B", ...] 34 | info: [ 35 | { 36 | "classes": "A", 37 | "prob": 0.9, 38 | "xyxy": [x1, y1, x2, y2], 39 | "xywh": [x, y, w, h] 40 | }, 41 | ... 42 | ] 43 | 44 | """ 45 | assert self.task == "detect", "detect only support detect task" 46 | assert len(results) == 1, "detect only support single image" 47 | xyxy_all = [] 48 | xywh_all = [] 49 | name_all = results[0].names 50 | #取出name_all 的 value 51 | box_name = [] 52 | info = [] 53 | for result in results: 54 | box_cls = result.boxes.cls.tolist() 55 | xyxy = result.boxes.xyxy.tolist() 56 | xywh = result.boxes.xywh.tolist() 57 | probs = result.probs 58 | for i in range(len(box_cls)): 59 | box_name.append(name_all[box_cls[i]]) 60 | info.append({ 61 | "classes": name_all[box_cls[i]], 62 | "prob": 1 if not probs else round(probs[i].item(), 2), 63 | "xyxy": xyxy[i], 64 | "xywh": xywh[i], 65 | }) 66 | xyxy_all.append(xyxy) 67 | xywh_all.append(xywh) 68 | break # 只对单张图片进行检测处理 69 | 70 | return xyxy_all[0], xywh_all[0], box_name, info 71 | 72 | 73 | 74 | class YoloC(YOLO): 75 | def __init__(self, *args, **kwargs): 76 | super().__init__(*args, **kwargs) 77 | tempmodel = self.model 78 | if isinstance(tempmodel, str) and tempmodel.endswith(".onnx"): 79 | self.imgsz = get_onnx_shape(tempmodel) 80 | else: 81 | self.imgsz = self.model.args["imgsz"] 82 | if isinstance(self.imgsz, (int, float)): 83 | self.imgsz = [self.imgsz, self.imgsz] 84 | elif isinstance(self.imgsz, (list, tuple)) and len(self.imgsz) == 1: 85 | self.imgsz = [self.imgsz[0], self.imgsz[0]] 86 | if cuda_is_available(): 87 | self._cuda = True 88 | self._device = 'cuda:0' 89 | else: 90 | self._cuda = None 91 | self._device = None 92 | 93 | def extract_info(self, result) -> tuple[str, float, list[str], list[float]]: 94 | """ 95 | 对结果进行分类 96 | :param result: 识别结果 97 | :return: top1name, top1conf, top5name, top5conf 98 | top1name: 最大概率对应的类别 99 | top1conf: 最大概率 100 | top5name: 前五的类别 101 | top5conf: 前五的概率 102 | """ 103 | assert self.task == "classify", "classify only support classify task" 104 | assert len(result) == 1, "classify only support single image" 105 | all_names = result[0].names ## 类别字典 106 | top1 = result[0].probs.top1 #最大概率对应的索引 107 | top1name = all_names[top1] #最大概率对应的类别 108 | top1conf = result[0].probs.top1conf.tolist() #最大概率 109 | top5 = result[0].probs.top5 #前五的索引 110 | top5conf = result[0].probs.top5conf.tolist() #前五的概率 111 | top5name = [all_names[i] for i in top5] 112 | # info = { 113 | # "top1": top1, 114 | # "top1name": top1name, 115 | # "top1conf": top1conf, 116 | # "top5": top5, 117 | # "top5name": top5name, 118 | # "top5conf": top5conf 119 | # } 120 | return top1name, top1conf, top5name, top5conf 121 | 122 | 123 | -------------------------------------------------------------------------------- /src/utils/nine.py: -------------------------------------------------------------------------------- 1 | from src.utils.utils import open_image 2 | from PIL import Image 3 | 4 | def flatten(lst, num=1): 5 | """ 6 | 将嵌套的列表展开 7 | :param lst: 嵌套的列表 8 | :param num: 展开的层数 9 | :return: 展开后的列表 10 | """ 11 | flattened_list = [] 12 | for item in lst: 13 | if isinstance(item, list) and num > 0: 14 | flattened_list.extend(flatten(item, num - 1)) 15 | else: 16 | flattened_list.append(item) 17 | return flattened_list 18 | 19 | 20 | def crop_nine(input_file: str) -> list[Image.Image]: 21 | """ 22 | 将图片裁剪成九宫格 23 | :param input_file: 输入图片路径 24 | :return: 九宫格图片列表, 按照从左到右,从上到下的顺序排列 25 | """ 26 | 27 | img = open_image(input_file) 28 | width, height = img.size 29 | h = height // 3 30 | w = width // 3 31 | crop_img_list = [] 32 | for i in range(3): 33 | for j in range(3): 34 | x = j * w 35 | y = i * h 36 | crop_img = img.crop((x, y, x+w, y+h)) 37 | crop_img_list.append(crop_img) 38 | return crop_img_list -------------------------------------------------------------------------------- /src/utils/outdata.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, Optional, List, Union 2 | from dataclasses import dataclass 3 | from src.utils.yoloclass import Shape, Labelme 4 | from pathlib import Path 5 | import os 6 | from PIL import Image, ImageDraw, ImageFont 7 | @dataclass 8 | class Coordination: 9 | charsImage: List[Image.Image] = None 10 | targetsImage: List[Image.Image] = None 11 | chars_xyxy: List[List[float]] = None 12 | targets_xyxy: List[List[float]] = None 13 | chars_name: List[str] = None 14 | targets_name: List[str] = None 15 | extraicon: Optional[Union[str,Image.Image]] = None 16 | indices: Optional[Tuple[int, int]] = None 17 | exist_error: Optional[bool] = None 18 | nine_rowcol: Optional[Tuple[int, int]] = None 19 | chars_xywh: Optional[List[List[float]]] = None 20 | targets_xywh: Optional[List[List[float]]] = None 21 | def set_value(self, key, value): 22 | assert hasattr(self, key), f"key {key} not in Coordination" 23 | setattr(self, key, value) 24 | 25 | def get_value(self, key): 26 | assert hasattr(self, key), f"key {key} not in Coordination" 27 | return getattr(self, key) 28 | 29 | def rank(self): 30 | assert self.indices is not None, "indices is None" 31 | 32 | self.chars_rank, self.targets_rank = zip(*self.indices) 33 | 34 | self.charsImage = [self.charsImage[i] for i in self.chars_rank] if self.charsImage else None 35 | self.targetsImage = [self.targetsImage[i] for i in self.targets_rank] if self.targetsImage else None 36 | self.chars_xyxy = [self.chars_xyxy[i] for i in self.chars_rank] if self.chars_xyxy else None 37 | self.targets_xyxy = [self.targets_xyxy[i] for i in self.targets_rank] if self.targets_xyxy else None 38 | # 保留 2 位小数 39 | self.chars_xyxy = [[round(j, 2) for j in i] for i in self.chars_xyxy] if self.chars_xyxy else None 40 | self.targets_xyxy = [[round(j, 2) for j in i] for i in self.targets_xyxy] if self.targets_xyxy else None 41 | self.chars_xywh = self.xyxy2xywh(self.chars_xyxy) if self.chars_xyxy else None 42 | self.targets_xywh = self.xyxy2xywh(self.targets_xyxy) if self.targets_xyxy else None 43 | def to_dict(self): 44 | exclude = ["charsImage", "targetsImage", "indices", "chars_rank", "targets_rank"] 45 | return {k: v for k, v in self.__dict__.items() if v is not None and k not in exclude} 46 | 47 | def xyxy2xywh(self, xyxy): 48 | if not xyxy: 49 | return None 50 | assert isinstance(xyxy, list), "xyxy should be a list" 51 | assert all([len(i) == 4 for i in xyxy]), "xyxy should have 4 elements" 52 | return [[ (i[0] + i[2]) / 2, (i[1] + i[3]) / 2, i[2] - i[0], i[3] - i[1]] for i in xyxy] 53 | 54 | 55 | 56 | @dataclass 57 | class Outfile: 58 | 59 | @staticmethod 60 | def concatenate_images(images: List[Image.Image]) -> Image.Image: 61 | widths, heights = zip(*(i.size for i in images)) 62 | 63 | total_width = sum(widths) 64 | max_height = max(heights) 65 | 66 | concatenated_image = Image.new('RGB', (total_width, max_height)) 67 | 68 | x_offset = 0 69 | for img in images: 70 | concatenated_image.paste(img, (x_offset, 0)) 71 | x_offset += img.width 72 | 73 | return concatenated_image 74 | 75 | @staticmethod 76 | def check_format(data): 77 | if not isinstance(data, list): 78 | return False # 如果不是列表,则格式不符合要求 79 | 80 | for sublist in data: 81 | if not isinstance(sublist, list): 82 | return False # 如果子列表不是列表,或者长度不等于4,则格式不符合要求 83 | for item in sublist: 84 | if not isinstance(item, (int, float)): 85 | return False # 如果子列表中的元素不是浮点数,则格式不符合要求 86 | return True 87 | 88 | @staticmethod 89 | def load_font(font_path: str, font_size: int) -> Optional[ImageFont.FreeTypeFont]: 90 | try: 91 | return ImageFont.truetype(font_path, font_size) 92 | except Exception as e: 93 | # print(f"Error loading font: {e}") 94 | return None 95 | 96 | @staticmethod 97 | def draw_image(image_path:str, chars_xyxy:list =None, targets_xyxy:list = None, out_path=None): 98 | assert os.path.exists(image_path), f"{image_path} not exists" 99 | assert chars_xyxy is not None or targets_xyxy is not None, "chars_xyxy or targets_xyxy must be not None" 100 | 101 | img = Image.open(image_path) 102 | out_path = image_path.replace(".png", "_out.png") if out_path is None else out_path 103 | 104 | # ##把坐标在图中画出来 105 | draw = ImageDraw.Draw(img) 106 | try: 107 | font_path = os.path.join(os.path.dirname(__file__), "simsun.ttc") 108 | except: 109 | font_path = None 110 | 111 | if Outfile.check_format(chars_xyxy): 112 | font = Outfile.load_font(font_path, (chars_xyxy[0][2] - chars_xyxy[0][0]) // 2) 113 | for index, xyxy in enumerate(chars_xyxy): 114 | 115 | draw.rectangle(xyxy, outline="red", width=3) 116 | # draw.text((xyxy[0], xyxy[1]), str(index), fill="blue", font=font) 117 | x = xyxy[0] 118 | y = xyxy[1] 119 | text = str(index) 120 | offset = 0.1 121 | text_color = "blue" 122 | draw.text((x, y), text, fill= text_color, font=font) 123 | draw.text((x - offset, y - offset), text, font=font, fill=text_color) 124 | draw.text((x + offset, y - offset), text, font=font, fill=text_color) 125 | draw.text((x - offset, y + offset), text, font=font, fill=text_color) 126 | draw.text((x + offset, y + offset), text, font=font, fill=text_color) 127 | 128 | if chars_xyxy and all([isinstance(i, Image.Image) for i in chars_xyxy]): 129 | # 把这些图按顺序拼接起来, 放到图的左下角 130 | concat_image = Outfile.concatenate_images(chars_xyxy) 131 | # 按比例进行缩放 132 | concat_image = concat_image.resize((concat_image.width//4, concat_image.height // 4)) 133 | # 将拼接后的图像放到原始图像的左下角 134 | img.paste(concat_image, (0, img.height - concat_image.height)) 135 | 136 | 137 | if targets_xyxy is not None: 138 | font = Outfile.load_font(font_path, (targets_xyxy[0][2] - targets_xyxy[0][0]) // 2) 139 | for index, xyxy in enumerate(targets_xyxy): 140 | draw.rectangle(xyxy, outline="blue", width=3) 141 | x = xyxy[0] 142 | y = xyxy[1] 143 | text = str(index) 144 | offset = 0.1 145 | text_color = "red" 146 | draw.text((x, y), text, fill= text_color, font=font) 147 | draw.text((x - offset, y - offset), text, font=font, fill=text_color) 148 | draw.text((x + offset, y - offset), text, font=font, fill=text_color) 149 | draw.text((x - offset, y + offset), text, font=font, fill=text_color) 150 | draw.text((x + offset, y + offset), text, font=font, fill=text_color) 151 | img.save(out_path) 152 | 153 | 154 | @staticmethod 155 | def word3to_labelme(image_path: str, 156 | chars_xyxy: list, 157 | targets_xyxy: list, 158 | chars_name: list, 159 | targets_name: list, 160 | size: Tuple[int, int] = (384, 344), 161 | output_dir:str = None, 162 | showWarning: bool = True 163 | ) -> Labelme: 164 | if chars_xyxy is None and chars_name is None and showWarning: 165 | print("Warning: chars_xyxy and chars_name are None") 166 | assert len(targets_xyxy) == len(targets_name), "targets_xyxy and targets_name should have the same length" 167 | assert isinstance(targets_xyxy, list), "targets_xyxy should be a list" 168 | assert isinstance(targets_name, list), "targets_name should be a list" 169 | 170 | os.makedirs(output_dir, exist_ok=True) 171 | 172 | labelme = Labelme() 173 | labelme.set_size(*size) 174 | labelme.set_imagePath(os.path.join("../", Path(image_path).parent.stem, Path(image_path).name)) 175 | for i in range(len(targets_xyxy)): 176 | if chars_xyxy: 177 | ichar_shape1 = Shape() #创建一个空的shape 178 | ichar_shape1.set_points(chars_xyxy[i]) 179 | ichar_shape1.set_group_id(int(i)) 180 | ichar_shape1.set_label("char") 181 | ichar_shape1.set_text(chars_name[i]) 182 | ichar_shape1.set_description(chars_name[i]) 183 | labelme.shapes.append(ichar_shape1.to_dict()) 184 | if targets_xyxy: 185 | itarget_shape1 = Shape() #创建一个空的shape 186 | itarget_shape1.set_points(targets_xyxy[i]) 187 | itarget_shape1.set_group_id(int(i)) 188 | itarget_shape1.set_label("target") 189 | itarget_shape1.set_text(targets_name[i]) 190 | itarget_shape1.set_description(targets_name[i]) 191 | labelme.shapes.append(itarget_shape1.to_dict()) 192 | 193 | new_json = os.path.join(output_dir, f"{Path(image_path).stem}.json") 194 | labelme.to_json_file(new_json) 195 | return labelme 196 | 197 | 198 | @staticmethod 199 | def to_labelme(image_path:str, info:Coordination, size: Tuple[int, int] = (384, 344), output_dir:str = None) -> Labelme: 200 | """ 201 | image_path: 图片路径, 不读取图片, 只是用来生成 json 文件 202 | info: Coordination 类型, 包含了图片的坐标信息 203 | output_dir: 输出的文件夹 204 | """ 205 | assert isinstance(info, Coordination), "input should be Coordination" 206 | # 调用 word3to_labelme 207 | return Outfile.word3to_labelme(image_path, 208 | info.chars_xyxy, info.targets_xyxy, 209 | info.chars_name, info.targets_name, 210 | size=size, 211 | output_dir=output_dir) 212 | 213 | 214 | -------------------------------------------------------------------------------- /src/utils/siamese.py: -------------------------------------------------------------------------------- 1 | """ 2 | 来自项目: _ 3 | """ 4 | 5 | from PIL import Image 6 | from PIL.PngImagePlugin import PngImageFile 7 | import numbers 8 | import numpy as np 9 | 10 | 11 | def sigmoid(x): 12 | return 1 / (1 + np.exp(-x)) 13 | 14 | 15 | 16 | def cvtColor(image): 17 | if len(np.shape(image)) == 3 and np.shape(image)[2] == 3: 18 | return image 19 | else: 20 | image = image.convert('RGB') 21 | return image 22 | 23 | def preprocess_input(x): 24 | x /= 255.0 25 | return x 26 | 27 | def resize(img, size, interpolation=Image.BILINEAR): 28 | if isinstance(size, int): 29 | w, h = img.size 30 | if (w <= h and w == size) or (h <= w and h == size): 31 | return img 32 | if w < h: 33 | ow = size 34 | oh = int(size * h / w) 35 | return img.resize((ow, oh), interpolation) 36 | else: 37 | oh = size 38 | ow = int(size * w / h) 39 | return img.resize((ow, oh), interpolation) 40 | else: 41 | return img.resize(size[::-1], interpolation) 42 | 43 | def crop(img, i, j, h, w): 44 | return img.crop((j, i, j + w, i + h)) 45 | 46 | def center_crop(img, output_size): 47 | if isinstance(output_size, numbers.Number): 48 | output_size = (int(output_size), int(output_size)) 49 | w, h = img.size 50 | th, tw = output_size 51 | i = int(round((h - th) / 2.)) 52 | j = int(round((w - tw) / 2.)) 53 | return crop(img, i, j, th, tw) 54 | 55 | def letterbox_image(image, size, letterbox_image): 56 | w, h = size 57 | iw, ih = image.size 58 | if letterbox_image: 59 | '''resize image with unchanged aspect ratio using padding''' 60 | scale = min(w/iw, h/ih) 61 | nw = int(iw*scale) 62 | nh = int(ih*scale) 63 | 64 | image = image.resize((nw,nh), Image.BICUBIC) 65 | new_image = Image.new('RGB', size, (128,128,128)) 66 | new_image.paste(image, ((w-nw)//2, (h-nh)//2)) 67 | else: 68 | if h == w: 69 | new_image = resize(image, h) 70 | else: 71 | new_image = resize(image, [h ,w]) 72 | new_image = center_crop(new_image, [h ,w]) 73 | return new_image 74 | 75 | 76 | 77 | def detect_image(image_1:PngImageFile, image_2:PngImageFile, image_width:int =60, image_height:int = 60): 78 | """ 79 | 输入图片的路径,做预处理,返回预处理后的图片 80 | image_1: PngImageFile数据类型, 由 PIL.Image.open() 读取的图片 81 | image_2: PngImageFile数据类型 82 | image_width: 图片的宽 83 | image_height: 图片的高 84 | return: photo_1, photo_2 85 | """ 86 | assert isinstance(image_1, Image.Image), "image_1 should be a Image.Image" 87 | #---------------------------------------------------------# 88 | # 在这里将图像转换成RGB图像,防止灰度图在预测时报错。 89 | #---------------------------------------------------------# 90 | image_1 = cvtColor(image_1) 91 | image_2 = cvtColor(image_2) 92 | 93 | #---------------------------------------------------# 94 | # 对输入图像进行不失真的resize 95 | #---------------------------------------------------# 96 | image_1 = letterbox_image(image_1, [image_width, image_height],False) 97 | image_2 = letterbox_image(image_2, [image_width, image_height],False) 98 | #---------------------------------------------------------# 99 | # 归一化+添加上batch_size维度 100 | #---------------------------------------------------------# 101 | photo_1 = preprocess_input(np.array(image_1, np.float32)) 102 | photo_2 = preprocess_input(np.array(image_2, np.float32)) 103 | 104 | 105 | #---------------------------------------------------# 106 | # 添加上 batch 维度,才可以放入网络中预测 107 | #---------------------------------------------------# 108 | photo_1 = np.expand_dims(np.transpose(photo_1, (2, 0, 1)), 0).astype(np.float32) 109 | photo_2 = np.expand_dims(np.transpose(photo_2, (2, 0, 1)), 0).astype(np.float32) 110 | 111 | return photo_1, photo_2 112 | 113 | 114 | 115 | if __name__ == '__main__': 116 | image_1 = "testimg/pic_00273_99704_target0.png" 117 | image_2 = "testimg/pic_00273_99704_target0.png" 118 | image_1 = Image.open(image_1) 119 | image_2 = Image.open(image_2) 120 | image_width = 60 121 | image_height = 60 122 | 123 | photo_1, photo_2 = detect_image(image_1, image_2, image_width, image_height) 124 | 125 | 126 | 127 | -------------------------------------------------------------------------------- /src/utils/simsun.ttc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/src/utils/simsun.ttc -------------------------------------------------------------------------------- /src/utils/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, Optional 2 | from PIL import Image 3 | from io import BytesIO 4 | import numpy as np 5 | import onnxruntime as ort 6 | import base64 7 | import os 8 | from pathlib import Path 9 | def get_onnx_shape(onnx_model: str) -> tuple: 10 | providers = ['CPUExecutionProvider'] 11 | options = ort.SessionOptions() 12 | options.enable_profiling=False 13 | model = ort.InferenceSession( 14 | onnx_model, 15 | sess_options = options, 16 | providers=providers 17 | ) 18 | input_info = model.get_inputs() 19 | input_shape = input_info[0].shape[2:4] # 先高度后宽度 20 | del model 21 | return input_shape 22 | 23 | 24 | def process_similarity_matrix(similarity_matrix:np.ndarray | list[list[float]]) -> list[tuple[int, int]]: 25 | """ 26 | 处理相似度矩阵, 返回最大值的索引且不重复 27 | :param similarity_matrix: 相似度矩阵 28 | :return: 行索引和列索引的列表 29 | """ 30 | if not isinstance(similarity_matrix, np.ndarray): 31 | similarity_matrix = np.array(similarity_matrix) 32 | # rows, cols = similarity_matrix.shape 33 | final_indices = [] 34 | while True: 35 | #无参数的时候,把矩阵按行展开成一维数组,然后返回最大值的索引,如果有多个最大值,返回第一个 36 | max_index = np.argmax(similarity_matrix) 37 | #根据索引返回行和列 38 | max_i, max_j = np.unravel_index(max_index, similarity_matrix.shape) 39 | if similarity_matrix[max_i][max_j] == -np.inf: 40 | break 41 | final_indices.append((max_i, max_j)) 42 | similarity_matrix[max_i, :] = -np.inf 43 | similarity_matrix[:, max_j] = -np.inf 44 | 45 | # 对 final_indices 按照行进行排序 46 | final_indices.sort(key=lambda x: x[0]) 47 | return final_indices 48 | 49 | def is_base64(s: str) -> bool: 50 | """ 51 | 判断字符串是否是 base64 编码 52 | :param s: 字符串 53 | :return: 是否是 base64 编码 54 | """ 55 | try: 56 | # 如果能解码则返回 True 57 | base64.b64encode(base64.b64decode(s)) == s 58 | return True 59 | except Exception: 60 | return False 61 | 62 | #打开图片 63 | def open_image(file: str, rmalpha:bool = False, output_path: Optional[str] = None) -> Image.Image: 64 | if isinstance(file, list): 65 | print("Warning: Multiple images provided") 66 | img = [open_image(f, rmalpha, output_path) for f in file] 67 | 68 | elif isinstance(file, np.ndarray): 69 | img = Image.fromarray(file) 70 | elif is_base64(file) and isinstance(file, str): 71 | img = Image.open(BytesIO(base64.b64decode(file))) 72 | elif isinstance(file, bytes): 73 | img = Image.open(BytesIO(file)) 74 | elif isinstance(file, Image.Image): 75 | img = file 76 | elif isinstance(file, Path) and file.exists(): 77 | img = Image.open(file) 78 | elif isinstance(file, str) and os.path.exists(file): 79 | img = Image.open(file) 80 | else: 81 | assert False, "file type is not supported" 82 | 83 | if img.mode == 'RGBA' and rmalpha: 84 | # 检查图像是否具有 alpha 通道, 创建一个白色背景的图像 85 | white_bg = Image.new("RGB", img.size, (255, 255, 255)) 86 | # 将原始图像粘贴到白色背景上 87 | white_bg.paste(img, mask=img.split()[3]) 88 | img = white_bg 89 | img = img.convert('RGB') 90 | elif img.mode == 'RGB': 91 | pass 92 | else: 93 | img = img.convert('RGB') 94 | 95 | if output_path: 96 | img.save(output_path) 97 | return img 98 | 99 | 100 | #调整坐标 101 | def adjust_coordinates(coordinates: list, image_size: Tuple[float, float], toint: bool= True) -> list: 102 | """ 103 | 输入提供的是两个坐标点: 格式为 [[x1, y1], [x2, y2]],其中 x1, y1 是左上角坐标,x2, y2 是右下角坐标。 104 | 或者是 105 | 四个坐标点: 格式为 [x1, y1, x2, y2],其中 x1, y1 是左上角坐标,x2, y2 是右下角坐标。 106 | 如果不是左上角和右下角,则进行调整 107 | :param coordinates: 坐标 108 | :return: 调整后的坐标,格式为 [x1, y1, x2, y2] 109 | """ 110 | # 确保提供的坐标是一个包含两个点的列表 111 | if len(coordinates) == 2: 112 | # 获取坐标点的 x 和 y 值 113 | x1, y1, x2, y2 = coordinates[0][0], coordinates[0][1], coordinates[1][0], coordinates[1][1] 114 | elif len(coordinates) == 4: 115 | x1, y1, x2, y2 = coordinates 116 | else: 117 | raise ValueError("Invalid coordinates format. It should be either [[x1, y1], [x2, y2]] or [x1, y1, x2, y2]") 118 | 119 | width, height = image_size 120 | # 确保坐标在图像范围内 121 | x1 = min(x1, width) 122 | x2 = min(x2, width) 123 | y1 = min(y1, height) 124 | y2 = min(y2, height) 125 | 126 | # 判断是否是左上角和右下角,如果不是则进行调整 127 | if x1 > x2 or y1 > y2: 128 | print("Warning: Input coordinates do not match the expected format, adjusting coordinates.") 129 | # 交换 x 和 y 值,以确保左上角和右下角的关系 130 | x1, y1, x2, y2 = min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2) 131 | # print("Adjusted coordinates to match the expected format.") 132 | # 返回调整后的坐标 133 | if toint: 134 | return [int(x1), int(y1), int(x2), int(y2)] 135 | else: 136 | return [x1, y1, x2, y2] 137 | 138 | 139 | # 根据坐标进行裁剪 140 | def crop_and_save_image(input_path, coordinates:list, output_path:str): 141 | # 打开图像 142 | original_image = Image.open(input_path) 143 | image_size = original_image.size 144 | x1, y1, x2,y2 = adjust_coordinates(coordinates, image_size, toint=False) 145 | # 裁剪图像 146 | cropped_image = original_image.crop((x1, y1, x2, y2)) 147 | # 保存图像 148 | cropped_image.save(output_path) 149 | 150 | 151 | def find_max_probability(ichars_five_name, ichars_five_prob, itargets_five_name, itargets_five_prob): 152 | # 创建字典将名称和概率关联起来 153 | ichars_dict = dict(zip(ichars_five_name, ichars_five_prob)) 154 | itarget_dict = dict(zip(itargets_five_name, itargets_five_prob)) 155 | 156 | merged_dict = ichars_dict.copy() 157 | for key, value in itarget_dict.items(): 158 | if key in merged_dict: 159 | merged_dict[key] += value 160 | else: 161 | merged_dict[key] = value 162 | # 找到概率最大的名称 163 | max_name = max(merged_dict, key=merged_dict.get) 164 | max_prob = merged_dict[max_name] 165 | return max_name, round(max_prob / 2, 2) 166 | 167 | 168 | if __name__ == '__main__': 169 | 170 | similarity_matrix = [ 171 | [0.1, 0, 0.5], 172 | [0.7, 1, 0.8], 173 | [1, 0.6, 1], 174 | ] 175 | 176 | # 调用函数处理相似度矩阵 177 | final_indices = process_similarity_matrix(similarity_matrix) 178 | 179 | # 打印处理后的索引列表 180 | print(final_indices) 181 | 182 | ichars_three_name = ['a', 'b', 'c'] 183 | ichars_three_prob = [0.1, 0.2, 0.3] 184 | itargets_three_name = ['b1', 'c1', 'd1'] 185 | itargets_three_prob = [0.2, 0.3, 0.4] 186 | 187 | # 调用函数 188 | max_name, max_prob = find_max_probability(ichars_three_name, ichars_three_prob, itargets_three_name, itargets_three_prob) 189 | 190 | # 打印结果 191 | print("最大概率对应的名称:", max_name) 192 | print("最大概率:", max_prob) 193 | 194 | 195 | 196 | 197 | 198 | -------------------------------------------------------------------------------- /src/utils/yoloclass.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | import json 3 | from pathlib import Path 4 | 5 | class Shape(): 6 | def __init__(self, points = None): 7 | # points = [[x1,y1, x2,y2], [x1,y1, x2,y2], ...] 8 | # 必须是上面的形式 9 | self.label = "" 10 | if not points: 11 | self.points = [] 12 | else: 13 | if not isinstance(points, list) and len(points) == 4: 14 | raise Exception("points must be list and len(points) == 4") 15 | self.points = [[float(points[0]), float(points[1])], [float(points[2]), float(points[3])]] 16 | self.group_id = None 17 | self.description = "" 18 | self.shape_type = "rectangle" 19 | self.flags = {} 20 | self.text = "" 21 | self.mask = None 22 | def set_group_id(self, group_id): 23 | assert isinstance(group_id, int), "group_id must be int" 24 | self.group_id = group_id 25 | def set_label(self, label): 26 | self.label = label 27 | def set_points(self, points): 28 | if not isinstance(points, list) and len(points) == 4: 29 | raise Exception("points must be list and len(points) == 4") 30 | self.points = [[float(points[0]), float(points[1])], [float(points[2]), float(points[3])]] 31 | 32 | def set_shape_type(self, shape_type): 33 | self.shape_type = shape_type 34 | def set_description(self, description): 35 | self.description = description 36 | def set_text(self, text): 37 | self.text = text 38 | def to_dict(self): 39 | return self.__dict__ 40 | 41 | 42 | 43 | class Labelme(): 44 | def __init__(self): 45 | self.version = "5.4.1" 46 | self.flags = {} 47 | self.shapes = [] 48 | self.imagePath = "" 49 | self.imageData = None 50 | self.imageHeight = 200 51 | self.imageWidth = 300 52 | 53 | def set_imagePath(self, path): 54 | self.imagePath = path 55 | 56 | def set_size(self, h, w): 57 | self.imageHeight = h 58 | self.imageWidth = w 59 | 60 | 61 | def set_poses_list(self, 62 | poses: List[List[int]], 63 | label:list[str], 64 | text: list[str] = None, 65 | description: list[str] = None, 66 | ): 67 | if not isinstance(poses, list): 68 | raise Exception("poses must be list") 69 | if not isinstance(label, list): 70 | raise Exception("label must be list") 71 | 72 | if len(poses) != len(label) and len(label) ==1: 73 | label = label * len(poses) 74 | 75 | if len(poses) != len(label): 76 | raise Exception("len(poses) must be equal to len(label)") 77 | 78 | for i in range(len(poses)): 79 | shape = Shape(poses[i]) 80 | shape.set_label(label[i]) 81 | if text: 82 | shape.set_text(text[i]) 83 | if description: 84 | shape.set_description(description[i]) 85 | self.shapes.append(shape.to_dict()) 86 | 87 | def set_shape_text(self, ind:int, texts:str): 88 | if ind >= len(self.shapes): 89 | raise Exception("ind must be less than len(self.shapes)") 90 | self.shapes[ind]["text"] = texts 91 | 92 | def set_shape_description(self, ind:int, description:str): 93 | if ind >= len(self.shapes): 94 | raise Exception("ind must be less than len(self.shapes)") 95 | self.shapes[ind]["description"] = description 96 | 97 | def set_shape_label(self, ind:int, label:str): 98 | if ind > len(self.shapes): 99 | raise Exception("ind must be less than len(self.shapes)") 100 | self.shapes[ind]["label"] = label 101 | 102 | def set_shape_points(self, ind:int, points:List[int]): 103 | if ind >= len(self.shapes): 104 | raise Exception("ind must be less than len(self.shapes)") 105 | self.shapes[ind]["points"] = [[float(points[0]), float(points[1])], [float(points[2]), float(points[3])]] 106 | 107 | 108 | 109 | 110 | def set_poses(self, poses: List[List[float]], label="icon1"): 111 | if not isinstance(poses, list): 112 | raise Exception("poses must be list") 113 | temp = poses[0] 114 | assert isinstance(temp, list) and len(temp) == 4, "poses must be list and len(poses[0]) == 4" 115 | for pose in poses: 116 | shape = Shape(pose) 117 | shape.set_label(label) 118 | self.shapes.append(shape.to_dict()) 119 | 120 | 121 | def to_dict(self): 122 | return self.__dict__ 123 | def to_json_file(self, file_path): 124 | assert file_path.endswith(".json"), "file_path must be end with .json" 125 | 126 | with open(file_path, "w") as f: 127 | json.dump(self.__dict__, f, ensure_ascii=False, indent=2) 128 | 129 | if __name__ == "__main__": 130 | labelme = Labelme() 131 | labelme.set_imagePath("../icon4_imgs/imgs_00794_41425_0.png") 132 | labelme.set_size(200, 300) 133 | pose = [[1, 2, 3, 4], [5, 6, 7, 9]] 134 | labelme.set_poses(pose) 135 | labelme.to_json_file("icon4_imgs.json") -------------------------------------------------------------------------------- /webjs/icon4/demo_g4icon.js: -------------------------------------------------------------------------------- 1 | const NodeRSA = require('node-rsa'); 2 | const crypto = require('crypto'); 3 | const CryptoJS = require("crypto-js"); 4 | function get_key() { 5 | var s4 = ""; 6 | for (i = 0; i < 4; i++) { 7 | s4 = s4 + ((1 + Math["random"]()) * 65536 | 0)["toString"](16)["substring"](1); 8 | } 9 | return s4; 10 | } 11 | function MD5_Encrypt(word) { 12 | return CryptoJS.MD5(word).toString(); 13 | } 14 | function AES_Encrypt(key, word) { 15 | var srcs = CryptoJS.enc.Utf8.parse(word); 16 | var encrypted = CryptoJS.AES.encrypt(srcs, CryptoJS.enc.Utf8.parse(key), { 17 | iv: CryptoJS.enc.Utf8.parse("0000000000000000"), 18 | mode: CryptoJS.mode.CBC, 19 | padding: CryptoJS.pad.Pkcs7 20 | }); 21 | return CryptoJS.enc.Hex.stringify(CryptoJS.enc.Base64.parse(encrypted.toString())); 22 | } 23 | function RSA_encrypt(data) { 24 | const public_key_1 = '00C1E3934D1614465B33053E7F48EE4EC87B14B95EF88947713D25EECBFF7E74C7977D02DC1D9451F79DD5D1C10C29ACB6A9B4D6FB7D0A0279B6719E1772565F09AF627715919221AEF91899CAE08C0D686D748B20A3603BE2318CA6BC2B59706592A9219D0BF05C9F65023A21D2330807252AE0066D59CEEFA5F2748EA80BAB81'; 25 | const public_key_2 = '10001'; 26 | const public_key = new NodeRSA(); 27 | public_key.importKey({ 28 | n: Buffer.from(public_key_1, 'hex'), 29 | e: parseInt(public_key_2, 16), 30 | }, 'components-public'); 31 | const encrypted = crypto.publicEncrypt({ 32 | key: public_key.exportKey('public'), 33 | padding: crypto.constants.RSA_PKCS1_PADDING 34 | }, Buffer.from(data)); 35 | return encrypted.toString('hex'); 36 | } 37 | 38 | 39 | 40 | 41 | function sha256(str) { 42 | const hash = crypto.createHash('sha256'); 43 | hash.update(str); 44 | return hash.digest('hex'); 45 | } 46 | 47 | function get_w2(gt, lot_number, detail_time, userresponse){ 48 | let randomkey = get_key() 49 | 50 | passtime = 3000 + Math.floor(Math.random() * 1000) 51 | pow_msg = '1' + "|" + 12 + "|" + 'sha256' + "|" + detail_time + "|" + gt + "|" + lot_number + "|" + '' + "|" + randomkey 52 | pow_sign = sha256(pow_msg) 53 | // 输入的坐标格式: 54 | // userresponse = [[1554,6199],[1819,2771],[4569,3665]] 55 | xiyu = { 56 | "passtime": passtime, 57 | "userresponse": userresponse, 58 | "device_id": "70ad34ab80cef354efa5b79c622d5ad3", 59 | "lot_number": lot_number, 60 | "pow_msg": pow_msg, 61 | "pow_sign": pow_sign, 62 | "geetest": "captcha", 63 | "lang": "zh", 64 | "ep": "123", 65 | "biht": "1426265548", 66 | "gee_guard": { 67 | "env": { 68 | "sf": { 69 | "data": [ 70 | "Arial Unicode MS", 71 | "Gill Sans", 72 | "Helvetica Neue", 73 | "Menlo" 74 | ] 75 | }, 76 | "seaof": { 77 | "data": { 78 | "tdf": 148.859375, 79 | "elp": 148.859375, 80 | "fos": 144.3125, 81 | "pos": 148.859375, 82 | "onm": 133.0625, 83 | "nmi": 9.3125, 84 | "mys": 146.09375 85 | } 86 | }, 87 | "aosua": { 88 | "data": 124.04344968475198 89 | }, 90 | "ecs": { 91 | "data": [ 92 | 30, 93 | 0, 94 | 0, 95 | 0 96 | ] 97 | }, 98 | "uscpo": {}, 99 | "sal": { 100 | "data": [ 101 | [ 102 | "zh-CN" 103 | ] 104 | ] 105 | }, 106 | "hoc": { 107 | "data": 30 108 | }, 109 | "ydmed": { 110 | "data": 8 111 | }, 112 | "ncs": { 113 | "data": [ 114 | 900, 115 | 1440 116 | ] 117 | }, 118 | "yah": { 119 | "data": 8 120 | }, 121 | "eit": { 122 | "data": "Asia/Shanghai" 123 | }, 124 | "ees": { 125 | "data": true 126 | }, 127 | "els": { 128 | "data": true 129 | }, 130 | "bni": { 131 | "data": true 132 | }, 133 | "epo": { 134 | "data": false 135 | }, 136 | "sdspc": {}, 137 | "mlp": { 138 | "data": "MacIntel" 139 | }, 140 | "slp": { 141 | "data": [ 142 | { 143 | "name": "PDF Viewer", 144 | "description": "Portable Document Format", 145 | "mimeTypes": [ 146 | { 147 | "type": "application/pdf", 148 | "suffixes": "pdf" 149 | }, 150 | { 151 | "type": "text/pdf", 152 | "suffixes": "pdf" 153 | } 154 | ] 155 | }, 156 | { 157 | "name": "Chrome PDF Viewer", 158 | "description": "Portable Document Format", 159 | "mimeTypes": [ 160 | { 161 | "type": "application/pdf", 162 | "suffixes": "pdf" 163 | }, 164 | { 165 | "type": "text/pdf", 166 | "suffixes": "pdf" 167 | } 168 | ] 169 | }, 170 | { 171 | "name": "Chromium PDF Viewer", 172 | "description": "Portable Document Format", 173 | "mimeTypes": [ 174 | { 175 | "type": "application/pdf", 176 | "suffixes": "pdf" 177 | }, 178 | { 179 | "type": "text/pdf", 180 | "suffixes": "pdf" 181 | } 182 | ] 183 | }, 184 | { 185 | "name": "Microsoft Edge PDF Viewer", 186 | "description": "Portable Document Format", 187 | "mimeTypes": [ 188 | { 189 | "type": "application/pdf", 190 | "suffixes": "pdf" 191 | }, 192 | { 193 | "type": "text/pdf", 194 | "suffixes": "pdf" 195 | } 196 | ] 197 | }, 198 | { 199 | "name": "WebKit built-in PDF", 200 | "description": "Portable Document Format", 201 | "mimeTypes": [ 202 | { 203 | "type": "application/pdf", 204 | "suffixes": "pdf" 205 | }, 206 | { 207 | "type": "text/pdf", 208 | "suffixes": "pdf" 209 | } 210 | ] 211 | } 212 | ] 213 | }, 214 | "sac": { 215 | "data": { 216 | "wpd": true, 217 | "ytg": "1fd188f9714ca90a5a10eb2fc306b5eb", 218 | "tcg": "_tcg_tcg_val", 219 | "xt": "32a115bd05e0f411c5ecd7e285fd36e2" 220 | } 221 | }, 222 | "sstot": { 223 | "data": { 224 | "maxTouchPoints": 0, 225 | "touchEvent": false, 226 | "touchStart": false 227 | } 228 | }, 229 | "rev": { 230 | "data": "Google Inc." 231 | }, 232 | "sadev": { 233 | "data": [ 234 | "chrome" 235 | ] 236 | }, 237 | "doc": { 238 | "data": true 239 | }, 240 | "drh": { 241 | "data": true 242 | }, 243 | "lew": { 244 | "data": "Google Inc. (Apple)ANGLE (Apple, Apple M1, OpenGL 4.1)" 245 | }, 246 | "slo": { 247 | "data": [ 248 | "location" 249 | ] 250 | }, 251 | "pst": { 252 | "data": [ 253 | false, 254 | false, 255 | false 256 | ] 257 | } 258 | }, 259 | "roe": { 260 | "aup": "3", 261 | "sep": "3", 262 | "egp": "3", 263 | "auh": "3", 264 | "rew": "3", 265 | "snh": "3", 266 | "snih": "3", 267 | "res": "3", 268 | "resl": "3", 269 | "stpn": "3" 270 | } 271 | }, 272 | "va8R": "wG3Q", 273 | "em": { 274 | "ph": 0, 275 | "cp": 0, 276 | "ek": "11", 277 | "wd": 1, 278 | "nt": 0, 279 | "si": 0, 280 | "sc": 0 281 | } 282 | } 283 | xiyu = JSON.stringify(xiyu).replace(" ", "").replace("'", '"') 284 | w = AES_Encrypt(randomkey, xiyu)+ RSA_encrypt(randomkey) 285 | return w 286 | } 287 | 288 | 289 | 290 | // lot_number = 'de023059ed154096bc535dece6904205' 291 | // captcha_id = gt = '0b2abaab0ad3f4744ab45342a2f3d409' 292 | // detail_time = '2024-03-12T13:50:04.645097+08:00' 293 | // distance = 300 294 | // passtime = 1786 295 | // track = [[34,45,67,78],[23, 45, 56, 67]] 296 | // console.log(get_w2(gt, lot_number, detail_time, distance, passtime, track)) 297 | 298 | 299 | -------------------------------------------------------------------------------- /webjs/icon4/tools.py: -------------------------------------------------------------------------------- 1 | from urllib import request 2 | import os 3 | from typing import Union 4 | from urllib.parse import urljoin 5 | import time 6 | def download_img(url: Union[str, list], path: Union[str, list]) -> None: 7 | """ 8 | 通过url下载图片,已经被 urllib.request 封装好了的 9 | :param url: 图片url 10 | :param path: 保存路径,带后缀名 11 | """ 12 | if isinstance(url, str) and isinstance(path, str): 13 | if r"/" in path: 14 | os.makedirs(os.path.dirname(path), exist_ok=True) 15 | if "https" not in url: 16 | url = urljoin("https://static.geetest.com", url) 17 | request.urlretrieve(url, path) 18 | 19 | elif isinstance(url, list) and isinstance(path, list): 20 | assert len(url) == len(path), "url和path长度不一致" 21 | for i in range(len(url)): 22 | if "https" not in url[i]: 23 | url[i] = urljoin("https://static.geetest.com", url[i]) 24 | request.urlretrieve(url[i], path[i]) 25 | 26 | def xyxy2gtformat(xyxy): 27 | xyxy_center = [] 28 | for point in xyxy: 29 | x = (point[0] + point[2]) // 2 30 | y = (point[1] + point[3]) // 2 31 | xyxy_center.append([x, y]) 32 | 33 | new_points = [] 34 | for point in xyxy_center: 35 | x = point[0] * 32 # 32 和 48 是手动计算出来的 36 | y = point[1] * 48 37 | new_points.append([x, y]) 38 | return new_points 39 | 40 | 41 | 42 | 43 | now_str = str(int(time.time() * 1000)) 44 | 45 | cookies = { 46 | 'aliyungf_tc': '180a86da32644284df3bb8fbeeb91f03283c3444515b4888d1f04eb2eb504862', 47 | '_MHYUUID': 'b337f507-855b-4c73-afd8-13b573b69469', 48 | 'DEVICEFP_SEED_ID': 'f42cee7a2fbc6a2b', 49 | 'DEVICEFP_SEED_TIME': now_str, 50 | 'DEVICEFP': '38d7f7f987b09' 51 | } 52 | 53 | headers = { 54 | 'Accept': 'application/json, text/plain, */*', 55 | 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6', 56 | 'Cache-Control': 'no-cache', 57 | 'Connection': 'keep-alive', 58 | 'Origin': 'https://user.mihoyo.com', 59 | 'Pragma': 'no-cache', 60 | 'Referer': 'https://user.mihoyo.com/', 61 | 'Sec-Fetch-Dest': 'empty', 62 | 'Sec-Fetch-Mode': 'cors', 63 | 'Sec-Fetch-Site': 'same-site', 64 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0', 65 | 'x-rpc-client_type': '4', 66 | 'x-rpc-device_fp': '38d7f7f987b09', 67 | 'x-rpc-device_id': 'b337f507-855b-4c73-afd8-13b573b69469', 68 | 'x-rpc-device_model': 'Microsoft%20Edge%20120.0.0.0', 69 | 'x-rpc-device_name': 'Microsoft%20Edge', 70 | 'x-rpc-mi_referrer': 'https://user.mihoyo.com/', 71 | 'x-rpc-source': 'accountWebsite', 72 | } 73 | -------------------------------------------------------------------------------- /webjs/nine3/demo.js: -------------------------------------------------------------------------------- 1 | const NodeRSA = require('node-rsa'); 2 | const crypto = require('crypto'); 3 | const CryptoJS = require("crypto-js"); 4 | function get_key() { 5 | var s4 = ""; 6 | for (i = 0; i < 4; i++) { 7 | s4 = s4 + ((1 + Math["random"]()) * 65536 | 0)["toString"](16)["substring"](1); 8 | } 9 | return s4; 10 | } 11 | function MD5_Encrypt(word) { 12 | return CryptoJS.MD5(word).toString(); 13 | } 14 | function AES_Encrypt(key, word) { 15 | var srcs = CryptoJS.enc.Utf8.parse(word); 16 | var encrypted = CryptoJS.AES.encrypt(srcs, CryptoJS.enc.Utf8.parse(key), { 17 | iv: CryptoJS.enc.Utf8.parse("0000000000000000"), 18 | mode: CryptoJS.mode.CBC, 19 | padding: CryptoJS.pad.Pkcs7 20 | }); 21 | // 把加密后的数据(base64 -- > 解析 base64 -- > hex) 22 | return CryptoJS.enc.Hex.stringify(CryptoJS.enc.Base64.parse(encrypted.toString())); 23 | } 24 | function RSA_encrypt(data) { 25 | const public_key_1 = '00C1E3934D1614465B33053E7F48EE4EC87B14B95EF88947713D25EECBFF7E74C7977D02DC1D9451F79DD5D1C10C29ACB6A9B4D6FB7D0A0279B6719E1772565F09AF627715919221AEF91899CAE08C0D686D748B20A3603BE2318CA6BC2B59706592A9219D0BF05C9F65023A21D2330807252AE0066D59CEEFA5F2748EA80BAB81'; 26 | const public_key_2 = '10001'; 27 | const public_key = new NodeRSA(); 28 | public_key.importKey({ 29 | n: Buffer.from(public_key_1, 'hex'), 30 | e: parseInt(public_key_2, 16), 31 | }, 'components-public'); 32 | const encrypted = crypto.publicEncrypt({ 33 | key: public_key.exportKey('public'), 34 | padding: crypto.constants.RSA_PKCS1_PADDING 35 | }, Buffer.from(data)); 36 | return encrypted.toString('hex'); 37 | } 38 | 39 | function get_w(captchaId, lot_number, detail_time, userresponse) { 40 | 41 | // 随机产生一个 3000 --4000 之间的值 42 | passtime = 3000 + Math.floor(Math.random() * 1000) 43 | romdon_key = get_key() 44 | pow_msg = "1|0|md5|" + detail_time + "|" + captchaId + "|" + lot_number + "||" + romdon_key 45 | xiyu = { 46 | "passtime": passtime, 47 | "userresponse": userresponse, //[[2,3],[2,2],[1,2]], // 这里是从 1 开始的,[行,列] 48 | "device_id": "D00D", 49 | "lot_number": lot_number, 50 | "pow_msg": pow_msg, 51 | "pow_sign": MD5_Encrypt(pow_msg), 52 | "geetest": "captcha", 53 | "lang": "zh", 54 | "ep": "123", 55 | "biht":"1426265548", 56 | "gee_guard":{"roe":{"aup":"3","sep":"3","egp":"3","auh":"3","rew":"3","snh":"3","res":"3","cdc":"3"}}, 57 | "Dqf2":"zgWV", 58 | "em":{"ph":0,"cp":0,"ek":"11","wd":1,"nt":0,"si":0,"sc":0} 59 | } 60 | xiyu = JSON.stringify(xiyu).replace(" ", "").replace("'", '"') 61 | w = AES_Encrypt(romdon_key, xiyu)+ RSA_encrypt(romdon_key) 62 | return w 63 | } 64 | 65 | 66 | -------------------------------------------------------------------------------- /webjs/nine3/utils.py: -------------------------------------------------------------------------------- 1 |  2 | import json 3 | import os 4 | import random 5 | 6 | from urllib import request 7 | from typing import Union 8 | from urllib.parse import urljoin 9 | from lxml import etree 10 | from jsonpath import jsonpath 11 | import copy 12 | 13 | def create_directory(directory): 14 | os.makedirs(directory, exist_ok=True) 15 | 16 | def write_json_file(json_data, file_path): 17 | with open(file_path, "w", encoding="utf-8") as f: 18 | json.dump(json_data, f, indent=2, ensure_ascii=False, sort_keys=True) 19 | 20 | def generate_filename_prefix(ii): 21 | ss = random.randint(10000, 99999) 22 | kk = str(ii).zfill(5) 23 | return kk, ss 24 | 25 | def generate_paths(directory, prefix, count): 26 | if count == 1: 27 | return [os.path.join(directory, f"{prefix}.png")] 28 | else: 29 | return [os.path.join(directory, f"{prefix}_{i}.png") for i in range(count)] 30 | 31 | 32 | 33 | 34 | def download_img(url: Union[str, list], path: Union[str, list]) -> None: 35 | """ 36 | 通过url下载图片,已经被 urllib.request 封装好了的 37 | :param url: 图片url 38 | :param path: 保存路径,带后缀名 39 | """ 40 | if isinstance(url, str) and isinstance(path, str): 41 | os.makedirs(os.path.dirname(path), exist_ok=True) 42 | request.urlretrieve(url, path) 43 | 44 | elif isinstance(url, list) and isinstance(path, list): 45 | assert len(url) == len(path), "url和path长度不一致" 46 | for i in range(len(url)): 47 | request.urlretrieve(url[i], path[i]) 48 | 49 | def handle_json_data(resp_json: dict): 50 | try: 51 | data = copy.deepcopy(resp_json['data']) 52 | if isinstance(data, str): 53 | data = json.loads(data) 54 | resp_json['data'] = data 55 | return resp_json 56 | except: 57 | return resp_json 58 | 59 | def generate_list(i:str|list) -> list: 60 | if isinstance(i, str): 61 | return [i] 62 | else: 63 | return i 64 | def generate_url(imgs: str|list) -> Union[list, int]: 65 | imgs = generate_list(imgs) 66 | imgs_list = [urljoin("https://static.geetest.com/", img) for img in imgs] 67 | return imgs_list, len(imgs_list) 68 | 69 | 70 | def from_json_download_imgs_icon(resp_json: dict, ii: int): 71 | resp_json = handle_json_data(resp_json) 72 | json_dir = "icon4_json" 73 | imgs_dir = "icon4_imgs" 74 | ques_dir = "icon4_ques" 75 | 76 | create_directory(json_dir) 77 | create_directory(imgs_dir) 78 | create_directory(ques_dir) 79 | 80 | kk, ss = generate_filename_prefix(ii) 81 | filejson = os.path.join(json_dir, f"res_{kk}_{ss}.json") 82 | write_json_file(resp_json, filejson) 83 | 84 | imgs, imgs_count = generate_url(jsonpath(resp_json, '$..imgs')[0]) 85 | imgs_path = generate_paths(imgs_dir, f"img_{kk}_{ss}", imgs_count) 86 | download_img(imgs, imgs_path) 87 | 88 | ques, ques_count= generate_url(jsonpath(resp_json, '$..ques')[0]) 89 | ques_path = generate_paths(ques_dir, f"ques_{kk}_{ss}", ques_count) 90 | download_img(ques, ques_path) 91 | return ii 92 | 93 | 94 | 95 | def from_json_download_imgs_silde(resp_json: dict, ii: int): 96 | resp_json = handle_json_data(resp_json) 97 | json_dir = "slide4_json" 98 | slide_dir = "slide4_slide" 99 | bg_dir = "slide4_bg" 100 | 101 | create_directory(json_dir) 102 | create_directory(slide_dir) 103 | create_directory(bg_dir) 104 | 105 | kk, ss = generate_filename_prefix(ii) 106 | 107 | filejson = os.path.join(json_dir, f"res_{kk}_{ss}.json") 108 | write_json_file(resp_json, filejson) 109 | 110 | m_slice, m_slice_count = generate_url(jsonpath(resp_json, '$..slice')[0]) 111 | m_slice_path = generate_paths(slide_dir, f"slice_{kk}_{ss}", m_slice_count) 112 | download_img(m_slice, m_slice_path) 113 | 114 | m_bg, m_bg_count = generate_url(jsonpath(resp_json, '$..bg')[0]) 115 | m_bg_path = generate_paths(bg_dir, f"bg_{kk}_{ss}", m_bg_count) 116 | download_img(m_bg, m_bg_path) 117 | 118 | return ii 119 | 120 | 121 | 122 | 123 | 124 | def from_json_download_imgs_phrase(resp_json: dict, ii: int): 125 | resp_json = handle_json_data(resp_json) 126 | json_dir = "phrase4_json" 127 | imgs_dir = "phrase4_imgs" 128 | 129 | create_directory(json_dir) 130 | create_directory(imgs_dir) 131 | 132 | kk, ss = generate_filename_prefix(ii) 133 | 134 | filejson = os.path.join(json_dir, f"res_{kk}_{ss}.json") 135 | write_json_file(resp_json, filejson) 136 | 137 | imgs, imgs_count = generate_url(jsonpath(resp_json, '$..imgs')[0]) 138 | imgs_path = generate_paths(imgs_dir, f"img_{kk}_{ss}", imgs_count) 139 | download_img(imgs, imgs_path) 140 | return ii 141 | 142 | 143 | 144 | def from_json_download_imgs_winlinze(resp_json: dict, ii: int): 145 | resp_json = handle_json_data(resp_json) 146 | json_dir = "winlinze4_json" 147 | imgs_dir = "winlinze4_imgs" 148 | 149 | create_directory(json_dir) 150 | create_directory(imgs_dir) 151 | 152 | kk, ss = generate_filename_prefix(ii) 153 | filejson = os.path.join(json_dir, f"res_{kk}_{ss}.json") 154 | write_json_file(resp_json, filejson) 155 | 156 | imgs, imgs_count = generate_url(jsonpath(resp_json, '$..imgs')[0]) 157 | imgs_path = generate_paths(imgs_dir, f"img_{kk}_{ss}", imgs_count) 158 | download_img(imgs, imgs_path) 159 | return ii 160 | 161 | 162 | def from_json_download_imgs_nine(resp_json: dict, ii: int): 163 | resp_json = handle_json_data(resp_json) 164 | json_dir = "nine4_json" 165 | imgs_dir = "nine4_imgs" 166 | ques_dir = "nine4_ques" 167 | create_directory(json_dir) 168 | create_directory(imgs_dir) 169 | create_directory(ques_dir) 170 | 171 | kk, ss = generate_filename_prefix(ii) 172 | filejson = os.path.join(json_dir, f"res_{kk}_{ss}.json") 173 | write_json_file(resp_json, filejson) 174 | 175 | imgs, imgs_count = generate_url(jsonpath(resp_json, '$..imgs')[0]) 176 | imgs_path = generate_paths(imgs_dir, f"img_{kk}_{ss}", imgs_count) 177 | download_img(imgs, imgs_path) 178 | 179 | ques, ques_count = generate_url(jsonpath(resp_json, '$..ques')[0]) 180 | ques_path = generate_paths(ques_dir, f"ques_{kk}_{ss}", ques_count) 181 | download_img(ques, ques_path) 182 | return ii 183 | 184 | 185 | 186 | 187 | def process_and_download( 188 | resp_json: dict, #返回的json 189 | ii: int, #计数 190 | imgtype: str, #类型 191 | imgs_key: str = 'imgs', #图片的key 192 | additional_key: str = None, #额外的图片保存目录路径 193 | ) -> int: 194 | resp_json = handle_json_data(resp_json) 195 | json_dir = f"{imgtype}_json" 196 | imgs_dir = f"{imgtype}_{imgs_key}" 197 | create_directory(json_dir) 198 | create_directory(imgs_dir) 199 | 200 | imgs_file_prefix = imgs_key #以imgs_key为前缀 201 | 202 | kk, ss = generate_filename_prefix(ii) 203 | filejson = os.path.join(json_dir, f"res_{kk}_{ss}.json") #写入json文件 204 | write_json_file(resp_json, filejson) 205 | 206 | imgs_url, imgs_count = generate_url(jsonpath(resp_json, f'$..{imgs_key}')[0]) 207 | imgs_path = generate_paths(imgs_dir, f"{imgs_file_prefix}_{kk}_{ss}", imgs_count) 208 | download_img(imgs_url, imgs_path) 209 | 210 | if additional_key: 211 | additional_dir = f"{imgtype}_{additional_key}" 212 | additional_file_prefix = additional_key #以additional_key为前缀 213 | create_directory(additional_dir) 214 | additional_url, additional_count = generate_url(jsonpath(resp_json, f'$..{additional_key}')[0]) 215 | additional_path = generate_paths(additional_dir, f"{additional_file_prefix}_{kk}_{ss}", additional_count) 216 | download_img(additional_url, additional_path) 217 | return ii 218 | 219 | # ##### 一个统一的函数 220 | # process_and_download(resp_json, ii, "icon4", "imgs", "ques") 221 | # process_and_download(resp_json, ii, "slide4", "slice", "bg") 222 | # process_and_download(resp_json, ii, "phrase4", "imgs") 223 | # process_and_download(resp_json, ii, "winlinze4", "imgs") 224 | # process_and_download(resp_json, ii, "nine4", "imgs", "ques") 225 | -------------------------------------------------------------------------------- /webjs/word3/f1/tools.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import requests 3 | from urllib.parse import urljoin 4 | from urllib import request 5 | from typing import Union 6 | import os 7 | 8 | def send_image2server(image_path: Union[str, bytes], 9 | image_id: str = "string", 10 | server_url: str = "http://127.0.0.1:9100/gt3/word3"): 11 | """ 12 | 根据图片路径,读取图片,并发送到服务器, 获取返回结果,返回结果为json格式 13 | :param image_path: 图片路径, 14 | str类型, 表示图片的路径, 15 | bytes类型, 表示图片的二进制数据,即图片的内容,一般是通过open('rb')读取的或者直接 request.get(url).content 16 | :param image_id: 图片id 17 | :param server_url: 服务器地址 18 | :return: 返回结果, json格式,里面包含识别的详细信息 19 | """ 20 | if isinstance(image_path, bytes): 21 | image_data = image_path 22 | elif isinstance(image_path, str) and os.path.exists(image_path): 23 | with open(image_path, 'rb') as f: 24 | image_data = f.read() 25 | else: 26 | raise ValueError("image_path should be bytes or str") 27 | data = { 28 | "dataType": 2, 29 | "imageSource": [base64.b64encode(image_data).decode('utf-8')], 30 | "imageID": image_id, 31 | "extraicon": None, 32 | "imageID": "string", 33 | "token": "abc1" 34 | } 35 | response = requests.post(server_url, json=data) 36 | try: 37 | resp_json = response.json() 38 | return resp_json['data']['res'] 39 | except: 40 | return response.text 41 | 42 | 43 | def download_img(url: Union[str, list], path: Union[str, list]) -> None: 44 | """ 45 | 通过url下载图片,已经被 urllib.request 封装好了的 46 | :param url: 图片url 47 | :param path: 保存路径,带后缀名 48 | """ 49 | if isinstance(url, str) and isinstance(path, str): 50 | if r'https://static.geetest.com' in url: 51 | pass 52 | else: 53 | url = urljoin('https://static.geetest.com', url) 54 | if r"/" in path or r"\\" in path: 55 | os.makedirs(os.path.dirname(path), exist_ok=True) 56 | request.urlretrieve(url, path) 57 | 58 | elif isinstance(url, list) and isinstance(path, list): 59 | assert len(url) == len(path), "url和path长度不一致" 60 | if r"/" in path or r"\\" in path: 61 | os.makedirs(os.path.dirname(path), exist_ok=True) 62 | 63 | for i in range(len(url)): 64 | if r'https://static.geetest.com' in url[i]: 65 | pass 66 | else: 67 | url[i] = urljoin('https://static.geetest.com', url[i]) 68 | request.urlretrieve(url[i], path[i]) 69 | 70 | 71 | 72 | 73 | 74 | if __name__ == "__main__": 75 | import os 76 | image_path = os.path.join("docs", "a01.jpg") 77 | response = send_image2server(image_path) 78 | print(response.text) 79 | # import hashlib 80 | # import base64 81 | # with open(image_path, 'rb') as f: 82 | # image_data = f.read() 83 | # imageSource = base64.b64encode(image_data).decode('utf-8') 84 | # imageSource2 = base64.b64decode(bytes(imageSource, 'utf-8')) 85 | # hash_value = hashlib.md5(imageSource2).hexdigest() 86 | # print("Image Hash (MD5):", hash_value) 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | -------------------------------------------------------------------------------- /webjs/word3/f2/loadmodel.py: -------------------------------------------------------------------------------- 1 | from src.method.GTClick import GTClick 2 | from src.method.GTnine import GTnine 3 | from conf.config import gtconf 4 | 5 | 6 | 7 | gt3word = GTClick( 8 | pdetect = gtconf['word']['pdetect'], 9 | per = gtconf['word']['per'], 10 | pclass = gtconf['word']['pclass'], 11 | pclasstags = gtconf['word']['pclasstags'], 12 | chars_issorted = False, 13 | rmalpha = True, 14 | ) 15 | 16 | gt3nine = GTnine(pclass=gtconf['nine']['pclass']) 17 | 18 | gt4icon = GTClick( 19 | pdetect = gtconf['icon4mi']['pdetect'], 20 | per = gtconf['icon4mi']['per'], 21 | pclass = gtconf['icon4mi']['pclass'], 22 | pclasstags = gtconf['icon4mi']['pclasstags'], 23 | chars_issorted = True, 24 | rmalpha = True, 25 | ) -------------------------------------------------------------------------------- /webjs/word3/f2/tools.py: -------------------------------------------------------------------------------- 1 | from urllib.parse import urljoin 2 | from urllib import request 3 | from typing import Union 4 | import os 5 | 6 | def download_img(url: Union[str, list], path: Union[str, list]) -> None: 7 | """ 8 | 通过url下载图片,已经被 urllib.request 封装好了的 9 | :param url: 图片url 10 | :param path: 保存路径,带后缀名 11 | """ 12 | if isinstance(url, str) and isinstance(path, str): 13 | if r'https://static.geetest.com' in url: 14 | pass 15 | else: 16 | url = urljoin('https://static.geetest.com', url) 17 | if r"/" in path or r"\\" in path: 18 | os.makedirs(os.path.dirname(path), exist_ok=True) 19 | request.urlretrieve(url, path) 20 | 21 | elif isinstance(url, list) and isinstance(path, list): 22 | assert len(url) == len(path), "url和path长度不一致" 23 | if r"/" in path or r"\\" in path: 24 | os.makedirs(os.path.dirname(path), exist_ok=True) 25 | 26 | for i in range(len(url)): 27 | if r'https://static.geetest.com' in url[i]: 28 | pass 29 | else: 30 | url[i] = urljoin('https://static.geetest.com', url[i]) 31 | request.urlretrieve(url[i], path[i]) 32 | 33 | 34 | 35 | def poses2geetest(poses: list) -> str: 36 | """ 37 | 处理坐标,变为极验需要的样子 38 | 参数: 39 | poses: list: 坐标信息, 格式是: [[x1, y1, x2, y2], [x1, y1, x2, y2], ...] 需要转为极验需要的格式 40 | 返回: 41 | str: 返回处理后的坐标 42 | """ 43 | new = [] 44 | for pose in poses: 45 | x, y = (pose[0] + pose[2]) / 2, (pose[1] + pose[3]) / 2 46 | final_x = int(round(int(x) / 333.375 * 100 * 100, 0)) 47 | final_y = int(round(int(y) / 333.375 * 100 * 100, 0)) 48 | final = f'{final_x}_{final_y}' 49 | new.append(final) 50 | stringCodes = ','.join(new) 51 | return stringCodes 52 | 53 | 54 | cookies = None 55 | 56 | headers = { 57 | 'authority': 'passport.bilibili.com', 58 | 'accept': '*/*', 59 | 'accept-language': 'zh-CN,zh;q=0.9', 60 | 'cache-control': 'no-cache', 61 | 'origin': 'https://www.bilibili.com', 62 | 'pragma': 'no-cache', 63 | 'referer': 'https://www.bilibili.com', 64 | 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0', 65 | } 66 | 67 | HD= headers.copy() 68 | --------------------------------------------------------------------------------