├── .gitignore
├── LICENSE
├── app
├── common.py
├── gt3
│ ├── nine
│ │ ├── nine.py
│ │ ├── nine4jsapp.py
│ │ └── ninejs.py
│ └── word
│ │ ├── word.py
│ │ ├── wordjs.py
│ │ └── wordjsapp.py
├── gt4
│ └── iconmi
│ │ └── iconmi.py
├── handleprocess.py
├── loadmodel.py
├── utils.py
└── uvicorn_config.json
├── assets
├── icon4
│ ├── 0a5573.png
│ ├── 0ab207.png
│ ├── 0ac1df.png
│ ├── 0b236d.png
│ ├── 0b41a3.png
│ ├── 0c2974.png
│ ├── 3f9cdf.png
│ ├── 94cb8d.png
│ ├── c59e7a.png
│ ├── imgs_00141_20081.png
│ ├── imgs_00142_59845.png
│ ├── imgs_00145_79210.png
│ ├── imgs_00146_99736.png
│ └── imgs_00150_46045.png
├── nine3
│ ├── img_00000_37458.png
│ ├── img_00001_54480.png
│ ├── img_00002_59670.png
│ ├── img_00003_47146.png
│ ├── img_00004_50080.png
│ ├── img_00005_67809.png
│ ├── img_00006_25480.png
│ ├── ques_00000_37458.png
│ ├── ques_00001_54480.png
│ ├── ques_00002_59670.png
│ ├── ques_00003_47146.png
│ ├── ques_00004_50080.png
│ ├── ques_00005_67809.png
│ └── ques_00006_25480.png
├── temp1_output.png
├── temp2_output.png
├── temp3_output.png
└── word3
│ ├── pic_00355_54552.png
│ ├── pic_00355_67108.png
│ ├── pic_00355_91218.png
│ ├── pic_00356_20119.png
│ ├── pic_00356_24524.png
│ ├── pic_00356_46593.png
│ ├── pic_00356_73261.png
│ └── pic_00357_13273.png
├── conf
├── config.py
└── config.yaml
├── demo_geetest3nine.py
├── demo_geetest3word.py
├── demo_geetest4icon4mi.py
├── imgsrc
├── a.jpg
└── w.png
├── jsdemo_g3word_1.py
├── jsdemo_g3word_2.py
├── jsdemo_g4icon.py
├── jsdemo_nine3.py
├── model
└── sha256.txt
├── package.json
├── readme.md
├── requirements.txt
├── service.py
├── src
├── method
│ ├── GTClick.py
│ └── GTnine.py
└── utils
│ ├── MakeCharImage.py
│ ├── SiameseOnnx.py
│ ├── YoloOnnx.py
│ ├── nine.py
│ ├── outdata.py
│ ├── siamese.py
│ ├── simsun.ttc
│ ├── utils.py
│ └── yoloclass.py
└── webjs
├── icon4
├── demo_g4icon.js
└── tools.py
├── nine3
├── demo.js
└── utils.py
└── word3
├── f1
├── demo.js
└── tools.py
└── f2
├── biblg3word.js
├── loadmodel.py
└── tools.py
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | *.onnx
3 | *.pt
4 | model/**
5 | *.log
6 | temp
7 | temp1/
8 | temp2/
9 | temp3/
10 | .DS_Store
11 | node_modules
12 | package-lock.json
13 | log.txt
14 | !model/sha256.txt
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 caisxg
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/app/common.py:
--------------------------------------------------------------------------------
1 | """
2 | 一些公共的类
3 | """
4 | from enum import Enum
5 | from typing import Optional, Union
6 | from typing import List, Optional
7 | from pydantic import BaseModel, Field, validator
8 |
9 |
10 |
class Input(BaseModel):
    """Request body for the image-based recognition endpoints.

    ``dataType`` selects how ``imageSource`` is interpreted (per the field
    description: 1 = URL, 2 = file stream). ``token`` carries the caller's
    access token.
    """

    dataType: int = Field(..., description="数据类型, 1: url, 2: 文件流", example=1)
    imageSource: list[str] = Field(..., description="base64的图片", example="[base64编码的图片数据1]")
    extraicon: Optional[list] = Field(None, description="base64的icon图片", example="[base64编码的图片数据1, base64编码的图片数据2]")
    imageID: Optional[str] = Field(None, description="图片的id", example="string")
    token: Optional[str] = Field(None, description="token", example="string")

    @validator('dataType')
    def dataType_must_be_int(cls, v):
        """Reject any ``dataType`` other than 1 or 2."""
        # Raise explicitly instead of using ``assert``: assertions are removed
        # under ``python -O``, which would silently disable this check.
        if v not in (1, 2):
            raise ValueError("dataType must be 1 or 2")
        return v
26 |
27 |
class InputChangeIdword3(BaseModel):
    """Request body for the third-generation word-click JS endpoint.

    ``challenge``, ``gt`` and ``key`` are required; every other field is
    optional and defaults to ``None``.
    """

    # Required parameters.
    challenge: str = Field(
        ...,
        description="challenge参数",
        example="string",
    )
    gt: str = Field(
        ...,
        description="gt参数",
        example="string",
    )
    key: str = Field(
        ...,
        description="key参数(授权参数)",
        example="string",
    )

    # Optional parameters.
    referer: Optional[str] = Field(
        None,
        description="referer 参数(可选)",
        example="string",
    )
    ua: Optional[str] = Field(
        None,
        description="ua 参数(可选)",
        example="string",
    )
    origin: Optional[str] = Field(
        None,
        description="origin 参数(可选)",
        example="string",
    )
    pic: Optional[str] = Field(
        None,
        description="pic 参数(可选)",
        example="string",
    )
    c: Optional[List[int]] = Field(
        None,
        description="c 参数(可选)",
        example=[1, 2, 3, 4],
    )
    s: Optional[str] = Field(
        None,
        description="s 参数(可选)",
        example="string",
    )
41 |
42 |
class InputChangeIdnine(BaseModel):
    """Request body for the nine-grid JS endpoint.

    ``gt`` and ``key`` are required; ``referer``, ``ua`` and ``origin`` are
    optional and default to ``None``.
    """

    # Required parameters.
    gt: str = Field(
        ...,
        description="gt",
        example="string",
    )
    key: str = Field(
        ...,
        description="key",
        example="string",
    )

    # Optional parameters.
    referer: Optional[str] = Field(
        None,
        description="referer",
        example="string",
    )
    ua: Optional[str] = Field(
        None,
        description="ua",
        example="string",
    )
    origin: Optional[str] = Field(
        None,
        description="origin",
        example="string",
    )
52 |
53 |
54 |
class Output(BaseModel):
    """Response envelope (status code, message, payload) used as ``response_model`` by the routes."""

    # The accepted range for ``code`` is enforced by the validator below.
    code: int = Field(..., description="state code(状态码, 如果是 200,证明这边提供的服务没有问题) ", example=200)
    msg: str = Field(..., description="state massage(状态的信息, 这边提供的服务简单的做一下筛选,然后转发给极验)", example="success")
    data: Union[list, dict, str] = Field(..., description="return data(返回的数据, 这是验证码识别的结果,原封不动的返回,不做任何修改)", example={"imageID": "string", "res": [[184, 0, 259, 67], [176,238, 244, 310], [63,70,132,142]]})

    @validator('code')
    def code_must_be_int(cls, v):
        """Accept only codes in the half-open range [100, 6000)."""
        # A chained comparison replaces building a 5900-element list on every
        # validation, and an explicit raise survives ``python -O`` (which
        # strips ``assert`` statements).
        if not 100 <= v < 6000:
            raise ValueError("code must be in the range [100, 6000)")
        return v
64 |
# Extra responses passed to the route decorators via ``responses=``:
# status code -> {"description": ...}.
responsesdict = {
    status: {"description": text}
    for status, text in (
        (403, "Token Error"),
        (422, "Input Error - Invalid DataType"),
        (456, "算法未实现"),
        (500, "Input Error"),
    )
}
--------------------------------------------------------------------------------
/app/gt3/nine/nine.py:
--------------------------------------------------------------------------------
1 | """
2 | 主要实现九宫格验证码的识别
3 | """
4 |
5 | import asyncio
6 |
7 | from concurrent.futures import ThreadPoolExecutor
8 |
9 | from fastapi import APIRouter, Request
10 |
11 | from app.common import Input, Output, responsesdict
12 | from app.utils import get_res, token_validation
13 |
14 | gt3nine = APIRouter()
15 |
16 |
17 | # 这 @gt3 是一个路径操作装饰器,
@gt3nine.post("/nine3",
    status_code=200,
    response_model=Output,
    summary = "三代九宫格",
    description = "这是一个三代九宫格",
    response_description = "返回json格式",
    responses=responsesdict
)
async def nine3(
    input: Input,
    request: Request,
) -> Output:
    """Recognise a third-generation nine-grid captcha.

    Validates the caller's token, then runs ``get_res("gt3nine", ...)`` in a
    worker thread so the event loop is not blocked.

    Returns:
        ``Output`` with code 200 and the recognition result, code 403 when the
        token is rejected, or code 500 when recognition raises.
    """
    try:
        headers, input_data = token_validation(input, request)
        if headers is None and input_data is None:
            # The original built this response but never returned it, so a bad
            # token fell through and ended up as a 500.
            return Output(code=403, msg="Token error", data={})
        loop = asyncio.get_running_loop()
        newexecutor = ThreadPoolExecutor(max_workers=3)
        try:
            data = await loop.run_in_executor(newexecutor, get_res, "gt3nine", input_data, headers)
        finally:
            # Shut the per-request pool down explicitly so its worker thread
            # does not linger after the request finishes.
            newexecutor.shutdown(wait=False)
        return Output(code=200, msg="success", data=data)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so task cancellation
        # still propagates.
        return Output(code=500, msg="Server Error", data={})
41 |
--------------------------------------------------------------------------------
/app/gt3/nine/nine4jsapp.py:
--------------------------------------------------------------------------------
1 | """
2 | 主要实现九宫格验证码的识别
3 | """
4 |
5 | import asyncio
6 |
7 | from concurrent.futures import ThreadPoolExecutor
8 |
9 | from fastapi import APIRouter, Request
10 |
11 | from app.common import Input, Output, responsesdict, InputChangeIdnine
12 | from app.utils import token_validation
13 | from app.gt3.nine.ninejs import get_resjs
14 |
15 | gt4ninejs = APIRouter()
16 |
17 |
18 |
@gt4ninejs.post("/gt4nine",
    status_code=200,
    response_model=Output,
    summary = "si代九宫格js",
    description = "这是一个四代九宫格js",
    response_description = "返回json格式",
    responses=responsesdict
)
async def gt4nine(
    input: InputChangeIdnine,
    request: Request,
) -> Output:
    """Solve a nine-grid captcha end to end through the JS flow.

    Validates the caller's key/token, then runs ``get_resjs`` in a worker
    thread so the event loop is not blocked.

    Returns:
        ``Output`` with code 200 and whatever ``get_resjs`` returned, code 403
        when the token is rejected, or code 500 when the flow raises.
    """
    try:
        headers, input_data = token_validation(input, request)
        if headers is None and input_data is None:
            # The original built this response but never returned it, so a bad
            # token fell through and ended up as a 500.
            return Output(code=403, msg="Token error", data={})
        loop = asyncio.get_running_loop()
        newexecutor = ThreadPoolExecutor(max_workers=3)
        try:
            data = await loop.run_in_executor(newexecutor, get_resjs, input_data, headers)
        finally:
            # Shut the per-request pool down explicitly so its worker thread
            # does not linger after the request finishes.
            newexecutor.shutdown(wait=False)
        return Output(code=200, msg="success", data=data)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so task cancellation
        # still propagates.
        return Output(code=500, msg="Server Error", data={})
42 |
--------------------------------------------------------------------------------
/app/gt3/nine/ninejs.py:
--------------------------------------------------------------------------------
1 |
2 | """
3 | 主要实现九宫格验证码的识别
4 | """
5 | import re, requests, time, uuid, execjs, json
6 | from jsonpath import jsonpath
7 | from pathlib import Path
8 | from webjs.word3.f2.loadmodel import gt3nine
9 | from webjs.nine3.utils import *
10 |
11 |
# Default request headers, used when a caller passes ``headers=None``.
DEFAULT_HEADERS = {
    'authority': 'gt4.geetest.com',
    'accept': '*/*',
    'accept-language': 'zh-CN,zh;q=0.9',
    'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
}
# Kept under the original module-level name for backward compatibility.
headers = DEFAULT_HEADERS


def get_resjson(captcha_id, headers=None):
    """Load a nine-grid challenge from Geetest and download its images.

    Args:
        captcha_id: Geetest captcha id, sent as ``captcha_id``.
        headers: Request headers; ``DEFAULT_HEADERS`` is used when ``None``.

    Returns:
        ``(resp_json, captcha_id, imgs_path, ques_path, session, headers)``.
        The open ``requests.Session`` is returned so the caller can reuse it.

    Raises:
        AssertionError: if the server reports a captcha type other than
            ``nine``.
    """
    if headers is None:
        # The parameter shadows the module-level ``headers``, so the original
        # ``headers = headers`` was a no-op and the defaults were never used.
        headers = DEFAULT_HEADERS

    session = requests.Session()
    try:
        params = {
            'callback': f'geetest_{int(time.time() * 1000)}',
            'captcha_id': captcha_id,
            'challenge': str(uuid.uuid4()),
            'client_type': 'web',
            'risk_type': 'nine',
            'lang': 'zh',
        }
        response = session.get('https://gcaptcha4.geetest.com/load', params=params, headers=headers)
        res = response.text
        # The reply is JSONP: keep only what sits between the outer parentheses.
        resp_json = json.loads(res[res.find("(") + 1:res.rfind(")")])
        captcha_type = jsonpath(resp_json, '$..captcha_type')[0]
        # Explicit raise (same exception type as before) so the check is not
        # stripped under ``python -O``.
        if captcha_type != 'nine':
            raise AssertionError("captcha_type should be nine")

        kk, ss = 1, 1
        # NOTE(review): the directory and file prefix are fixed, so concurrent
        # calls appear to write to the same paths — confirm against
        # generate_paths.
        imgs_dir = ques_dir = "temp_nine"
        Path(imgs_dir).mkdir(parents=True, exist_ok=True)
        imgs, imgs_count = generate_url(jsonpath(resp_json, '$..imgs')[0])
        imgs_path = generate_paths(imgs_dir, f"img_{kk}_{ss}", imgs_count)
        download_img(imgs, imgs_path)

        ques, ques_count = generate_url(jsonpath(resp_json, '$..ques')[0])
        ques_path = generate_paths(ques_dir, f"ques_{kk}_{ss}", ques_count)
        download_img(ques, ques_path)
    except Exception:
        # Do not leak the session when loading or downloading fails.
        session.close()
        raise
    return resp_json, captcha_id, imgs_path, ques_path, session, headers
50 |
51 |
def get_verify(session, captcha_id, resp_json, userresponse, headers):
    """Sign the answer and submit it to the Geetest ``/verify`` endpoint.

    Args:
        session: Open ``requests.Session``; it is always closed before this
            function returns or raises.
        captcha_id: Geetest captcha id.
        resp_json: Parsed ``/load`` response supplying ``lot_number``,
            ``payload``, ``datetime`` and ``process_token``.
        userresponse: Answer passed to the JS ``get_w`` function.
        headers: Request headers for the verify call.

    Returns:
        The raw response body as text.
    """
    try:
        # All request parameters below come from the ``/load`` response.
        lot_number = jsonpath(resp_json, '$..lot_number')[0]
        payload = jsonpath(resp_json, '$..payload')[0]
        issued_at = jsonpath(resp_json, '$..datetime')[0]
        process_token = jsonpath(resp_json, '$..process_token')[0]
        with open("webjs/nine3/demo.js", "r", encoding="utf-8") as f:
            jscode = f.read()
        ctx = execjs.compile(jscode)
        w = ctx.call("get_w", captcha_id, lot_number, issued_at, userresponse)

        params = {
            'callback': f'geetest_{int(time.time() * 1000)}',
            'captcha_id': captcha_id,
            'client_type': 'web',
            'lot_number': lot_number,
            'risk_type': 'nine',
            'payload': payload,
            'process_token': process_token,
            # NOTE(review): hard-coded; the original fetched the server's
            # ``payload_protocol`` but never used it — confirm '1' is intended.
            'payload_protocol': '1',
            'pt': '1',
            'w': w,
        }
        url3 = 'https://gcaptcha4.geetest.com/verify'
        response = session.get(url3, params=params, headers=headers)
        return response.text
    finally:
        # Close the session on failure too, not only on the success path.
        session.close()
81 |
82 |
83 |
def _parse_verify_reply(resp):
    """Decode a verify reply given as a response-like object or as JSONP/JSON text."""
    if not isinstance(resp, str):
        try:
            return resp.json()
        except Exception:
            resp = getattr(resp, "text", resp)
    if isinstance(resp, str):
        # Try the JSONP form first, then plain JSON.
        for candidate in (resp[resp.find("(") + 1:resp.rfind(")")], resp):
            try:
                return json.loads(candidate)
            except ValueError:
                continue
    return resp


def get_resjs(input_dict: dict, headers: dict,
              get_resjson = get_resjson, get_verify = get_verify):
    '''
    Solve a nine-grid captcha end to end.

    Reads the captcha id from ``input_dict["gt"]``, loads the challenge,
    runs the ``gt3nine`` model on the downloaded images and submits the answer.

    Args:
        input_dict: Request data; must contain ``gt``.
        headers: Request headers; may be ``None``, in which case it is
            forwarded unchanged to ``get_resjson``.
        get_resjson: Challenge loader (injectable).
        get_verify: Answer submitter (injectable).

    Returns:
        The decoded verify reply, the raw reply text when it cannot be
        decoded, or an error dict with code 400 (missing ``gt``) or 500
        (empty reply).
    '''
    try:
        captcha_id = input_dict.get("gt", None)
    except AttributeError:
        captcha_id = None
    if captcha_id is None:
        return {"code": 400, "msg": "captcha_id is None", "data": {}}

    resp_json, captcha_id, imgs_path, ques_path, session, headers = get_resjson(captcha_id, headers)
    out = gt3nine.run(imgs_path[0], ques_path)
    userresponse = out.nine_rowcol
    resp = get_verify(session, captcha_id, resp_json, userresponse, headers)
    # ``get_verify`` returns text, so the original ``resp.json()`` always
    # failed and its last fallback ``resp.text`` raised AttributeError.
    resp1 = _parse_verify_reply(resp)
    if not resp1:
        return {"code": 500, "msg": "Server Error", "data": {}}
    return resp1
111 |
112 | if __name__ == "__main__":
113 | pass
--------------------------------------------------------------------------------
/app/gt3/word/word.py:
--------------------------------------------------------------------------------
1 | """
2 | 主要实现文字点选验证码的识别
3 | """
4 |
5 | import asyncio
6 | from concurrent.futures import ThreadPoolExecutor
7 |
8 | from fastapi import APIRouter, Request
9 |
10 | from app.common import Input, Output, responsesdict
11 | from app.utils import get_res, token_validation
12 |
13 | gt3word = APIRouter()
14 |
15 |
16 | # 这 @gt3 是一个路径操作装饰器,
@gt3word.post("/word3",
    status_code=200,
    response_model=Output,
    summary = "三代文字点选 ",
    description = "这是一个三代的文字点选",
    response_description = "返回json格式",
    responses=responsesdict
)
async def word3(
    input: Input,
    request: Request,
) -> Output:
    """Recognise a third-generation word-click captcha.

    Validates the caller's token, then runs ``get_res("gt3word", ...)`` in a
    worker thread so the event loop is not blocked.

    Returns:
        ``Output`` with code 200 and the recognition result, code 403 when the
        token is rejected, or code 500 when recognition raises.
    """
    try:
        headers, input_data = token_validation(input, request)
        if headers is None and input_data is None:
            # The original built this response but never returned it, so a bad
            # token fell through and ended up as a 500.
            return Output(code=403, msg="Token error", data={})
        loop = asyncio.get_running_loop()
        newexecutor = ThreadPoolExecutor(max_workers=8)
        try:
            data = await loop.run_in_executor(newexecutor, get_res, "gt3word", input_data, headers)
        finally:
            # Shut the per-request pool down explicitly so its worker thread
            # does not linger after the request finishes.
            newexecutor.shutdown(wait=False)
        return Output(code=200, msg="success", data=data)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so task cancellation
        # still propagates.
        return Output(code=500, msg="Server Error", data={})
40 |
--------------------------------------------------------------------------------
/app/gt3/word/wordjs.py:
--------------------------------------------------------------------------------
1 |
2 | import requests, execjs, json, time, os
3 | from jsonpath import jsonpath
4 | from webjs.word3.f2.tools import download_img, headers, cookies, HD
5 | from webjs.word3.f2.loadmodel import gt3word
6 | import execjs
7 | import random
8 | from typing import Any
9 | from loguru import logger
10 | import asyncio
11 | import uuid
12 | ### 2. 获取点击类型
def get_click_type(gt, challenge, headers=None):
    """Ask Geetest which captcha type applies to this challenge.

    Opens a new ``requests.Session``, calls ``ajax.php`` and reads the
    ``result`` field from the JSONP reply. A warning is logged when the type
    is not ``click``.

    Returns:
        ``(click_type, session, headers)``; the session is left open.
    """
    if headers is None:
        headers = HD
    session = requests.Session()
    query = dict(
        gt=gt,
        challenge=challenge,
        lang='zh-cn',
        pt='0',
        client_type='web',
        callback=f'geetest_{int(time.time() * 1000) - 1000}',
    )

    reply_text = session.get('https://api.geetest.com/ajax.php', headers=headers, params=query).text
    # JSONP reply: keep only what sits between the outer parentheses.
    start = reply_text.find("(") + 1
    stop = reply_text.rfind(")")
    parsed = json.loads(reply_text[start:stop])
    click_type = jsonpath(parsed, '$..result')[0]
    if not click_type == 'click':
        logger.warning("点击类型不是 click")
    return click_type, session, headers
34 |
35 |
36 | ### 3. 获取 json 详细信息
def get_gtresponse(gt, challenge, session,headers=None):
    """Fetch the click-captcha details from Geetest ``get.php``.

    Returns:
        ``(myc, mys, pic, session, headers, result)`` where ``myc``, ``mys``
        and ``pic`` are the ``c``, ``s`` and ``pic`` values found in the
        parsed JSONP reply ``result``.
    """
    if headers is None:
        headers = HD
    query = dict(
        is_next='true',
        type='click',
        gt=gt,
        challenge=challenge,
        lang='zh-cn',
        https='false',
        protocol='https://',
        offline='false',
        product='embed',
        api_server='api.geetest.com',
        isPC='true',
        autoReset='true',
        width='100%',
        callback=f'geetest_{int(time.time() * 1000) - 1000}',
    )
    reply_text = session.get('https://api.geetest.com/get.php', params=query, headers=headers).text
    # JSONP reply: keep only what sits between the outer parentheses.
    start = reply_text.find("(") + 1
    stop = reply_text.rfind(")")
    result = json.loads(reply_text[start:stop])
    myc, mys, pic = (jsonpath(result, expr)[0] for expr in ('$..c', '$..s', '$..pic'))
    return myc, mys, pic, session, headers, result
62 |
63 |
64 |
65 |
def get_resjson(gt, challenge, session, headers=None):
    '''
    Fetch the click-captcha details and download the challenge image.

    Returns the six values from ``get_gtresponse`` followed by the local
    image path (``temp/a.jpg``).
    '''
    if headers is None:
        headers = HD
    # Step 3: fetch the detailed JSON.
    details = get_gtresponse(gt, challenge, session, headers)
    myc, mys, pic, session, headers, result = details
    # Step 4: download the picture into the fixed temp location.
    target = 'temp/a.jpg'
    os.makedirs('temp', exist_ok=True)
    download_img(pic, target)

    return myc, mys, pic, session, headers, result, target
79 |
80 |
81 |
82 | ### 4. 最后验证
def validate(gt, challenge, w, session, headers: dict = None):
    """Submit the signed answer ``w`` to Geetest ``ajax.php``.

    Sleeps a random 0.1-0.3 s before the request so the submission is not
    sent too quickly, closes ``session`` afterwards and returns the raw
    response text.
    """
    if headers is None:
        headers = HD
    query = dict(
        gt=gt,
        challenge=challenge,
        lang='zh-cn',
        pt='0',
        client_type='web',
        w=w,
        callback=f'geetest_{int(time.time() * 1000) + 1000}',
    )
    pause = random.uniform(0.1, 0.3)
    time.sleep(pause)  # short pause so the answer is not submitted too quickly
    reply = session.get('https://api.geetest.com/ajax.php', headers=headers, params=query)
    session.close()
    return reply.text
98 |
99 |
100 |
101 |
# Compile the JS that produces the ``w`` parameter once, at import time.
with open("./webjs/word3/f2/biblg3word.js", 'r', encoding='utf-8') as f:
    jscode = f.read()
ctx = execjs.compile(jscode)


def get_resjs(input_dict: dict,
              headers: dict,
              get_resjson = get_resjson,
              validate = validate,
              ctx = ctx,
              gt3word = gt3word,
              get_click_type = get_click_type
              ) -> dict[str, Any] | Any:
    '''
    Solve a third-generation word-click captcha end to end (blocking).

    Args:
        input_dict: Request data; must contain ``gt`` and ``challenge``.
        headers: Request headers; ``HD`` is used when ``None``.
        get_resjson, validate, ctx, gt3word, get_click_type: Injectable
            collaborators, defaulting to the module-level objects.

    Returns:
        The decoded verify reply, the raw reply text when it cannot be
        decoded, or an error dict with code 400 (missing field or non-click
        captcha) or 500 (empty reply).
    '''
    try:
        gt = input_dict.get("gt", None)
        challenge = input_dict.get("challenge", None)
    except AttributeError:
        gt = challenge = None
    # Explicit checks instead of ``assert`` under a bare ``except``: they
    # survive ``python -O`` and report which field is actually missing.
    if gt is None:
        return {"code": 400, "msg": "captcha_id is None", "data": {}}
    if challenge is None:
        return {"code": 400, "msg": "challenge is None", "data": {}}

    headers = headers if headers is not None else HD
    # Step 2: query the captcha type.
    click_type, session, headers = get_click_type(gt, challenge, headers)
    if click_type != 'click':
        return {"code": 400, "msg": "click_type is not click", "data": {}}
    # Step 3: fetch the detailed JSON and download the picture.
    myc, mys, pic, session, headers, result, imgs_path = get_resjson(gt, challenge, session, headers)
    # Step 4: locate the targets.
    out = gt3word.run(imgs_path)
    xyxy = out.targets_xyxy
    time.sleep(random.uniform(0.9, 1.3))  # pause so the answer is not submitted too quickly
    # Step 5: build ``w``.
    w = ctx.call('get_w',xyxy , pic, gt, challenge, myc, mys )

    # Step 6: submit for verification.
    resptext = validate(gt, challenge, w, session, headers)
    try:
        # JSONP form first.
        resp1 = json.loads(resptext[resptext.find("(") + 1:resptext.rfind(")")])
    except (ValueError, TypeError, AttributeError):
        try:
            resp1 = json.loads(resptext)
        except (ValueError, TypeError):
            resp1 = resptext
    if not resp1:
        return {"code": 500, "msg": "Server Error", "data": {"w": w, "xyxy": xyxy, "pic": pic, "myc": myc, "mys": mys, "result": result}}
    return resp1
151 |
152 |
153 |
async def aioget_resjs(input_dict: dict,
                       headers: dict,
                       get_resjson = get_resjson,
                       validate = validate,
                       ctx = ctx,
                       gt3word = gt3word,
                       get_click_type = get_click_type
                       ) -> dict[str, Any] | Any:
    '''
    Solve a third-generation word-click captcha end to end (coroutine).

    Same flow as ``get_resjs``, except that the pause before building ``w``
    uses ``asyncio.sleep`` so other tasks can run meanwhile.

    NOTE(review): the network calls, the model run and the short
    ``time.sleep`` inside ``validate`` are still synchronous and block the
    event loop while they execute.

    Args:
        input_dict: Request data; must contain ``gt`` and ``challenge``.
        headers: Request headers; ``HD`` is used when ``None``.
        get_resjson, validate, ctx, gt3word, get_click_type: Injectable
            collaborators, defaulting to the module-level objects.

    Returns:
        The decoded verify reply, the raw reply text when it cannot be
        decoded, or an error dict with code 400 (missing field or non-click
        captcha) or 500 (empty reply).
    '''
    try:
        gt = input_dict.get("gt", None)
        challenge = input_dict.get("challenge", None)
    except AttributeError:
        gt = challenge = None
    # Explicit checks instead of ``assert`` under a bare ``except``: they
    # survive ``python -O`` and report which field is actually missing.
    if gt is None:
        return {"code": 400, "msg": "captcha_id is None", "data": {}}
    if challenge is None:
        return {"code": 400, "msg": "challenge is None", "data": {}}

    headers = headers if headers is not None else HD
    # Step 2: query the captcha type.
    click_type, session, headers = get_click_type(gt, challenge, headers)
    if click_type != 'click':
        return {"code": 400, "msg": "click_type is not click", "data": {}}
    # Step 3: fetch the detailed JSON and download the picture.
    myc, mys, pic, session, headers, result, imgs_path = get_resjson(gt, challenge, session, headers)
    # Step 4: locate the targets.
    out = gt3word.run(imgs_path)
    xyxy = out.targets_xyxy
    tt = random.uniform(1, 1.5)
    # A fresh uuid ties the two log lines of one request together.
    random_uuid = str(uuid.uuid4())
    logger.warning(f"uuid: {random_uuid}, 休息时间: {tt}")
    await asyncio.sleep(tt)
    logger.warning(f"uuid: {random_uuid}, 休息结束")
    # Step 5: build ``w``.
    w = ctx.call('get_w',xyxy , pic, gt, challenge, myc, mys )

    # Step 6: submit for verification.
    resptext = validate(gt, challenge, w, session, headers)
    try:
        # JSONP form first.
        resp1 = json.loads(resptext[resptext.find("(") + 1:resptext.rfind(")")])
    except (ValueError, TypeError, AttributeError):
        try:
            resp1 = json.loads(resptext)
        except (ValueError, TypeError):
            resp1 = resptext
    if not resp1:
        return {"code": 500, "msg": "Server Error", "data": {"w": w, "xyxy": xyxy, "pic": pic, "myc": myc, "mys": mys, "result": result}}
    return resp1
205 |
206 |
207 | if __name__ == "__main__":
208 | pass
--------------------------------------------------------------------------------
/app/gt3/word/wordjsapp.py:
--------------------------------------------------------------------------------
1 | """
2 | 主要实现三代文字点选字js的接口
3 | """
4 |
5 | import asyncio
6 |
7 | from concurrent.futures import ThreadPoolExecutor
8 |
9 | from fastapi import APIRouter, Request
10 |
11 | from app.common import Input, Output, responsesdict, InputChangeIdword3
12 | from app.utils import token_validation
13 | from app.gt3.word.wordjs import get_resjs, aioget_resjs
14 | from loguru import logger
15 | gt3wordjs = APIRouter()
16 |
17 |
@gt3wordjs.post("/gt3word",
    status_code=200,
    response_model=Output,
    summary = "三代文字点选(适合b站)",
    description = "三代文字点选(适合b站), 只需要传递 gt 和 challenge 两个参数即可,返回 json 格式数据识别结果, data 字段是极验返回的识别结果",
    response_description = "返回json格式, data 字段是极验返回的识别结果",
    responses=responsesdict
)
async def gt3word(
    input: InputChangeIdword3,
    request: Request,
) -> Output:
    """
    Solve a third-generation word-click captcha through the JS flow.

    Args:
    - input: request parameters
    - request: incoming request

    Returns an ``Output`` with code 200 and the result of ``aioget_resjs``,
    code 403 when token validation fails, or code 500 when solving raises.
    """
    # Resolve these up front: the original read ``client_host`` in its log
    # calls before assigning it, so a token failure raised UnboundLocalError
    # inside the ``except`` handler instead of returning 403.
    client_host = request.client.host if request.client else None
    headers = input_data = None
    try:
        headers, input_data = token_validation(input, request)
    except Exception:
        logger.info(f"ip地址:{client_host}, input_data: {input_data}, 2返回信息: headers or input_data is None")
        return Output(code=403, msg="headers or input_data is None", data={"headers": headers, "input_data": input_data})
    if headers is None and input_data is None:
        logger.info(f"ip地址:{client_host}, input_data: {input_data}, 1返回信息: Token error")
        # The original built this response but never returned it.
        return Output(code=403, msg="Token error", data={})
    try:
        # A single coroutine needs no ``asyncio.gather``; await it directly.
        data = await aioget_resjs(input_data, headers)
        logger.info(f"ip地址:{client_host}, input_data: {input_data}, 3返回信息: {data}")
        return Output(code=200, msg="success", data=data)
    except Exception:
        logger.info(f"ip地址:{client_host}, input_data: {input_data}, 4返回信息: Server Error")
        return Output(code=500, msg="Server Error", data={})
56 |
--------------------------------------------------------------------------------
/app/gt4/iconmi/iconmi.py:
--------------------------------------------------------------------------------
1 | """
2 | 主要实现图标点选验证码的识别
3 | """
4 | import asyncio
5 | from concurrent.futures import ThreadPoolExecutor
6 | from fastapi import APIRouter, Request
7 | from app.common import Input, Output, responsesdict
8 | from app.utils import get_res, token_validation
9 |
10 |
11 | gt4iconmi = APIRouter()
@gt4iconmi.post("/icon4mi",
    status_code=200,
    response_model=Output,
    summary = "四代图标点选 ",
    description = "这是一个四代的文字点选",
    response_description = "返回json格式",
    responses=responsesdict
)
async def icon4mi(
    input: Input,
    request: Request,  # used to read the request headers
) -> Output:
    """Recognise a fourth-generation icon-click captcha.

    Validates the caller's token, then runs ``get_res("gt4icon", ...)`` in a
    worker thread so the event loop is not blocked.

    Returns:
        ``Output`` with code 200 and the recognition result, code 403 when the
        token is rejected, or code 500 when recognition raises.
    """
    try:
        headers, input_data = token_validation(input, request)
        if headers is None and input_data is None:
            # The original built this response but never returned it, so a bad
            # token fell through and ended up as a 500.
            return Output(code=403, msg="Token error", data={})
        loop = asyncio.get_running_loop()
        newexecutor = ThreadPoolExecutor(max_workers=4)
        try:
            data = await loop.run_in_executor(newexecutor, get_res, "gt4icon", input_data, headers)
        finally:
            # Shut the per-request pool down explicitly so its worker thread
            # does not linger after the request finishes.
            newexecutor.shutdown(wait=False)
        return Output(code=200, msg="success", data=data)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so task cancellation
        # still propagates.
        return Output(code=500, msg="Server Error", data={})
35 |
--------------------------------------------------------------------------------
/app/handleprocess.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 | import time
3 |
def get_pids(port) -> list[int]:
    """Return the PIDs that ``lsof -i :<port>`` reports, without duplicates.

    ``sudo lsof`` is tried first and plain ``lsof`` second. An empty list is
    returned when both attempts fail, including when the binary is missing.
    """
    output = None
    for command in (['sudo', 'lsof', '-i', f':{port}'], ['lsof', '-i', f':{port}']):
        try:
            output = subprocess.run(command, capture_output=True, text=True, check=True).stdout
            break
        except (subprocess.CalledProcessError, OSError):
            # Non-zero exit, or the executable is missing: try the next
            # command. The original fallback let FileNotFoundError escape.
            continue
    if output is None:
        return []

    # Skip the header line; the PID is the second whitespace-separated column.
    pids = []
    for line in output.splitlines()[1:]:
        columns = line.split()
        if len(columns) < 2 or not columns[1].isdigit():
            continue  # ignore blank or malformed lines instead of crashing
        pids.append(int(columns[1]))
    # ``dict.fromkeys`` drops repeats (one process may own several rows)
    # while keeping first-seen order.
    return list(dict.fromkeys(pids))
21 |
def kill_process(port) -> None:
    """Send ``kill -9`` (via sudo) to every process ``get_pids`` finds on ``port``.

    Does nothing when no process is found. Otherwise waits two seconds after
    issuing the kills and prints a confirmation line.
    """
    targets = get_pids(port)
    if targets:
        for target in targets:
            subprocess.run(['sudo', 'kill', '-9', str(target)])
        # Give the processes time to exit before the caller continues.
        time.sleep(2)
        print(f'====== Killed processes on port: {port} ======')
31 |
if __name__ == '__main__':
    # Manual check: list the processes bound to the port. Killing is opt-in
    # (uncomment below), so the output no longer claims processes were killed
    # when nothing was.
    port = 9100
    print(f'Processes on port {port}: {get_pids(port)}')
    # kill_process(port)
37 |
--------------------------------------------------------------------------------
/app/loadmodel.py:
--------------------------------------------------------------------------------
1 | from src.method.GTClick import GTClick
2 | from src.method.GTnine import GTnine
3 | from conf.config import gtconf
4 |
# Model instances shared by the API, built once at import time from ``gtconf``.

# Third-generation word-click recogniser.
_word_conf = gtconf['word']
gt3word = GTClick(
    pdetect=_word_conf['pdetect'],
    per=_word_conf['per'],
    pclass=_word_conf['pclass'],
    pclasstags=_word_conf['pclasstags'],
    chars_issorted=False,
    rmalpha=True,
)

# Nine-grid recogniser.
gt3nine = GTnine(pclass=gtconf['nine']['pclass'])

# Fourth-generation icon-click recogniser.
_icon_conf = gtconf['icon4mi']
gt4icon = GTClick(
    pdetect=_icon_conf['pdetect'],
    per=_icon_conf['per'],
    pclass=_icon_conf['pclass'],
    pclasstags=_icon_conf['pclasstags'],
    chars_issorted=True,
    rmalpha=True,
)
24 |
25 |
--------------------------------------------------------------------------------
/app/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | 函数通用接口
3 | """
4 | import base64
5 | import hashlib
6 | from typing import Optional, Union
7 | import requests
8 | from app.loadmodel import gt4icon, gt3nine, gt3word
9 | from app.common import Input
10 | from src.utils.utils import open_image
11 | from fastapi import Request
12 | from app.gt3.nine.ninejs import get_resjson, get_verify
13 |
# Accepted tokens. "abcdQwm123dnine4" has been sold to a customer — do not remove it.
token_list = ["abc", "abc1", "abcdQwm123dnine4", "abcdQwm123gtword3"]


def token_validation(input: Input, request: Request):
    """Check the caller's token/key and collect the forwarded request headers.

    Args:
        input: Parsed request body; its ``token`` and ``key`` values (when
            present) are compared against ``token_list``.
        request: Incoming request, read for ``user-agent``, ``origin`` and
            ``referer``.

    Returns:
        ``(headers, input_data)`` where ``input_data`` is the dumped body with
        ``token`` and ``key`` removed, or ``(None, None)`` when neither value
        is an accepted token.
    """
    # Headers forwarded to the JS flow; a missing header becomes "".
    headers = {
        name: request.headers.get(name, "")
        for name in ("user-agent", "origin", "referer")
    }

    # Simple token check: either field may carry the credential.
    input_data = input.model_dump()
    token = input_data.get("token", "")
    key = input_data.get("key", "")
    authorised = token in token_list or key in token_list
    if not authorised:
        return None, None

    # Strip the credentials before the data is passed on.
    for credential_field in ("token", "key"):
        input_data.pop(credential_field, None)
    return headers, input_data
42 | ############################################################
43 |
44 |
45 |
def set_imageSource(data: dict, headers = None) -> tuple[list, Optional[list]]:
    """
    Turn the request's image fields into opened images.

    :param data: request data as a dict.
        If ``dataType`` is 1, ``imageSource`` holds one URL or a list of URLs;
        each is downloaded and ``extraicon`` is ignored.
        If ``dataType`` is 2, ``imageSource`` must be a list whose items are
        passed to ``open_image`` as-is (presumably base64 strings — confirm
        against ``open_image``), and ``extraicon`` is handled the same way.
    :param headers: headers sent with the download requests (``dataType`` 1).
    :return: ``(imageSource_list, extraicon_list)``; the second item is
        ``None`` when no extra icons were supplied.
    :raises ValueError: when ``dataType`` is not 1 or 2, or ``imageSource`` is
        missing or not a list for ``dataType`` 2.
    """
    data_type = data.get('dataType', None)
    if data_type == 1:
        sources = data['imageSource']
        # The request model declares ``imageSource`` as a list, so passing it
        # straight to ``requests.get`` (as before) could not work. Accept a
        # single URL string or a list of URLs.
        urls = [sources] if isinstance(sources, str) else list(sources)
        # NOTE(review): ``verify=False`` disables TLS certificate checks and no
        # timeout is set; kept as in the original — confirm this is intended.
        img = [
            requests.get(url, verify=False, headers=headers).content
            for url in urls
        ]
        extraicon = None
    elif data_type == 2:
        img = data.get('imageSource', None)
        extraicon = data.get('extraicon', None)
        # Explicit raises instead of ``assert`` so the checks survive ``python -O``.
        if img is None:
            raise ValueError("imageSource is None")
        if not isinstance(img, list):
            raise ValueError("imageSource must be a list")
    else:
        raise ValueError("dataType is not 1 or 2")

    imageSource_list = [open_image(i, rmalpha=True) for i in img]
    extraicon_list = [open_image(i, rmalpha=True) for i in extraicon] if extraicon else None
    return imageSource_list, extraicon_list
74 |
# Recogniser name -> bound ``run`` method of the matching model instance.
maplist = dict(
    gt4icon=gt4icon.run,
    gt3nine=gt3nine.run,
    gt3word=gt3word.run,
)


def get_res(obj_name: str, input_dict: dict, headers: dict):
    """Run the recogniser registered under ``obj_name`` on the request's first image.

    Returns a dict with the caller's ``imageID`` (``""`` when absent) and the
    recogniser's ``targets_xyxy`` under ``res``.
    """
    image_id = input_dict.get("imageID", "")
    images, icons = set_imageSource(input_dict, headers=headers)
    # Look the recogniser up, then call it on the first image plus any icons.
    runner = maplist.get(obj_name)
    outcome = runner(images[0], icons)
    return {"imageID": image_id, "res": outcome.targets_xyxy}
88 |
89 |
90 |
91 |
92 |
--------------------------------------------------------------------------------
/app/uvicorn_config.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": 1,
3 | "disable_existing_loggers": false,
4 | "formatters": {
5 | "default": {
6 | "()": "uvicorn.logging.DefaultFormatter",
7 | "fmt": "%(asctime)s - %(levelprefix)s %(message)s",
8 | "use_colors": null
9 | },
10 | "access": {
11 | "()": "uvicorn.logging.AccessFormatter",
12 | "fmt": "%(asctime)s - %(levelprefix)s %(client_addr)s - \"%(request_line)s\" %(status_code)s",
13 | "use_colors": true
14 | }
15 | },
16 | "handlers": {
17 | "default": {
18 | "formatter": "default",
19 | "class": "logging.FileHandler",
20 | "filename": "log/uvicorn_default.log"
21 | },
22 | "access": {
23 | "formatter": "access",
24 | "class": "logging.FileHandler",
25 | "filename": "log/uvicorn_access.log"
26 | }
27 | },
28 | "loggers": {
29 | "uvicorn": {
30 | "handlers": [
31 | "default"
32 | ],
33 | "level": "INFO"
34 | },
35 | "uvicorn.error": {
36 | "level": "INFO"
37 | },
38 | "uvicorn.access": {
39 | "handlers": [
40 | "access"
41 | ],
42 | "level": "INFO",
43 | "propagate": false
44 | }
45 | }
46 | }
--------------------------------------------------------------------------------
/assets/icon4/0a5573.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/icon4/0a5573.png
--------------------------------------------------------------------------------
/assets/icon4/0ab207.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/icon4/0ab207.png
--------------------------------------------------------------------------------
/assets/icon4/0ac1df.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/icon4/0ac1df.png
--------------------------------------------------------------------------------
/assets/icon4/0b236d.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/icon4/0b236d.png
--------------------------------------------------------------------------------
/assets/icon4/0b41a3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/icon4/0b41a3.png
--------------------------------------------------------------------------------
/assets/icon4/0c2974.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/icon4/0c2974.png
--------------------------------------------------------------------------------
/assets/icon4/3f9cdf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/icon4/3f9cdf.png
--------------------------------------------------------------------------------
/assets/icon4/94cb8d.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/icon4/94cb8d.png
--------------------------------------------------------------------------------
/assets/icon4/c59e7a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/icon4/c59e7a.png
--------------------------------------------------------------------------------
/assets/icon4/imgs_00141_20081.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/icon4/imgs_00141_20081.png
--------------------------------------------------------------------------------
/assets/icon4/imgs_00142_59845.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/icon4/imgs_00142_59845.png
--------------------------------------------------------------------------------
/assets/icon4/imgs_00145_79210.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/icon4/imgs_00145_79210.png
--------------------------------------------------------------------------------
/assets/icon4/imgs_00146_99736.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/icon4/imgs_00146_99736.png
--------------------------------------------------------------------------------
/assets/icon4/imgs_00150_46045.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/icon4/imgs_00150_46045.png
--------------------------------------------------------------------------------
/assets/nine3/img_00000_37458.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/nine3/img_00000_37458.png
--------------------------------------------------------------------------------
/assets/nine3/img_00001_54480.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/nine3/img_00001_54480.png
--------------------------------------------------------------------------------
/assets/nine3/img_00002_59670.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/nine3/img_00002_59670.png
--------------------------------------------------------------------------------
/assets/nine3/img_00003_47146.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/nine3/img_00003_47146.png
--------------------------------------------------------------------------------
/assets/nine3/img_00004_50080.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/nine3/img_00004_50080.png
--------------------------------------------------------------------------------
/assets/nine3/img_00005_67809.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/nine3/img_00005_67809.png
--------------------------------------------------------------------------------
/assets/nine3/img_00006_25480.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/nine3/img_00006_25480.png
--------------------------------------------------------------------------------
/assets/nine3/ques_00000_37458.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/nine3/ques_00000_37458.png
--------------------------------------------------------------------------------
/assets/nine3/ques_00001_54480.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/nine3/ques_00001_54480.png
--------------------------------------------------------------------------------
/assets/nine3/ques_00002_59670.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/nine3/ques_00002_59670.png
--------------------------------------------------------------------------------
/assets/nine3/ques_00003_47146.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/nine3/ques_00003_47146.png
--------------------------------------------------------------------------------
/assets/nine3/ques_00004_50080.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/nine3/ques_00004_50080.png
--------------------------------------------------------------------------------
/assets/nine3/ques_00005_67809.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/nine3/ques_00005_67809.png
--------------------------------------------------------------------------------
/assets/nine3/ques_00006_25480.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/nine3/ques_00006_25480.png
--------------------------------------------------------------------------------
/assets/temp1_output.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/temp1_output.png
--------------------------------------------------------------------------------
/assets/temp2_output.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/temp2_output.png
--------------------------------------------------------------------------------
/assets/temp3_output.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/temp3_output.png
--------------------------------------------------------------------------------
/assets/word3/pic_00355_54552.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/word3/pic_00355_54552.png
--------------------------------------------------------------------------------
/assets/word3/pic_00355_67108.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/word3/pic_00355_67108.png
--------------------------------------------------------------------------------
/assets/word3/pic_00355_91218.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/word3/pic_00355_91218.png
--------------------------------------------------------------------------------
/assets/word3/pic_00356_20119.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/word3/pic_00356_20119.png
--------------------------------------------------------------------------------
/assets/word3/pic_00356_24524.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/word3/pic_00356_24524.png
--------------------------------------------------------------------------------
/assets/word3/pic_00356_46593.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/word3/pic_00356_46593.png
--------------------------------------------------------------------------------
/assets/word3/pic_00356_73261.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/word3/pic_00356_73261.png
--------------------------------------------------------------------------------
/assets/word3/pic_00357_13273.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/assets/word3/pic_00357_13273.png
--------------------------------------------------------------------------------
/conf/config.py:
--------------------------------------------------------------------------------
1 | import yaml
2 |
# Location of the YAML configuration file (relative to the working directory)
yaml_file_path = 'conf/config.yaml'

# Read the YAML file. The encoding is pinned to UTF-8 because the file contains
# non-ASCII comments and open() otherwise falls back to the locale encoding.
with open(yaml_file_path, 'r', encoding='utf-8') as file:
    config = yaml.safe_load(file)

# Settings of the click-style captcha models (word / nine / icon4mi)
gtconf = config['gt']['click']
11 |
12 |
13 |
--------------------------------------------------------------------------------
/conf/config.yaml:
--------------------------------------------------------------------------------
1 | gt:
2 | click:
3 | word:
4 | # yolo 检测模型路径
5 | pdetect: model/g3word6300/detect.pt
6 | #孪生神经网络模型路径 ,如果没有则用 detect 模型
7 | per: "model/g3word6300/simvgg19.onnx"
8 | # yolo 分类模型(一定要填写下面的标签)
9 | # 会根据这个类别来进行分类, 最多支持两个类别, 如果是两个,则第一个是具有顺序的字符类别,第二个是目标类别
10 | # 当有两个标签的时候,具有顺序的一定要放在第一个
11 | # 一个 char 标签, 一个 target 标签
12 | pclass: "model/g3word6300/muti.pt"
13 | pclasstags: ["char", "target"]
14 | nine:
15 | # 可以直接根据九宫格把图片进行划分,因此只需要一个分类模型即可
16 | pclass: model/nine3/best.pt
17 | icon4mi:
18 | pdetect: model/icon4mi800/detect.pt
19 | per: "model/icon4mi800/simvgg19.onnx"
20 | pclass: model/icon4mi800/muti.pt
21 | pclasstags: ["target"]
22 |
--------------------------------------------------------------------------------
/demo_geetest3nine.py:
--------------------------------------------------------------------------------
1 | from src.utils.nine import crop_nine
2 | from src.method.GTnine import GTnine
3 | from src.utils.outdata import Outfile
4 | from pathlib import Path
5 | from conf.config import gtconf
6 |
7 |
8 |
9 |
if __name__ == "__main__":
    gt = GTnine(pclass=gtconf["nine"]["pclass"])
    charimg = ["assets/nine3/ques_00001_54480.png"]
    background = "assets/nine3/img_00000_37458.png"

    outdir = "example/temp3"
    # parents=True together with exist_ok=True covers every case in one call
    Path(outdir).mkdir(parents=True, exist_ok=True)

    # Dump the tiles produced by crop_nine for inspection
    test_img = crop_nine(background)
    for index, tile in enumerate(test_img):
        tile.save(f"{outdir}/{index}.png")

    out = gt.run(background, charimg)
    # size is height * width
    Outfile.to_labelme(background, out, size=(261, 300), output_dir=outdir)
    Outfile.draw_image(
        background,
        chars_xyxy=out.get_value("charsImage"),
        targets_xyxy=out.get_value("targets_xyxy"),
        out_path=f"{outdir}/output.png",
    )
    # The annotated result is written to example/temp3/output.png
--------------------------------------------------------------------------------
/demo_geetest3word.py:
--------------------------------------------------------------------------------
1 | from src.method.GTClick import GTClick
2 | from src.utils.outdata import Outfile
3 | from pathlib import Path
4 | from conf.config import gtconf
5 |
# Solver for the GeeTest v3 word-click captcha, configured from conf/config.yaml
gtclick = GTClick(
    pdetect=gtconf["word"]['pdetect'],
    per=gtconf["word"]['per'],
    pclass=gtconf["word"]['pclass'],
    pclasstags=gtconf["word"]['pclasstags'],
    chars_issorted=False,
    rmalpha=True,
)


if __name__ == '__main__':
    i = "assets/word3/pic_00356_20119.png"
    outdir = "example/temp1"
    # parents=True together with exist_ok=True covers every case in one call
    Path(outdir).mkdir(parents=True, exist_ok=True)

    out = gtclick.run(i)
    # size is height * width
    Outfile.to_labelme(i, out, size=(384, 344), output_dir=outdir)

    # Save every cropped character / target image for inspection
    for index, temp in enumerate(out.charsImage):
        temp.save(f"{outdir}/char_{index}.png")
    for index, temp in enumerate(out.targetsImage):
        temp.save(f"{outdir}/target_{index}.png")

    Outfile.draw_image(i, out.chars_xyxy, out.targets_xyxy, f"{outdir}/output.png")
    # No explicit exit(): the script ends here anyway, and exit() is a helper
    # injected by the `site` module for interactive use.
33 |
34 |
--------------------------------------------------------------------------------
/demo_geetest4icon4mi.py:
--------------------------------------------------------------------------------
1 | from src.method.GTClick import GTClick
2 | from src.utils.outdata import Outfile
3 |
4 | from pathlib import Path
5 | from conf.config import gtconf
6 |
# Solver for the GeeTest v4 icon captcha, configured from conf/config.yaml
gtclick = GTClick(
    pdetect=gtconf['icon4mi']['pdetect'],
    per=gtconf['icon4mi']['per'],
    pclass=gtconf['icon4mi']['pclass'],
    pclasstags=gtconf['icon4mi']['pclasstags'],
    chars_issorted=True,
    rmalpha=True,
)
if __name__ == '__main__':
    i = "assets/icon4/imgs_00142_59845.png"
    extraicon = ["assets/icon4/3f9cdf.png", "assets/icon4/c59e7a.png", "assets/icon4/94cb8d.png"]
    outdir = "example/temp2"
    # parents=True already creates the parent directory, so one call suffices
    Path(outdir).mkdir(parents=True, exist_ok=True)

    out = gtclick.run(i, extraicon)
    # size is height * width
    Outfile.to_labelme(i, out, size=(200, 300), output_dir=outdir)

    # Save every cropped target image for inspection
    for index, temp in enumerate(out.targetsImage):
        temp.save(f"{outdir}/target_{index}.png")
    Outfile.draw_image(i, out.charsImage, out.targets_xyxy, f"{outdir}/output.png")
    # No explicit exit(): the script ends here anyway, and exit() is a helper
    # injected by the `site` module for interactive use.
30 |
31 |
--------------------------------------------------------------------------------
/imgsrc/a.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/imgsrc/a.jpg
--------------------------------------------------------------------------------
/imgsrc/w.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/imgsrc/w.png
--------------------------------------------------------------------------------
/jsdemo_g3word_1.py:
--------------------------------------------------------------------------------
1 | import json, re, time, execjs, os, random, requests
2 | from typing import Literal, Union
3 | from webjs.word3.f1.tools import send_image2server, download_img
4 |
5 |
# Endpoints used by the flow; they are referenced by index (URL[0] .. URL[3]).
URL = [
    'https://passport.bilibili.com/x/passport-login/captcha',  # initial request: obtain the challenge
    'https://api.geetest.com/gettype.php',  # initialisation
    'https://api.geetest.com/ajax.php',  # initialisation
    'https://api.geetest.com/get.php',  # fetch the captcha image
]

# HTTP method names accepted by Gessts.ajax_requests
Method = Literal['get', 'post', 'POST', 'GET']
# Captures the text between parentheses; used to unwrap JSONP responses
pattern = re.compile(r'\((.*?)\)', re.S)
15 |
16 |
class Gessts:
    """Walk through the bilibili GeeTest v3 word-click captcha flow.

    The flow is: fetch ``challenge``/``gt`` (``init_challenge``), fetch the
    captcha details (``get_all_info``), solve the image, build the ``w``
    parameter with the bundled JavaScript and submit it (``do_verify``).
    """

    # HTTP session, defined on the class and therefore shared by all instances
    session = requests.Session()
    # Converters from a response object to the requested data type
    dataProcessors = {
        'json': lambda resp: resp.json(),
        'text': lambda resp: resp.text,
        'contents': lambda resp: resp.content
    }
    # Request senders keyed by lower-case HTTP method name
    methodProcessors = {
        'get': session.get,
        'post': session.post
    }

    def __init__(self):
        self.cookies = None

        self.headers = {
            'authority': 'passport.bilibili.com',
            'accept': '*/*',
            'accept-language': 'zh-CN,zh;q=0.9',
            'Referer': 'https://www.bilibili.com',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
        }

    def ajax_requests(
            self, url: str, method: Method, headers: dict,
            cookies: dict, params: Union[dict, str, None],
            jsonData: Union[dict, None], retryTimes: int = 5,
            timeOut: int = 20
    ) -> requests.Response:
        """Send a request through the shared session, retrying on failure.

        Args:
            url: Target URL.
            method: HTTP method; upper- and lower-case spellings are accepted.
            headers: Request headers.
            cookies: Request cookies (may be None).
            params: Query-string parameters.
            jsonData: Object serialised with ``json.dumps`` into the request body.
            retryTimes: Maximum number of attempts.
            timeOut: Per-attempt timeout in seconds.

        Returns:
            The response of the first attempt that does not raise.

        Raises:
            KeyError: ``method`` is neither get nor post.
            RuntimeError: every attempt raised; the last error is chained.
        """
        # `Method` also allows 'GET'/'POST', but the table only has lower-case keys.
        methodProcessor = self.methodProcessors[method.lower()]
        last_error = None
        for attempt in range(retryTimes):
            try:
                return methodProcessor(
                    url=url,
                    headers=headers,
                    cookies=cookies,
                    params=params,
                    # NOTE: json.dumps(None) is the string 'null', so a body is
                    # sent even when jsonData is None; kept as in the original flow.
                    data=json.dumps(jsonData, ensure_ascii=False, separators=(',', ':')),
                    timeout=timeOut
                )
            except Exception as e:
                last_error = e
                print(
                    f"错误链接: {url}",
                    f"请求出现错误, 正在重试: {attempt}/{retryTimes}",
                    f"错误信息为: {e}",
                    sep='\n'
                )
        # Raising a plain string is itself a TypeError in Python 3, so raise a
        # real exception and report the actual number of attempts.
        raise RuntimeError(
            f'重试{retryTimes}次后仍然无法获取数据,可能是加密参数错误或者ip风控'
        ) from last_error

    def init_challenge(self):
        """Fetch the captcha ``challenge`` and ``gt`` from the bilibili endpoint.

        Returns:
            tuple: ``(challenge, gt)``.
        """
        url = URL[0]
        params = {
            'source': 'main-fe-header',
            't': '0.26599063907171017',
        }
        resp: dict = self.ajax_requests(
            url=url,
            params=params,
            method='get',
            jsonData=None,
            cookies=self.cookies,
            headers=self.headers
        ).json()
        # Read by key: unpacking .values() depended on the server's key order
        # and broke as soon as the object carried an extra field.
        geetest = resp['data']['geetest']
        return geetest['challenge'], geetest['gt']

    def get_all_info(self, challenge: str, gt: str, header:dict = None, cookies:dict = None) -> tuple:
        """Fetch ``c``, ``s`` and the captcha image address for ``gt``/``challenge``.

        Args:
            challenge: Challenge string.
            gt: GeeTest id.
            header: Request headers; the more complete the better, e.g.
                {
                    'authority': ***,
                    'accept': ***,
                    'accept-language': ***,
                    'Referer': ***,
                    'user-agent': ***,
                }
                Falls back to ``self.headers`` when not a dict.
            cookies: Request cookies; usually not needed. Falls back to
                ``self.cookies`` when not a dict.

        Returns:
            tuple: ``(pic, gt, challenge, c, s)`` where ``pic`` is the image
            address including the static-host prefix.

        Raises:
            AssertionError: the image address does not contain "word".
        """
        if header is not None and isinstance(header, dict):
            hd = header
        else:
            hd = self.headers
        if cookies is not None and isinstance(cookies, dict):
            ck = cookies
        else:
            ck = self.cookies
        # The timestamp is stored because do_verify reuses it for its callback name.
        now_stamp = int(time.time() * 1000)
        self._now_stamp = now_stamp
        par = {
            'gt': gt,
            'callback': f'geetest_{self._now_stamp}',
        }
        self.ajax_requests(url=URL[1], headers=hd, cookies=ck, jsonData=None, method='get', params=par)
        par.update({
            'challenge': challenge,
            'lang': 'zh-cn',
            'pt': '0',
            'client_type': 'web',
            'w': '',
        })
        self.ajax_requests(url=URL[3], method='get', headers=hd, cookies=ck, params=par, jsonData=None)
        self.ajax_requests(url=URL[2], method='get', headers=hd, cookies=ck, params=par, jsonData=None)
        par.update({
            'is_next': 'true',
            'type': 'click',
            'https': 'false',
            'protocol': 'https://',
            'offline': 'false',
            'product': 'embed',
            'api_server': 'api.geetest.com',
            'isPC': 'true',
            'autoReset': 'true',
            'width': '100%',
            'callback': f'geetest_{self._now_stamp}',
        })
        resp = self.ajax_requests(url=URL[3], method='get', headers=hd, cookies=ck, params=par, jsonData=None)
        # The request order above must not be changed; it has to match exactly.
        result: dict = json.loads(pattern.findall(resp.text)[0])['data']
        pic: str = 'https://static.geetest.com' + result['pic']
        c = result['c']
        s = result['s']
        assert "word" in pic, "这不是点选验证码"
        return pic, gt, challenge, c, s

    def xyxy2gt(self, xyxy_list: list[list[float]] ) -> str:
        """Convert ``[x1, y1, x2, y2]`` boxes to the string GeeTest expects.

        For each box the centre is taken, truncated to an integer, divided by
        333.375, multiplied by 100 * 100 and rounded; the points are rendered
        as ``"x_y"`` and joined with commas.

        Args:
            xyxy_list: List of four-element boxes.

        Returns:
            str: e.g. ``"3000_6000,1500_1500"``; empty string for an empty list.
        """
        assert isinstance(xyxy_list, list), "xyxy_list 应该是一个列表"
        assert all(len(i) == 4 for i in xyxy_list), "xyxy_list 中的每个元素应该是一个长度为4的列表"
        new = []
        for code in xyxy_list:
            x, y = (code[0] + code[2]) / 2, (code[1] + code[3]) / 2
            final_x = int(round(int(x) / 333.375 * 100 * 100, 0))
            final_y = int(round(int(y) / 333.375 * 100 * 100, 0))
            new.append(f'{final_x}_{final_y}')

        return ','.join(new)

    def do_verify(self, challenge: str, gt: str, pic_name:str="image.jpg", header:dict = None, cookies:dict = None) -> dict:
        """Solve the captcha and submit the answer.

        Args:
            challenge: Challenge string.
            gt: GeeTest id.
            pic_name: Local path the captcha image is saved to.
            header: Request headers; only forwarded to ``get_all_info``.
            cookies: Request cookies; only forwarded to ``get_all_info``.

        Returns:
            dict: The decoded verification response.
        """
        pic, gt, challenge, c, s = self.get_all_info(challenge, gt, header, cookies)
        # Download the image and ask the solver for the target boxes
        download_img(pic, pic_name)
        codes = send_image2server(pic_name)
        print(f"处理之前的坐标: {codes}")
        stringCodes = self.xyxy2gt(codes)
        print(
            f'处理后坐标: {stringCodes}',
            f'图片地址: {pic}',
            f'gt:{gt}, challenge:{challenge}',
            f'c: {c}, s: {s}', sep='\n'
        )
        with open('./webjs/word3/f1/demo.js', 'r', encoding='utf-8') as f:
            jscode = f.read()
        ctx = execjs.compile(jscode)
        print(f"stringCodes: {stringCodes}")
        w = ctx.call('get_w', stringCodes, pic, gt, challenge, c, s )

        ### Alternative 2
        # with open('./webjs/word3/f2/biblg3word.js', 'r', encoding='utf-8') as f:
        #     jscode = f.read()
        # ctx = execjs.compile(jscode)
        # w = ctx.call('get_w', stringCodes, pic, gt, challenge, c, s )

        params = {
            "gt": gt,
            "challenge": challenge,
            "lang": "zh-cn",
            "pt": "0",
            "client_type": "web",
            "w": w,
            "callback": f"geetest_{self._now_stamp}",
        }
        # Wait a little so the answer is not submitted suspiciously fast
        time.sleep(random.uniform(3, 6))
        resp = self.ajax_requests(
            url='https://api.geetest.com/ajax.php',
            method='get',
            headers=self.headers,
            cookies=self.cookies,
            jsonData=None,
            params=params
        )
        # The endpoint may answer with plain JSON or with JSONP. JSON decode
        # errors are ValueError subclasses; a bare except would also swallow
        # KeyboardInterrupt.
        try:
            resp_json = resp.json()
        except ValueError:
            resp_json = self.headle_jsonp(resp.text)
        return resp_json

    def is_valid_jsonp(self, text: str) -> bool:
        """Tell whether ``text`` looks like JSONP, i.e. starts with ``geetest_<digits>(``.

        Args:
            text: Text to check.

        Returns:
            bool: True when the prefix matches; False otherwise or for non-strings.
        """
        if not isinstance(text, str):
            return False
        pattern = re.compile(r"^geetest_\d+\(")
        return bool(pattern.match(text))

    def headle_jsonp(self, text) -> dict:
        """Strip the JSONP wrapper from ``text`` and decode the JSON payload.

        Args:
            text: Text to process.

        Returns:
            dict: The decoded payload.

        Raises:
            AssertionError: ``text`` is not JSONP.
        """
        if not self.is_valid_jsonp(text):
            # Raised explicitly: `assert False` vanishes under `python -O`,
            # which silently turned this into `return None`.
            raise AssertionError('不是 JSONP 格式的数据')
        # Greedy match: take everything up to the LAST ')' so a ')' inside the
        # payload does not truncate the JSON.
        jsonppattern = re.compile(r'\((.*)\)', re.S)
        return json.loads(jsonppattern.findall(text)[0])
273 |
274 |
if __name__ == '__main__':
    client = Gessts()
    # Ask the bilibili endpoint for a fresh challenge / gt pair
    challenge, gt = client.init_challenge()
    print(f"challenge: {challenge}, gt: {gt}")

    # The captcha image goes to temp/pic_<first five challenge characters>.jpg
    pic_name = os.path.join("temp", f"pic_{challenge[0:5]}.jpg")
    target_dir = os.path.dirname(pic_name)
    os.makedirs(target_dir, exist_ok=True)

    # Solve, submit, and show the verification result
    verdict = client.do_verify(challenge, gt, pic_name)
    print(verdict)
--------------------------------------------------------------------------------
/jsdemo_g3word_2.py:
--------------------------------------------------------------------------------
1 | import requests, execjs, json, time, os
2 | from jsonpath import jsonpath
3 | from webjs.word3.f2.tools import download_img, headers, cookies
4 | from webjs.word3.f2.loadmodel import gt3word
5 |
6 |
7 | session = requests.Session()
8 |
9 | ### 1. 获取challenge 和 gt
def get_challengeid():
    """Request a captcha from bilibili and return ``(challenge, gt)``.

    Both values are taken as the first jsonpath match for their key in the
    JSON response.
    """
    query = {
        'source': 'main-fe-header',
        't': '0.7758525919151655',
    }
    payload = session.get(
        'https://passport.bilibili.com/x/passport-login/captcha',
        params=query,
        cookies=cookies,
        headers=headers,
    ).json()
    challenge = jsonpath(payload, '$..challenge')[0]
    gt = jsonpath(payload, '$..gt')[0]
    return challenge, gt
21 |
22 | ### 2. 获取点击类型
def get_click_type(gt, challenge):
    """Query the captcha type for ``gt``/``challenge`` and return it.

    The JSONP answer is unwrapped by slicing between the first "(" and the
    last ")". Asserts that the reported type is ``'click'``.
    """
    query = {
        'gt': gt,
        'challenge': challenge,
        'lang': 'zh-cn',
        'pt': '0',
        'client_type': 'web',
        'callback': f'geetest_{int(time.time() * 1000)}'
    }
    body = session.get(
        'https://api.geetest.com/ajax.php', headers=headers, params=query
    ).text

    # Unwrap callback(...) to get at the JSON document
    start = body.find("(") + 1
    stop = body.rfind(")")
    decoded = json.loads(body[start:stop])

    click_type = jsonpath(decoded, '$..result')[0]
    assert click_type == 'click', "点击类型不是 click"
    return click_type
41 |
42 | ### 3. 获取 json 详细信息
def get_gtresponse(gt, challenge):
    """Fetch the captcha details and return ``(c, s, pic)``.

    Each value is the first jsonpath match for its key in the unwrapped
    JSONP answer of ``get.php``.
    """
    query = {
        'is_next': 'true',
        'type': 'click',
        'gt': gt,
        'challenge': challenge,
        'lang': 'zh-cn',
        'https': 'false',
        'protocol': 'https://',
        'offline': 'false',
        'product': 'embed',
        'api_server': 'api.geetest.com',
        'isPC': 'true',
        'autoReset': 'true',
        'width': '100%',
        'callback': f'geetest_{int(time.time() * 1000)}',
    }
    body = session.get('https://api.geetest.com/get.php', params=query, headers=headers).text

    # Unwrap callback(...) to get at the JSON document
    start = body.find("(") + 1
    stop = body.rfind(")")
    decoded = json.loads(body[start:stop])

    myc, mys, pic = (jsonpath(decoded, expr)[0] for expr in ('$..c', '$..s', '$..pic'))
    return myc, mys, pic
67 |
68 |
69 | ### 4. 最后验证
def validate(gt, challenge, w):
    """Submit the computed ``w`` parameter and return the raw response text."""
    query = {
        'gt': gt,
        'challenge': challenge,
        'lang': 'zh-cn',
        'pt': '0',
        'client_type': 'web',
        "w": w,
        'callback': f'geetest_{int(time.time() * 1000)}',
    }
    answer = session.get('https://api.geetest.com/ajax.php', headers=headers, params=query)
    return answer.text
83 |
84 |
85 |
if __name__ == "__main__":

    ### 1. Fetch challenge and gt
    challenge, gt = get_challengeid()

    ### 2. Fetch the captcha type (asserts it is a click captcha)
    click_type = get_click_type(gt, challenge)

    ### 3. Fetch the detailed JSON information
    myc, mys, pic = get_gtresponse(gt, challenge)
    time.sleep(1)

    ### 4. Download the image and run the solver on it
    os.makedirs('temp', exist_ok=True)
    download_img(pic, 'temp/a.jpg')
    out = gt3word.run('temp/a.jpg')

    ### 5. Build the w parameter (execjs is already imported at module level)
    time.sleep(1)
    with open("./webjs/word3/f2/biblg3word.js", 'r', encoding='utf-8') as f:
        jscode = f.read()
    ctx = execjs.compile(jscode)

    # The solver's boxes are handed to get_w unchanged
    w = ctx.call('get_w', out.targets_xyxy, pic, gt, challenge, myc, mys)

    ### 6. Final verification
    res = validate(gt, challenge, w)
    print(res)
115 |
116 |
117 |
118 |
119 |
--------------------------------------------------------------------------------
/jsdemo_g4icon.py:
--------------------------------------------------------------------------------
1 | import requests, json, time, execjs, uuid, os
2 | from jsonpath import jsonpath
3 | from webjs.word3.f2.loadmodel import gt4icon
4 | from webjs.icon4.tools import xyxy2gtformat, download_img, headers, cookies, now_str
5 | """
6 | 由于该网站比较严格,需要 header 和 cookies, 以及一些参数, 本代码只是一个示例, 不能直接运行
7 | """
params = {
    'scene_type': '1',
    'now': now_str,
    'reason': 'user.mihoyo.com#/login/password',
    'action_type': 'login_by_password',
    'account': '19196951600',
    't': now_str,
}


session = requests.session()
# Pre-set so that ten failed attempts end in a clear error below instead of a
# NameError at the first later use of `gt`.
gt = None
mmt_key = None
for ii in range(10):
    try:
        # First try: create_mmt may already hand out gt and mmt_key
        url1 = 'https://webapi.account.mihoyo.com/Api/create_mmt'
        response = session.get(url1, params=params, cookies=cookies, headers=headers)
        gt1 = jsonpath(response.json(), '$..gt')
        # NOTE(review): jsonpath results are indexed with [0] for gt, but mmt_key
        # is passed on un-indexed below and later in `user_info` - confirm intent.
        mmt_key = jsonpath(response.json(), '$..mmt_key')
        if gt1:
            gt = gt1[0]
            break
        time.sleep(1)
        # Second try: the login request answers with a gt as well
        data = {
            'account': '19194931000',
            'password': 'MERS6bUrEYw9LMhf2mLL9j2CeWmdowp5Vgadu58jZeYN7LT1BdWIh8ASiD35xaFRoPKg3Uz5B4ka4P+QzQB6ViopvqRPUW3VOhcpZLM/RM8RIDvHOtRZzHwJjGyQfw/gbZf2YPbARE3kplpvbTYvcX/3SjSuLkqG0XJapIvfKFc=',
            'is_crypto': 'true',
            'mmt_key': mmt_key,
            'source': 'user.mihoyo.com',
            't': str(int(time.time() * 1000)),
        }
        url2 = 'https://webapi.account.mihoyo.com/Api/login_by_password'
        response = session.post(url2, cookies=cookies, headers=headers, data=data)
        time.sleep(1)
        gt2 = jsonpath(response.json(), '$..gt')
        if gt2:
            gt = gt2[0]
            break
    except Exception as exc:
        # `except Exception` keeps Ctrl-C working; the cause is shown instead
        # of being swallowed.
        print(f"第{ii}次获取gt和mmt_key失败: {exc!r}")
        continue

if gt is None:
    raise RuntimeError("could not obtain gt / mmt_key after 10 attempts")
47 |
48 |
49 | params = {
50 | 'callback': f'geetest_{int(time.time() * 1000)}',
51 | 'captcha_id': gt,
52 | 'challenge': str(uuid.uuid4()),
53 | 'client_type': 'web',
54 | 'risk_type': 'icon',
55 | 'user_info': json.dumps({"mmt_key": mmt_key }, separators=(',', ':')),
56 | 'lang': 'zho',
57 | }
58 |
59 |
60 | response = session.get('https://gcaptcha4.geetest.com/load', params=params, cookies=cookies, headers=headers)
61 | res = response.text
62 | json_data = json.loads(res[res.index("(") + 1:res.rindex(")")])
63 | os.makedirs("temp", exist_ok=True)
64 |
65 | # with open("temp/icon4.json", "w") as f:
66 | # json.dump(json_data, f, ensure_ascii=False, indent=2)
67 |
68 | imgs = jsonpath(json_data, '$..imgs')[0]
69 | download_img(imgs, "temp/a.png")
70 |
71 | ques = jsonpath(json_data, '$..ques')[0]
72 | ques_path = [f"temp/ques_{i}.png" for i in range(len(ques))]
73 | download_img(ques, ques_path)
74 |
75 | imgs_path = "temp/a.png"
76 |
77 | out = gt4icon.run(imgs_path, ques_path)
78 | xyxy = out.targets_xyxy
79 |
80 | userresponse = xyxy2gtformat(xyxy)
81 |
82 | with open("webjs/icon4/demo_g4icon.js", "r") as f:
83 | jscode = f.read()
84 | ctx = execjs.compile(jscode)
85 |
86 |
87 | lot_number = jsonpath(json_data, '$..lot_number')[0]
88 | pow_detail = jsonpath(json_data, '$..pow_detail')[0]
89 | detail_time = jsonpath(pow_detail, '$..datetime')[0]
90 | payload = jsonpath(json_data, '$..payload')[0]
91 | process_token = jsonpath(json_data, '$..process_token')[0]
92 |
93 |
94 | w = ctx.call("get_w2", gt, lot_number, detail_time, userresponse)
95 |
96 |
97 |
98 | params = {
99 | 'callback': f'geetest_{int(time.time() * 1000)}',
100 | 'captcha_id': gt,
101 | 'client_type': 'web',
102 | 'lot_number': lot_number,
103 | 'risk_type': 'icon',
104 | 'payload': payload,
105 | 'process_token': process_token,
106 | 'payload_protocol': '1',
107 | 'pt': '1',
108 | 'w': w,
109 | }
110 |
111 | response = session.get('https://gcaptcha4.geetest.com/verify', params=params, cookies=cookies, headers=headers)
112 | print(response.text)
113 |
114 |
--------------------------------------------------------------------------------
/jsdemo_nine3.py:
--------------------------------------------------------------------------------
1 | import re, requests, time, uuid, execjs, json
2 | from lxml import etree
3 | from urllib.parse import urljoin
4 | from jsonpath import jsonpath
5 | from pathlib import Path
6 | from webjs.nine3.utils import *
7 | from webjs.word3.f2.loadmodel import gt3nine
8 |
9 |
10 |
11 |
# HTTP headers passed to every request made by this script.
headers = {
    'authority': 'gt4.geetest.com',
    'accept': '*/*',
    'accept-language': 'zh-CN,zh;q=0.9',
    'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
}
18 |
def get_captchaId():
    """Scrape the captcha id from the geetest v4 demo site.

    Loads the demo home page, locates the bundled ``/assets/index*`` script
    and extracts the ``captcha_id`` literal from it.

    Returns:
        tuple: ``(captcha_id, session)`` where ``session`` is the
        ``requests.Session`` used for both requests.

    Raises:
        RuntimeError: if the script tag or the captcha id cannot be found.
    """
    session = requests.Session()
    response = session.get('https://gt4.geetest.com/', headers=headers)

    page = etree.HTML(response.text)
    script_srcs = page.xpath('//script[contains(@src, "/assets/index")]/@src')
    if not script_srcs:
        raise RuntimeError("index script not found on https://gt4.geetest.com/")

    bundle_url = urljoin("https://gt4.geetest.com", script_srcs[0])
    bundle = session.get(bundle_url, headers=headers).text
    match = re.search(r'captcha_id:"([0-9a-z]+)"', bundle)
    if match is None:
        raise RuntimeError(f"captcha_id not found in {bundle_url}")
    return match.group(1), session
30 |
31 |
32 |
33 |
def get_resjson(captcha_id, session=None):
    """Load a nine-grid challenge and download its images into ``temp``.

    Args:
        captcha_id: captcha id to request a challenge for.
        session: optional ``requests.Session``. When omitted, the module-level
            ``session`` (set by the ``__main__`` block) is used if present,
            otherwise a fresh session is created.

    Returns:
        tuple: ``(resp_json, captcha_id, imgs_path, ques_path)`` - the parsed
        load response, the captcha id, and the local paths of the downloaded
        background image(s) and question icon(s).

    Raises:
        AssertionError: if the server returned a captcha type other than "nine".
    """
    if session is None:
        # Keep working for callers that relied on the script-level global.
        session = globals().get("session") or requests.Session()

    params = {
        'callback': f'geetest_{int(time.time() * 1000)}',
        'captcha_id': captcha_id,
        'challenge': str(uuid.uuid4()),
        'client_type': 'web',
        'risk_type': 'nine',
        'lang': 'zh',
    }
    response = session.get('https://gcaptcha4.geetest.com/load', params=params, headers=headers)
    res = response.text
    # Strip the JSONP wrapper "callback( ... )" and parse the JSON payload.
    resp_json = json.loads(res[res.find("(") + 1:res.rfind(")")])
    captcha_type = jsonpath(resp_json, '$..captcha_type')[0]
    assert captcha_type == 'nine', "captcha_type should be nine"

    # Fixed counters used only to build the local file names.
    kk, ss = 1, 1
    imgs_dir = ques_dir = "temp"
    Path(imgs_dir).mkdir(parents=True, exist_ok=True)
    imgs, imgs_count = generate_url(jsonpath(resp_json, '$..imgs')[0])
    imgs_path = generate_paths(imgs_dir, f"img_{kk}_{ss}", imgs_count)
    download_img(imgs, imgs_path)

    ques, ques_count = generate_url(jsonpath(resp_json, '$..ques')[0])
    ques_path = generate_paths(ques_dir, f"ques_{kk}_{ss}", ques_count)
    download_img(ques, ques_path)
    return resp_json, captcha_id, imgs_path, ques_path
60 |
61 |
62 |
if __name__ == "__main__":
    ## Nine-grid captcha demo
    captcha_id, session = get_captchaId()

    resp_json, captcha_id, imgs_path, ques_path = get_resjson(captcha_id)

    time.sleep(1)
    out = gt3nine.run(imgs_path[0], ques_path)
    userresponse = out.nine_rowcol  # selected cells of the nine-grid

    # All of the values below are taken from resp_json.
    lot_number = jsonpath(resp_json, '$..lot_number')[0]
    nine_nums = jsonpath(resp_json, '$..nine_nums')[0]
    payload = jsonpath(resp_json, '$..payload')[0]
    payload_protocol = jsonpath(resp_json, '$..payload_protocol')[0]
    datetime = jsonpath(resp_json, '$..datetime')[0]
    process_token = jsonpath(resp_json, '$..process_token')[0]
    # Explicit UTF-8 so the read does not depend on the platform's default encoding.
    with open("webjs/nine3/demo.js", "r", encoding="utf-8") as f:
        jscode = f.read()
    ctx = execjs.compile(jscode)
    w = ctx.call("get_w", captcha_id, lot_number, datetime, userresponse)

    params = {
        'callback': f'geetest_{int(time.time() * 1000)}',
        'captcha_id': captcha_id,
        'client_type': 'web',
        'lot_number': lot_number,
        'risk_type': 'nine',
        'payload': payload,
        'process_token': process_token,
        'payload_protocol': '1',
        'pt': '1',
        'w': w,
    }
    url3 = 'https://gcaptcha4.geetest.com/verify'
    response = requests.get(url3, params=params, headers=headers)

    print(response.text)
--------------------------------------------------------------------------------
/model/sha256.txt:
--------------------------------------------------------------------------------
1 | 971bb3bcbc6fe55cc17d2d54a655830974bdb1e19f5303d6bf01de4c4b62e957 g3word6300/detect.onnx
2 | 749812ddae42483864282885418ea8b52254b543ade9655ed408d35ec2193324 g3word6300/detect.pt
3 | 9025a7cc1c77f22dd008e2c727a2b9260a62afc8b2ca4ed0fe3c9f9ab01596d4 g3word6300/muti.pt
4 | 046ac6bec58c1687c147d5e0a6caec2cb55ac32fcf1292aa1f08151d3af90ec1 g3word6300/simvgg19.onnx
5 | 92d5f3e1d32f574c07d8d48db46c015901d6e765ecc09c9610dfecb492e0078f icon4mi800/detect.pt
6 | 74709cc0aa96dca01c825de76d3c8427ec16f7bd43dff2f94c5178a160144fa1 icon4mi800/muti.pt
7 | bc1033eb99ee2055efd5bd1eb6ffdd8f77d37fa7516d1297e5a919fbda69efc4 icon4mi800/simvgg19.onnx
8 | 92cd98ce5c22fb538fd7eea41968f69d40714e2d9ef9424b4eb6fbefbde4bd04 nine3/best.pt
9 | 1054268fe09de13f8a547dbaf4ba881bc7a5774d907e8bd08526809e8bd04247 极验4点选图标类型第三种.zip
10 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "dependencies": {
3 | "crypto-js": "^4.2.0",
4 | "node-rsa": "^1.1.1"
5 | }
6 | }
7 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | ## 验证码识别
2 |
3 | - [x] 文字点选
4 |
5 | - [x] 图标点选
6 |
7 | - [x] 九宫格(四代 and 三代)
8 |
9 | PS: 对于文字点选, 感觉可以当做一个简单的针对性的 OCR 识别, 因为文字分类有 1500+ 个.
10 |
11 | ## 免责声明
12 |
13 | 本项目旨在研究深度学习在验证码攻防上的应用。仅供学习交流使用,请勿用于非法用途,不得在任何商业使用,本人不承担任何法律责任。
14 |
15 | ## 运行环境
16 |
17 | ```bash
18 | conda create -n yzm python=3.10 -y
19 | conda activate yzm
20 | pip install -r requirements.txt
21 | ```
22 |
23 | - 理论上支持 3.10 及以上的版本, 但是没有测试过, 请自行测试. 3.10 以下的版本, 也自行测试
24 |
25 | ## 使用方法
26 |
27 | ```bash
28 | ## 文字点选 ---- 本地直接调用模型来识别(输出在 example/temp1/output.png 文件夹下)
29 | python demo_geetest3word.py
30 |
31 | ## 图标点选 ---- 本地直接调用模型来识别(输出在 example/temp2/output.png 文件夹下)
32 | python demo_geetest4icon4mi.py
33 |
34 | ## 九宫格 ---- 本地直接调用模型来识别(结果在 example/temp3/output.png )
35 | python demo_geetest3nine.py
36 | ```
37 |
38 | - 当然也可以传入自己的模型, 按照案例中的格式传入即可
39 |
40 | ## 有空请作者喝杯咖啡吗?
41 |
42 | 如果这个项目对您有帮助, 就请作者喝杯咖啡吧, 您的支持是作者最大的动力. 给个 star 也是对作者的支持.
43 |
44 | | Wechat Pay | Ali Pay |
45 | | ----------------------------------------- | ----------------------------------------- |
46 | |
|
|
47 |
48 | ## 模型下载
49 |
50 | - 下载下来以后, 把模型放到项目的指定位置即可, 一般放到 model 目录下(直接替换即可)
51 |
52 | - [huggingface:](https://huggingface.co/zscmmm/yzm)
53 |
54 | ## 简单的案例展示
55 |
56 | - 文字点选
57 |
58 | ```bash
59 | python demo_geetest3word.py
60 | ```
61 |
62 | | 原图 | 效果 |
63 | | ------------------------------------------------------------- | ---------------------------------------------------- |
64 | |
|
|
65 |
66 | - 图标点选.
67 |
68 | 需要传入额外的小图标且注意传入顺序, 即验证码右上角的小图标. 如果是透明的,需要把移除透明度设置为 `True`
69 |
70 | ```bash
71 | python demo_geetest4icon4mi.py
72 | ```
73 |
74 | | 原图 | 效果 |
75 | | -------------------------------------------------------------- | ---------------------------------------------------- |
76 | |
|
|
77 |
78 | - 九宫格
79 |
80 | 同理,也需要传入额外的小图标. 即验证码右上角的小图标
81 |
82 | ```bash
83 | python demo_geetest3nine.py
84 | ```
85 |
86 | | 原图 | 效果 |
87 | | ------------------------------------------------------------- | ---------------------------------------------------- |
88 | |
|
|
89 |
90 | ## app 服务
91 |
92 | - 采用 fastapi 框架, 一个简单的验证码识别服务.
93 |
94 | ```bash
95 | python service.py # 启动服务, 地址: 127.0.0.1:9100
96 |
97 | ```
98 |
99 | 测试的输出结果
100 |
101 | ```
102 | {'code': 200, 'msg': 'success', 'data': {'imageID': 'string', 'res': [[206.15, 19.79, 281.41, 89.9], [221.8, 234.02, 294.53, 307.69], [40.36, 76.1, 110.0, 145.15], [130.65, 88.2, 204.8, 160.11]]}}
103 | {'code': 200, 'msg': 'success', 'data': {'imageID': 'string', 'res': [[99, 0, 198, 86], [0, 86, 99, 172], [99, 86, 198, 172]]}}
104 | {'code': 200, 'msg': 'success', 'data': {'imageID': 'string', 'res': [[8.05, 53.09, 60.95, 107.45], [152.49, 75.74, 205.45, 128.27], [219.18, 51.93, 271.23, 106.0]]}}
105 | ```
106 |
107 | ## 增加 js 验证功能
108 |
109 | - 仅测试是否通过验证, 这东西具有时效性,不一定还能用
110 |
111 | ```bash
112 | # npm install # 安装依赖
113 | # python service.py #建议先启动服务, 然后再启动 js 代码
114 |
115 | python jsdemo_g3word_1.py #文字点选的一种方式
116 | python jsdemo_g3word_2.py #文字点选的另一种方式 (需要启动接口服务) --推荐
117 | python jsdemo_g4icon.py
118 | python jsdemo_nine3.py
119 |
120 | ```
121 |
122 | 具体的 api,可以参考: http://localhost:9100/docs
123 |
124 | ## 模型训练流程
125 |
126 | 模型不具有泛化性,需要根据具体的验证码进行训练,可以参考案例中的模型训练方法, 其实训练比较简单, 关键是标注数据, 以及数据的预处理
127 |
128 | - 文字点选大概用了 6300+ 张
129 |
130 | - 图标点选大概用了 800+ 张
131 |
132 | - 九宫格大概用了 800+ 张 (不记得了, 太久了)
133 |
134 | 模型的主要训练过程
135 |
136 | 1. 标注数据, 先用几百张数据, 进行标注
137 | 2. 数据预处理
138 | 3. 模型训练, yolov8 目标检测和分类模型, siamese 网络
139 | 4. 模型测试, 在一个更大的数据集上测试, 然后看模型的效果, 把预测不好的数据人工标注, 重新训练
140 | 5. 重复 1--4 步骤, 直到模型效果满意(好像有 3.2w+ 张不重复的数据, 预测效果不错, 感觉有 99.99%的样子, 具体没有统计过了. )
141 |
142 | 后面的图标和九宫格就只训练了几百张数据, 如果有需要可以继续, 接口已写好, 可以生成 labelme 格式的数据, 然后来回倒腾训练即可.
143 |
144 | - 比如, 利用 yolo 模型进行分类和孪生网络进行分类, 查看二者的分类结果是否一致, 以及 yolo 分类会不会错误分类, 如果存在上述的情况, 把错误的图片单独复制出来, 以及对应的 labelme 格式的标注文件也复制出来, 然后就是手动标注了.
145 |
146 | - 对于 九宫格和图标点选, 原理一样,直接根据模型进行分类裁剪,然后人工查看.
147 |
148 | - 对于 YOLO 模型, 采用的是 pt 文件, 当然只要你愿意, 可以采用 onnx 文件
149 |
150 | ```python
151 | # 一个简单使用 onnxruntime 的例子来运行 YOLOv8 生成的 onnx 模型
152 | # pip install onnx-predict-yolov8 或者找其他库, 也可以,这里做一个简单的演示
153 |
154 | from src.utils.utils import open_image
155 | import onnxruntime as ort
156 | from opyv8 import Predictor
157 |
158 | i = "assets/word3/pic_00356_20119.png"
159 | model = "model/g3word6300/detect.onnx"
160 | session = ort.InferenceSession(model, providers=[ "CPUExecutionProvider"])
161 | predictor = Predictor(session, ["char", "target"])
162 | img = open_image(i)
163 | print(predictor.predict(img))
164 | ```
165 |
166 | ## 起因
167 |
168 | - 本项目的目的是为了学习, 请勿用于非法用途, 本人不承担任何法律责任.
169 |
170 | - 作为深度学习的入门者, 一直想找一个比较有挑战性的项目, 看见网上都没有彻底开源此类模型, 本着开源的精神, 把自己的一些心得体会分享出来, 也希望大家能够一起交流, 一起进步.
171 |
172 | - 深度学习感觉需要掌握的知识太多, 只能从案例直接入手, 一开始的时候, 也是一头雾水, 从数据的标注, 数据的预处理, 模型的训练, 模型的测试, 一步一步的走下来, 也是一种成长, 也是一种收获.
173 |
174 | - 关键点在于: 最近终于有资金购买显卡了(还是一个 4060), 才入坑深度学习, 之前一直有心无力, 也是一种机缘吧. 苦逼的我!!!
175 |
176 | - 本人代码水平有限, 可能存在很多 bug.
177 |
178 | ## 参考
179 |
180 | - [https://github.com/ultralytics/ultralytics](https://github.com/ultralytics/ultralytics)
181 |
182 | - [https://github.com/bubbliiiing/Siamese-pytorch](https://github.com/bubbliiiing/Siamese-pytorch)
183 |
184 | - [https://github.com/MgArcher/Text_select_captcha](https://github.com/MgArcher/Text_select_captcha)
185 |
186 | - [https://github.com/sijiyo/projects](https://github.com/sijiyo/projects)
187 |
188 | ## Star History
189 |
190 | [](https://www.star-history.com/#zscmmm/yzm_captcha&Date)
191 |
192 |
193 |
194 |
195 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | ultralytics
2 | onnxruntime-gpu
3 | pillow==10.2.0
4 | pandas
5 | numpy
6 | jsonpath
7 | lxml
8 | PyExecJS2
9 | requests
10 | loguru
11 | killport
12 | email-validator
13 | fastapi==0.110.0
14 | fastapi-restful==0.5.0
15 | pydantic==2.6.3
16 | uvicorn==0.27.1
--------------------------------------------------------------------------------
/service.py:
--------------------------------------------------------------------------------
1 | """
2 | 识别服务,fastapi实现
3 | """
4 | import time
5 | from fastapi import FastAPI
6 | from loguru import logger
7 | logger.remove()
8 | logger.add("log/app.log", rotation="500 MB", retention="10 days")
9 | from app.gt3.word.word import gt3word
10 | from app.gt3.nine.nine import gt3nine
11 | from app.gt3.nine.nine4jsapp import gt4ninejs
12 | from app.gt3.word.wordjsapp import gt3wordjs
13 | from app.gt4.iconmi.iconmi import gt4iconmi
14 |
# Metadata passed to the FastAPI application constructor below.
title = "验证码识别服务"
description = "验证码识别服务"
version = "1.0.0"
contact = {"name": "XXXX", "email": "XXXXXX@gmail.com"}

# The ASGI application object; uvicorn is pointed at it as "service:app".
app = FastAPI(
    title=title,
    description=description,
    version=version,
    contact=contact
)
26 |
@app.get("/", summary="根路径", response_description="欢迎信息")
async def root() -> dict[str, str]:
    """
    欢迎访问验证码识别服务, 请查看文档
    """
    # Returning a literal dict cannot raise, so the former try/except was dead code.
    # The docstring above is left untouched because FastAPI publishes it as the
    # endpoint description.
    return {"message": "Hello World"}
# Mount each captcha solver as its own router to keep the service modular.
app.include_router(gt3word, prefix="/gt3", tags=["三代文字点选"])
app.include_router(gt3nine, prefix="/gt3", tags=["三代九宫格"])
app.include_router(gt4iconmi, prefix="/gt4", tags=["四代图标点选"])
app.include_router(gt4ninejs, prefix="/gt4js", tags=["四代九宫格js"])
app.include_router(gt3wordjs, prefix="/gt3js", tags=["三代文字点选js"])
42 |
43 |
44 |
if __name__ == '__main__':
    import uvicorn
    port = 9100
    # Alternative: the project's own helper for freeing the port.
    # from app.handleprocess import kill_process
    # kill_process(port)
    # Third-party helper; it can also be run from a terminal, here it is called as a function.
    import killport
    killport.kill_ports(ports=[port], view_only=False)
    time.sleep(2)  # wait for the old process to exit; starting too quickly tends to fail

    log_config = "app/uvicorn_config.json"
    uvicorn.run("service:app", host="0.0.0.0", port=port, reload=True,
                log_config=log_config, use_colors=True)

    # Or start it directly from the command line: uvicorn service:app --port 9100 --reload
60 |
--------------------------------------------------------------------------------
/src/method/GTClick.py:
--------------------------------------------------------------------------------
1 |
2 | from src.utils.SiameseOnnx import SiameseOnnx
3 | from src.utils.YoloOnnx import YoloD, YoloC
4 | from src.utils.utils import open_image, find_max_probability, process_similarity_matrix
5 | from typing import Optional, Union
6 | from PIL import Image
7 | from src.utils.outdata import Coordination
8 | import numpy as np
9 | from pathlib import Path
10 |
11 |
class GModel(object):
    """Model bundle for click-type captchas: a YOLO detector plus a matcher.

    The matcher is a siamese network (``per``), a YOLO classifier (``pclass``),
    or both; ``modeltype`` records which combination was loaded (1 = classifier
    only, 2 = siamese only, 3 = both).
    """

    def __init__(
            self,
            pdetect: str,
            per: str,
            pclass: Optional[str] = None,
            pclasstags: list[str] = ["icon"],  # at most two tags: [ordered char tag, target tag] or [target tag]
            chars_issorted: bool = False,  # chars are supplied by the caller in order; only honoured with one tag
            rmalpha: bool = False,  # only meaningful when chars_issorted is True
            conf=0.65,
            verbose=False,
            **kwargs
    ):
        """
        Detect targets with a YOLO detector, then pair each prompt character/icon
        with a detected target using a siamese network and/or a YOLO classifier.

        Args:
            pdetect: path of the YOLO detection model.
            per: path of the siamese network model (may be empty if pclass is given).
            pclass: path of the YOLO classification model (may be empty if per is given).
            pclasstags: detector class names used to split boxes. One or two
                entries; with two, the first is the ordered character class and
                the second is the target class.
            chars_issorted: when True and pclasstags has exactly one entry, the
                ordered prompt images are supplied by the caller instead of
                being detected.
            rmalpha: whether to strip transparency from caller-supplied prompt
                images; only meaningful when chars_issorted is True.
            conf: confidence threshold forwarded to the YOLO models.
            verbose: whether the models print detailed information.
            **kwargs: forwarded to the YoloD / YoloC constructors.
        """
        self.pdetect = pdetect
        self.per = per
        self.pclass = pclass
        # Copy so neither the shared default list nor the caller's list is aliased.
        self.pclasstags = list(pclasstags)
        self.conf = conf
        self.verbose = verbose
        self.rmalpha = rmalpha
        self.chars_issorted = chars_issorted
        self.modeltype = None
        # Caller-supplied ordered chars are only honoured with a single tag.
        self._chars_issorted = len(self.pclasstags) == 1 and bool(self.chars_issorted)
        assert len(self.pclasstags) in [1, 2], f"pclasstags length is not in [1, 2], but {len(self.pclasstags)}"

        self.modelyolod = YoloD(self.pdetect, task="detect", verbose=self.verbose, **kwargs)
        # Decide which matcher(s) to load from the paths that were given.
        if not self.per and not self.pclass:
            assert False, f"per and pclass is None"
        elif not self.per and self.pclass:
            self.modeltype = 1
            self.modelyoloc = YoloC(self.pclass, task="classify", verbose=self.verbose, **kwargs)
        elif self.per and not self.pclass:
            self.modeltype = 2
            self.modelpre = SiameseOnnx(self.per, providers=['CPUExecutionProvider'])
        else:
            self.modeltype = 3
            self.modelyoloc = YoloC(self.pclass, task="classify", verbose=self.verbose, **kwargs)
            self.modelpre = SiameseOnnx(self.per, providers=['CPUExecutionProvider'])

        self._img = None
        self._image_path = None
        self.extraicon = None

    def detect_objects(self, img, **kwargs) -> tuple:
        """
        Run the YOLO detector and split its boxes into characters and targets.

        Args:
            img: PIL.Image.Image to run detection on.
            **kwargs: forwarded to the detector's ``predict``; ``imgsz``
                overrides the detector's default image size.

        Returns:
            tuple: ``(chars_xyxy, targets_xyxy)``.
                chars_xyxy: list of ``[x1, y1, x2, y2]`` sorted by x1, or None
                    when the prompt images are supplied by the caller.
                targets_xyxy: list of ``[x1, y1, x2, y2]`` for the target class.
        """
        assert isinstance(img, Image.Image), f"img type is not Image.Image, but {type(img)}"
        # Pop (not get) imgsz: it is passed explicitly below, and leaving it in
        # kwargs would raise "got multiple values for keyword argument 'imgsz'".
        imgsz = kwargs.pop("imgsz", None) or self.modelyolod.imgsz

        results = self.modelyolod.predict(img, imgsz=imgsz, device=self.modelyolod._device, **kwargs)
        xyxy, xywh, box_name, info = self.modelyolod.extract_info(results)

        # Split the boxes by the class name predicted by the detector.
        assert self.pclasstags[-1] in box_name, f"pclasstags[-1]: {self.pclasstags[-1]} not in box_name: {box_name}"
        targets_xyxy = [i.get("xyxy") for i in info if i.get("classes") == self.pclasstags[-1]]
        chars_xyxy = None
        if not self._chars_issorted:
            assert len(self.pclasstags) == 2, f"pclasstags length is not 2, but {len(self.pclasstags)}"
            assert self.pclasstags[0] in box_name, f"pclasstags[0]: {self.pclasstags[0]} not in box_name: {box_name}"
            chars_xyxy = [i.get("xyxy") for i in info if i.get("classes") == self.pclasstags[0]]
            # Prompt characters are ordered left to right.
            chars_xyxy.sort(key=lambda x: x[0])
            if len(chars_xyxy) != len(targets_xyxy):
                # Keep the two lists the same length by truncating the longer one.
                min_len = min(len(chars_xyxy), len(targets_xyxy))
                chars_xyxy = chars_xyxy[:min_len]
                targets_xyxy = targets_xyxy[:min_len]
        return chars_xyxy, targets_xyxy

    def per_sortimages(self,
                       charsImage: list[Image.Image],
                       targetsImage: list[Image.Image],
                       **kwargs
                       ) -> list[tuple[int, int]]:
        """
        Pair prompt images with target images using the siamese network.

        Args:
            charsImage: ordered prompt images.
            targetsImage: cropped target images.
            **kwargs: ``imgsz`` may be a number, a one-element sequence, or a
                two-element sequence; anything falsy falls back to the siamese
                model's own ``imgsz``.

        Returns:
            Whatever ``SiameseOnnx.predict_list`` returns (the matched indices).
        """
        imgsz1 = kwargs.get("imgsz", None)
        if isinstance(imgsz1, (int, float)):
            imgsz = [imgsz1, imgsz1]
        elif isinstance(imgsz1, (list, tuple)) and len(imgsz1) == 1:
            imgsz = [imgsz1[0], imgsz1[0]]
        else:
            imgsz = imgsz1 if imgsz1 else self.modelpre.imgsz

        indices = self.modelpre.predict_list(charsImage, targetsImage, *imgsz)
        return indices

    def yolo_classify(self, charsImage, targetsImage, **kwargs):
        """
        Pair prompt images with target images using the YOLO classifier.

        Every image is classified once; each (char, target) pair is then scored
        with ``find_max_probability`` and the score matrix is resolved into
        pairs by ``process_similarity_matrix``.

        Args:
            charsImage: ordered prompt images.
            targetsImage: cropped target images.
            **kwargs: forwarded to the classifier's ``predict``.

        Returns:
            tuple: ``(final_indices, char_name, target_name)``; both name lists
            hold the best shared class name of each matched pair.
        """
        def top5(image):
            # Classify one image and keep only the top-5 names/confidences.
            result = self.modelyoloc.predict(
                image,
                conf=self.conf,
                imgsz=self.modelyoloc.imgsz,
                verbose=self.verbose,
                device=self.modelyoloc._device,
                **kwargs
            )
            _, _, names, confs = self.modelyoloc.extract_info(result)
            return names, confs

        # One inference per image instead of one per (char, target) pair.
        chars_top5 = [top5(image) for image in charsImage]
        targets_top5 = [top5(image) for image in targetsImage]

        imax_name_list = []
        prob_matrix = np.zeros((len(charsImage), len(targetsImage)))
        for i, (ic_top5name, ic_top5conf) in enumerate(chars_top5):
            row_name = []
            row_prob = []
            # Iterate over the targets (the original looped over len(charsImage),
            # which breaks when the two lists differ in length).
            for it_top5name, it_top5conf in targets_top5:
                imax_name, imax_prob = find_max_probability(ic_top5name, ic_top5conf, it_top5name, it_top5conf)
                row_name.append(imax_name)
                row_prob.append(imax_prob)
            imax_name_list.append(row_name)
            prob_matrix[i] = row_prob

        final_indices = process_similarity_matrix(prob_matrix)
        char_name = [imax_name_list[i][j] for i, j in final_indices]
        target_name = list(char_name)
        return final_indices, char_name, target_name
165 |
166 |
167 |
168 |
169 |
class GTClick(GModel):
    """Runnable click-captcha solver built on top of GModel."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def openimage(self, image_path: Union[str, Path, Image.Image]):
        """Load ``image_path`` into ``self._img`` and remember its path, if it has one."""
        if isinstance(image_path, (str, Path)):
            assert Path(image_path).exists(), f"image_path: {image_path} is not exists"
            self._image_path = image_path
        elif isinstance(image_path, Image.Image):
            # An in-memory image has no path.
            self._image_path = None
        else:
            assert False, f"image_path type is not str or Image.Image, but {type(image_path)}"
        self._img = open_image(image_path)

    def reset_outdata(self):
        """Start a fresh result container for the next run."""
        self.coordination = Coordination()

    def run(self, image_path: Union[str, Path, Image.Image], extraicon:list[str, Image.Image] = None, **kwargs) -> Coordination:
        """
        Solve one captcha image and return the filled result container.

        :param image_path: path of the captcha image, or an already opened image
        :param extraicon: ordered prompt icons, used when the prompt is supplied by the caller
        :param kwargs: forwarded to ``detect_objects``
        """
        self.openimage(image_path)
        self.reset_outdata()
        out = self.coordination

        out.set_value("extraicon", extraicon)

        # 1. Detect targets (and, unless supplied, the ordered prompt characters).
        chars_xyxy, targets_xyxy = self.detect_objects(
            self._img, conf=self.conf, verbose=self.verbose, **kwargs
        )
        out.set_value("chars_xyxy", chars_xyxy)  # already in prompt order
        out.set_value("targets_xyxy", targets_xyxy)

        target_crops = [self._img.crop(box) for box in targets_xyxy]
        out.set_value("targetsImage", target_crops)

        # 2. Prompt images: caller-supplied icons, or crops of the detected characters.
        if self._chars_issorted:
            prompt_images = [
                open_image(icon, rmalpha=self.rmalpha)
                for icon in out.get_value("extraicon")
            ]
        else:
            prompt_images = [self._img.crop(box) for box in chars_xyxy]
        out.set_value("charsImage", prompt_images)

        charsImage = out.get_value("charsImage")
        targetsImage = out.get_value("targetsImage")

        # 3. Match prompts to targets with whichever model(s) were loaded.
        if self.modeltype == 2:
            indices = self.per_sortimages(charsImage, targetsImage)
            char_name = target_name = None
        elif self.modeltype == 1:
            indices, char_name, target_name = self.yolo_classify(charsImage, targetsImage)
        else:
            indices0 = self.per_sortimages(charsImage, targetsImage)
            indices, char_name, target_name = self.yolo_classify(charsImage, targetsImage)
            # The two models disagree: the classifier result is kept, and the
            # sample is flagged so it can be reviewed by hand.
            if indices0 != indices:
                out.set_value("exist_error", True)
                print(f"image_path: {image_path} indices0: {indices0} indices: {indices}")

        out.set_value("chars_name", char_name)
        out.set_value("targets_name", target_name)
        out.set_value("indices", indices)
        out.rank()
        return out
228 |
229 |
230 |
231 |
--------------------------------------------------------------------------------
/src/method/GTnine.py:
--------------------------------------------------------------------------------
1 | from src.utils.YoloOnnx import YoloC
2 | from src.utils.utils import open_image, find_max_probability
3 | from typing import Optional, Union
4 | from PIL import Image
5 | from src.utils.outdata import Coordination
6 | from src.utils.SiameseOnnx import SiameseOnnx
7 | from src.utils.nine import crop_nine, flatten
8 | import os
9 | import pandas as pd
10 |
11 |
12 |
13 |
class GTnine():
    """Solver for nine-grid captchas: picks the tiles matching a question icon."""

    def __init__(
            self,
            pclass: Optional[str] = None,
            per: Optional[str] = None,
            conf=0.65,
            rmalpha: bool = True,
            verbose=False,
    ) -> None:
        '''
        Load the matcher(s). The YOLO classifier is the primary implementation;
        the original author notes the siamese network seems less suitable here.

        Args:
            pclass: path of the YOLO classification model.
            per: path of the siamese network model.
            conf: confidence threshold used both for prediction and for
                selecting matching tiles.
            rmalpha: whether transparency is removed from the question icons.
            verbose: whether the models print detailed information.

        At least one of ``pclass`` / ``per`` must be given. ``modeltype`` is
        1 for classifier only, 2 for siamese only, 3 for both.
        '''
        assert pclass or per, "pclass and per is None"
        self.pclass = pclass
        self.per = per

        self.modeltype = None
        self.conf = conf
        self.verbose = verbose
        self.rmalpha = rmalpha

        if self.pclass and not self.per:
            self.modeltype = 1
            self.modelyoloc = YoloC(self.pclass, task="classify", verbose=self.verbose)
        elif self.per and not self.pclass:
            self.modeltype = 2
            self.modelpre = SiameseOnnx(self.per, providers=['CPUExecutionProvider'])
        else:
            self.modeltype = 3
            self.modelyoloc = YoloC(self.pclass, task="classify", verbose=self.verbose)
            self.modelpre = SiameseOnnx(self.per, providers=['CPUExecutionProvider'])

    def _preprocess(self, charimg: Union[list, str], background: str) -> tuple[list[Image.Image], list[Image.Image]]:
        """Open the question icon(s) and cut the background into its nine tiles.

        Also records the background path (None for an in-memory image), the
        opened background image and its size on the instance.
        """
        if isinstance(charimg, str):
            # A str is a file path and must exist.
            assert os.path.exists(charimg), f"{charimg} not exists"
            charimg = [charimg]

        charimg = [open_image(i, rmalpha=self.rmalpha) for i in charimg]

        if isinstance(background, str):
            assert os.path.exists(background), f"{background} not exists"
            self._image_path = background
        elif isinstance(background, Image.Image):
            self._image_path = None
        else:
            assert False, "background type is not supported"

        self._img = open_image(background)
        self._bgsize = self._img.size
        crop_nine_list = crop_nine(background)

        return charimg, crop_nine_list

    def _filter_and_group(self, mat: list, num: int = None):
        """Select the best-scoring rows of ``mat`` and group them per question icon.

        Rows are sorted by ``conf`` (descending); then either the top ``num``
        rows are kept, or every row whose ``conf`` exceeds ``self.conf``.

        Returns:
            tuple: ``(silece_list, name_list)`` - per question icon (ascending
            ``index``), the selected tile indices and the matching names.
        """
        df = pd.DataFrame(mat)
        df = df.sort_values(by="conf", ascending=False)
        if num:
            df = df.head(num)
        else:
            df = df[df["conf"] > self.conf]

        silece_list = []
        name_list = []
        for _, group in df.groupby("index"):
            silece_list.append(group["msilce"].tolist())
            name_list.append(group["name"].tolist())
        return silece_list, name_list

    def _classify(self, image):
        """Classify one image with the YOLO classifier and return ``extract_info``'s result."""
        results = self.modelyoloc.predict(image,
                                          conf=self.conf,
                                          imgsz=self.modelyoloc.imgsz,
                                          verbose=self.verbose,
                                          device=self.modelyoloc._device
                                          )
        return self.modelyoloc.extract_info(results)

    def _get_similarity_byper(self, charimg: list, crop_nine_list: list, num: int = None):
        """Match question icons to tiles with the siamese network.

        Returns:
            tuple: ``(silece_list, None)`` - the siamese network yields no names.
        """
        mat = []
        for index, i in enumerate(charimg):
            for index_j, j in enumerate(crop_nine_list):
                prob = self.modelpre.predict(i, j)
                mat.append({
                    "index": index,
                    "name": None,
                    "conf": prob,
                    "msilce": index_j
                })
        silece_list, _ = self._filter_and_group(mat, num)
        return silece_list, None

    def _get_similarity_byyolo(self, charimg: list, crop_nine_list: list, num: int = None):
        """Match question icons to tiles with the YOLO classifier.

        Returns:
            tuple: ``(silece_list, name_list)`` grouped per question icon.
        """
        # Classify each tile once; the result does not depend on the question icon.
        tiles_info = [self._classify(tile) for tile in crop_nine_list]

        mat = []
        for index, i in enumerate(charimg):
            ic_top1name, ic_top1conf, ic_top5name, ic_top5conf = self._classify(i)
            for index_j, (_, _, it_top5name, it_top5conf) in enumerate(tiles_info):
                # Compare the icon's top-1 class against the tile's top-5 classes.
                imax_name, imax_prob = find_max_probability([ic_top1name], [ic_top1conf], it_top5name, it_top5conf)
                # Original author's note: the returned probability is always above 0.5.
                mat.append({
                    "index": index,
                    "top1name": ic_top1name,
                    "top1conf": ic_top1conf,
                    "name": imax_name,
                    "conf": imax_prob,
                    "msilce": index_j
                })

        return self._filter_and_group(mat, num)

    def reset_outdata(self):
        """Start a fresh result container for the next run."""
        self.coordination = Coordination()

    def run(self, background: Union[str, list, Image.Image], charimg: Union[str, list]) -> Coordination:
        """
        Solve one nine-grid captcha.

        Args:
            background: the large nine-grid image; if a list is given only its
                first element is used.
            charimg: the ordered question icon(s).

        Returns:
            Coordination with ``charsImage``, ``nine_rowcol``, ``targets_xyxy``
            and ``targets_name`` set.
        """
        assert background, "background is None"
        if isinstance(background, list) and len(background) > 1:
            print("Warning: background is list, only support one element")
            background = background[0]
        elif isinstance(background, list) and len(background) == 1:
            background = background[0]

        self.reset_outdata()
        charimg, crop_nine_list = self._preprocess(charimg, background)

        self.coordination.set_value("charsImage", flatten(charimg))

        if self.modeltype == 1:
            indices, names = self._get_similarity_byyolo(charimg, crop_nine_list)
        elif self.modeltype == 2:
            # NOTE(review): names is None here and is passed to flatten() below;
            # confirm flatten() accepts None.
            indices, names = self._get_similarity_byper(charimg, crop_nine_list)
        else:
            indices1, names1 = self._get_similarity_byyolo(charimg, crop_nine_list)
            # Computed only so the two models can be compared while debugging.
            indices2, names2 = self._get_similarity_byper(charimg, crop_nine_list)
            indices = indices1
            # Previously `names` was never assigned on this path (NameError below).
            names = names1

        rowcol = self.get_rowcol(indices)
        xyxy = self.get_xyxy(indices, self._bgsize)
        self.coordination.set_value("nine_rowcol", rowcol)
        # Flatten the per-icon groups into single lists.
        xyxy = flatten(xyxy)
        names = flatten(names)
        self.coordination.set_value("targets_xyxy", xyxy)
        self.coordination.set_value("targets_name", names)

        return self.coordination

    def get_rowcol(self, indices: list):
        """Convert the first icon's tile indices (0-8) to 1-based ``[row, col]`` pairs.

        Returns an empty list when nothing was matched.
        """
        if not indices:
            # No tile passed the threshold: report no selection instead of raising IndexError.
            return []
        maplist = {str(k): [k // 3 + 1, k % 3 + 1] for k in range(9)}
        return [maplist[str(i)] for i in indices[0]]

    def get_xyxy(self, indices: list, size: tuple):
        """Convert tile indices to ``[x1, y1, x2, y2]`` boxes, grouped like ``indices``.

        Args:
            indices: list of lists of tile indices (row-major, 3 per row).
            size: ``(width, height)`` of the background image.
        """
        width, height = size
        # Tile size as used by the original code: one third of the image minus 1 px.
        h = height // 3 - 1
        w = width // 3 - 1
        res = []
        for group in indices:
            row = []
            for j in group:
                x = (j % 3) * w
                y = (j // 3) * h
                row.append([x, y, x + w, y + h])
            res.append(row)
        return res
219 |
220 |
221 |
# Manual smoke test: solve one bundled sample and draw the result.
if __name__ == "__main__":
    from conf.config import gtconf
    # Classifier-only solver, model path taken from the project config.
    gt = GTnine(pclass=gtconf['nine']['pclass'])
    charimg = "assets/nine3/ques_00000_37458.png"
    background = "assets/nine3/img_00000_37458.png"

    out = gt.run(background, charimg)
    from src.utils.outdata import Outfile
    # NOTE(review): chars_xyxy receives the "charsImage" value (images, not boxes) - confirm
    # this is what Outfile.draw_image expects. The temp3/ directory is not created here.
    Outfile.draw_image(background,
                       chars_xyxy= out.get_value("charsImage"),
                       targets_xyxy = out.get_value("targets_xyxy"),
                       out_path="temp3/temp1.png"
                       )
235 |
236 |
237 |
238 |
239 |
--------------------------------------------------------------------------------
/src/utils/MakeCharImage.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple, Optional, Union
2 | from PIL import Image, ImageDraw, ImageFont
3 | import os
4 |
class MakeCharImage:
    """
    Render one character in black on a white canvas.

    The image is produced during construction and stored in
    ``generated_image``; it is also saved to ``output_path`` when given.
    """

    def __init__(
        self,
        text: str,
        image_size: Tuple[int, int] = (120, 120),
        offset: Union[int, float] = 0,
        font_path: str = None,
        output_path: Union[str, None] = None
    ) -> None:
        """
        :param text: the single character to draw
        :param image_size: canvas size as (width, height)
        :param offset: when > 0, the glyph is redrawn shifted by this amount
            in four diagonal directions to thicken the strokes
        :param font_path: font file; defaults to ``simsun.ttc`` next to this module
        :param output_path: optional file path the image is saved to
        """
        assert len(text) == 1, "text must be a single character"
        self.text = text
        self.image_size = image_size
        self.offset = offset
        # Fall back to the font file that lives beside this module.
        self.font_path = font_path or os.path.join(os.path.dirname(__file__), "simsun.ttc")
        self.output_path = output_path
        self.generated_image = self.generate_charimage()
        if self.output_path:
            self.generated_image.save(self.output_path)

    @classmethod
    def load_font(cls, font_path: str, font_size: int) -> Optional[ImageFont.FreeTypeFont]:
        """Load a TrueType font; return None when loading fails for any reason."""
        try:
            font = ImageFont.truetype(font_path, font_size)
        except Exception:
            # Deliberately best-effort: the caller draws with font=None instead.
            font = None
        return font

    def generate_charimage(self) -> Image.Image:
        """
        Draw ``self.text`` on a new white RGB canvas of ``self.image_size``.

        The font size is the smaller canvas side. The text origin is placed
        using the right/bottom values of the text bounding box.
        """
        assert len(self.text) == 1, "text must be a single character"
        assert len(self.image_size) == 2, "image_size must be a tuple of 2 integers (width, height)"

        white = (255, 255, 255)
        black = (0, 0, 0)

        font = self.load_font(self.font_path, min(self.image_size))
        canvas_w, canvas_h = self.image_size
        canvas = Image.new("RGB", (canvas_w, canvas_h), white)
        pen = ImageDraw.Draw(canvas)

        # Only the right and bottom edges of the bounding box are used.
        bbox = pen.textbbox((0, 0), text=self.text, font=font)
        x = (canvas_w - bbox[2]) // 2
        y = (canvas_h - bbox[3]) // 2
        pen.text((x, y), self.text, font=font, fill=black)

        # Fake bold: redraw the glyph shifted towards each diagonal.
        if self.offset > 0:
            shift = self.offset
            shifted_origins = (
                (x - shift, y - shift),
                (x + shift, y - shift),
                (x - shift, y + shift),
                (x + shift, y + shift),
            )
            for origin in shifted_origins:
                pen.text(origin, self.text, font=font, fill=black)

        return canvas
63 |
64 |
65 |
if __name__ == "__main__":
    # Smoke test: render one character with a 0.9 bold offset into output.png.
    sample_char = "利"
    canvas_size = (120, 120)
    demo = MakeCharImage(sample_char, canvas_size, 0.9, output_path="output.png")
70 |
--------------------------------------------------------------------------------
/src/utils/SiameseOnnx.py:
--------------------------------------------------------------------------------
1 | import onnxruntime as ort
2 | from src.utils.siamese import detect_image, sigmoid
3 | from src.utils.utils import open_image, process_similarity_matrix
4 |
class SiameseOnnx():
    """Wrapper around an ONNX siamese model that scores the similarity of two images."""

    def __init__(self, model_path: str, providers: list[str] = ['CPUExecutionProvider']) -> None:
        """
        Create the inference session and remember the model input size.

        :param model_path: path of the ONNX model file
        :param providers: onnxruntime execution providers
        """
        options = ort.SessionOptions()
        options.enable_profiling = False
        options.add_session_config_entry('session.load_model_format', 'ONNX')
        self.model = ort.InferenceSession(
            model_path,
            sess_options=options,
            # Hand over a copy so the shared default list can never be mutated.
            providers=list(providers),
        )
        # Dimensions 2 and 3 of the first input; predict_list reads them as
        # (height, width).
        self.imgsz = self.model.get_inputs()[0].shape[2:4]

    def getmodel_inputname(self):
        """Return the names of the model inputs, in model order."""
        return [model_input.name for model_input in self.model.get_inputs()]

    def predict_list(self,
                     img1: list,
                     img2: list,
                     image_width: int = None,
                     image_height: int = None
                     ) -> list[tuple[int, int]]:
        """
        Match every image of ``img1`` to its most similar image of ``img2``.

        Every pair is scored with :meth:`predict`; the score matrix is then
        reduced by ``process_similarity_matrix``.

        :param img1: list of images (anything ``open_image`` accepts)
        :param img2: list of candidate images, at least as long as ``img1``
        :param image_width: width fed to the model; defaults to ``self.imgsz[1]``
        :param image_height: height fed to the model; defaults to ``self.imgsz[0]``
        :return: (img1 index, img2 index) pairs, e.g. [(0, 1), (1, 0)]
        :raises AssertionError: on non-list input, when ``img1`` is longer
            than ``img2``, or when either list is empty
        """
        assert isinstance(img1, list), "img1 should be a list"
        assert isinstance(img2, list), "img2 should be a list"
        assert len(img1) <= len(img2), "img1 should be less than or equal to img2"
        # Reject an empty list on either side up front; an empty score matrix
        # cannot be reduced.
        if not img1 or not img2:
            raise AssertionError("img1 or img2 should not be empty")

        if image_width is None:
            image_width = self.imgsz[1]
        if image_height is None:
            image_height = self.imgsz[0]

        img1 = [open_image(i) for i in img1]
        img2 = [open_image(i) for i in img2]

        # A single pair has only one possible answer; skip the inference.
        if len(img1) == 1 and len(img2) == 1:
            return [(0, 0)]

        similarity_matrix = [
            [self.predict(left, right, image_width, image_height) for right in img2]
            for left in img1
        ]
        return process_similarity_matrix(similarity_matrix)

    def predict(self, img1: str, img2: str, image_width: int = 60, image_height: int = 60) -> float:
        """
        Score the similarity of two images.

        :param img1: first image (anything ``open_image`` accepts)
        :param img2: second image (anything ``open_image`` accepts)
        :param image_width: width the images are resized to
        :param image_height: height the images are resized to
        :return: sigmoid of the first model output value, rounded to two decimals
        """
        image_1 = open_image(img1)
        image_2 = open_image(img2)

        photo_1, photo_2 = detect_image(image_1, image_2, image_width, image_height)
        inputs_name = self.getmodel_inputname()
        inputs = {
            inputs_name[0]: photo_1,
            inputs_name[1]: photo_2,
        }
        outs = self.model.run(None, inputs)

        prob = sigmoid(outs[0][0][0])
        return round(prob, 2)
103 |
104 |
105 |
106 |
--------------------------------------------------------------------------------
/src/utils/YoloOnnx.py:
--------------------------------------------------------------------------------
1 | from ultralytics import YOLO
2 | from src.utils.utils import get_onnx_shape
3 | from ultralytics.utils.checks import cuda_is_available
4 |
5 |
class YoloD(YOLO):
    """YOLO detection model with a helper that unpacks a single-image result."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        tempmodel = self.model
        # When self.model is a path string ending in ".onnx" the input size is
        # read from the ONNX file; otherwise it comes from the model args.
        if isinstance(tempmodel, str) and tempmodel.endswith(".onnx"):
            self.imgsz = get_onnx_shape(tempmodel)
        else:
            self.imgsz = self.model.args["imgsz"]
        # Normalise a scalar or one-element size to a two-element list.
        if isinstance(self.imgsz, (int, float)):
            self.imgsz = [self.imgsz, self.imgsz]
        elif isinstance(self.imgsz, (list, tuple)) and len(self.imgsz) == 1:
            self.imgsz = [self.imgsz[0], self.imgsz[0]]
        if cuda_is_available():
            self._cuda = True
            self._device = 'cuda:0'
        else:
            self._cuda = None
            self._device = None

    def extract_info(self, results) -> tuple[list[list], list[list], list[str], list[dict]]:
        """
        Unpack the detection result of a single image.

        :param results: prediction results holding exactly one image
        :return: (xyxy, xywh, box_name, info) where
            xyxy: [[x1, y1, x2, y2], ...]
            xywh: [[x, y, w, h], ...]
            box_name: ["A", "B", ...]
            info: [{"classes": "A", "prob": 0.9,
                    "xyxy": [x1, y1, x2, y2], "xywh": [x, y, w, h]}, ...]
        """
        assert self.task == "detect", "detect only support detect task"
        assert len(results) == 1, "detect only support single image"

        result = results[0]
        names = result.names
        boxes = result.boxes
        class_ids = boxes.cls.tolist()
        xyxy = boxes.xyxy.tolist()
        xywh = boxes.xywh.tolist()
        # Per-box confidence is stored on the boxes; `result.probs` is the
        # classification output and carries nothing for detection, which made
        # every "prob" come out as 1.
        confidences = boxes.conf.tolist()

        # cls.tolist() yields floats (0.0, 1.0, ...); cast for the names lookup.
        box_name = [names[int(class_id)] for class_id in class_ids]
        info = [
            {
                "classes": name,
                "prob": round(confidence, 2),
                "xyxy": box_xyxy,
                "xywh": box_xywh,
            }
            for name, confidence, box_xyxy, box_xywh
            in zip(box_name, confidences, xyxy, xywh)
        ]
        return xyxy, xywh, box_name, info
71 |
72 |
73 |
class YoloC(YOLO):
    """YOLO classification model with a helper that unpacks a single-image result."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        source = self.model
        # A path string ending in ".onnx" means the size is read from the file.
        if isinstance(source, str) and source.endswith(".onnx"):
            self.imgsz = get_onnx_shape(source)
        else:
            self.imgsz = self.model.args["imgsz"]

        # Normalise a scalar or one-element size to a two-element list.
        if isinstance(self.imgsz, (int, float)):
            self.imgsz = [self.imgsz] * 2
        elif isinstance(self.imgsz, (list, tuple)) and len(self.imgsz) == 1:
            self.imgsz = [self.imgsz[0]] * 2

        has_cuda = bool(cuda_is_available())
        self._cuda = True if has_cuda else None
        self._device = 'cuda:0' if has_cuda else None

    def extract_info(self, result) -> tuple[str, float, list[str], list[float]]:
        """
        Unpack the classification result of a single image.

        :param result: prediction results holding exactly one image
        :return: (top1name, top1conf, top5name, top5conf)
            top1name: class name with the highest probability
            top1conf: that highest probability
            top5name: names of the five most probable classes
            top5conf: probabilities of those five classes
        """
        assert self.task == "classify", "classify only support classify task"
        assert len(result) == 1, "classify only support single image"

        first = result[0]
        class_names = first.names      # index -> class name
        probs = first.probs

        top1name = class_names[probs.top1]
        top1conf = probs.top1conf.tolist()
        top5conf = probs.top5conf.tolist()
        top5name = [class_names[index] for index in probs.top5]
        return top1name, top1conf, top5name, top5conf
121 |
122 |
123 |
--------------------------------------------------------------------------------
/src/utils/nine.py:
--------------------------------------------------------------------------------
1 | from src.utils.utils import open_image
2 | from PIL import Image
3 |
def flatten(lst, num=1):
    """
    Flatten nested lists by at most ``num`` levels.

    :param lst: iterable that may contain nested lists
    :param num: number of nesting levels to unfold
    :return: a new list; lists nested deeper than ``num`` are kept as they are
    """
    # No levels left to unfold: a shallow copy is the answer.
    if not num > 0:
        return list(lst)

    unfolded = []
    for element in lst:
        if isinstance(element, list):
            unfolded += flatten(element, num - 1)
        else:
            unfolded.append(element)
    return unfolded
18 |
19 |
def crop_nine(input_file: str) -> list[Image.Image]:
    """
    Cut an image into a 3x3 grid of tiles.

    :param input_file: image input, opened through ``open_image``
    :return: nine tiles ordered left-to-right, then top-to-bottom
    """
    img = open_image(input_file)
    full_width, full_height = img.size
    tile_w = full_width // 3
    tile_h = full_height // 3
    # Rows form the outer loop so tiles come out row by row.
    return [
        img.crop((col * tile_w, row * tile_h, col * tile_w + tile_w, row * tile_h + tile_h))
        for row in range(3)
        for col in range(3)
    ]
--------------------------------------------------------------------------------
/src/utils/outdata.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple, Optional, List, Union
2 | from dataclasses import dataclass
3 | from src.utils.yoloclass import Shape, Labelme
4 | from pathlib import Path
5 | import os
6 | from PIL import Image, ImageDraw, ImageFont
@dataclass
class Coordination:
    """Container for the images, boxes and names produced by one recognition run."""

    charsImage: List[Image.Image] = None
    targetsImage: List[Image.Image] = None
    chars_xyxy: List[List[float]] = None
    targets_xyxy: List[List[float]] = None
    chars_name: List[str] = None
    targets_name: List[str] = None
    extraicon: Optional[Union[str,Image.Image]] = None
    indices: Optional[Tuple[int, int]] = None
    exist_error: Optional[bool] = None
    nine_rowcol: Optional[Tuple[int, int]] = None
    chars_xywh: Optional[List[List[float]]] = None
    targets_xywh: Optional[List[List[float]]] = None

    def set_value(self, key, value):
        """Assign ``value`` to the existing attribute ``key``."""
        assert hasattr(self, key), f"key {key} not in Coordination"
        setattr(self, key, value)

    def get_value(self, key):
        """Return the value of the existing attribute ``key``."""
        assert hasattr(self, key), f"key {key} not in Coordination"
        return getattr(self, key)

    def rank(self):
        """
        Reorder images and boxes according to ``self.indices``.

        ``indices`` holds (char index, target index) pairs. After reordering,
        the xyxy boxes are rounded to two decimals and the xywh boxes are
        derived from them. Empty or missing collections become None.
        """
        assert self.indices is not None, "indices is None"

        self.chars_rank, self.targets_rank = zip(*self.indices)

        def reordered(items, order):
            return [items[position] for position in order] if items else None

        def rounded(boxes):
            # Keep two decimals.
            return [[round(value, 2) for value in box] for box in boxes] if boxes else None

        self.charsImage = reordered(self.charsImage, self.chars_rank)
        self.targetsImage = reordered(self.targetsImage, self.targets_rank)
        self.chars_xyxy = reordered(self.chars_xyxy, self.chars_rank)
        self.targets_xyxy = reordered(self.targets_xyxy, self.targets_rank)
        self.chars_xyxy = rounded(self.chars_xyxy)
        self.targets_xyxy = rounded(self.targets_xyxy)
        self.chars_xywh = self.xyxy2xywh(self.chars_xyxy) if self.chars_xyxy else None
        self.targets_xywh = self.xyxy2xywh(self.targets_xyxy) if self.targets_xyxy else None

    def to_dict(self):
        """Return the non-None attributes, leaving out images and ranking helpers."""
        hidden = ("charsImage", "targetsImage", "indices", "chars_rank", "targets_rank")
        exported = {}
        for name, value in self.__dict__.items():
            if value is None or name in hidden:
                continue
            exported[name] = value
        return exported

    def xyxy2xywh(self, xyxy):
        """
        Convert [x1, y1, x2, y2] boxes to [center_x, center_y, width, height].

        :return: converted boxes, or None when ``xyxy`` is empty or None
        """
        if not xyxy:
            return None
        assert isinstance(xyxy, list), "xyxy should be a list"
        assert all([len(i) == 4 for i in xyxy]), "xyxy should have 4 elements"
        converted = []
        for x1, y1, x2, y2 in xyxy:
            converted.append([(x1 + x2) / 2, (y1 + y2) / 2, x2 - x1, y2 - y1])
        return converted
53 |
54 |
55 |
@dataclass
class Outfile:
    """Stateless helpers that visualise results and export Labelme annotations."""

    @staticmethod
    def concatenate_images(images: List[Image.Image]) -> Image.Image:
        """Paste the images side by side, top-aligned, onto one new RGB image."""
        widths, heights = zip(*(i.size for i in images))

        total_width = sum(widths)
        max_height = max(heights)

        concatenated_image = Image.new('RGB', (total_width, max_height))

        x_offset = 0
        for img in images:
            concatenated_image.paste(img, (x_offset, 0))
            x_offset += img.width

        return concatenated_image

    @staticmethod
    def check_format(data):
        """
        Return True when ``data`` is a list of lists of numbers.

        The length of the inner lists is not checked; an empty list passes.
        """
        if not isinstance(data, list):
            return False  # not a list at all

        for sublist in data:
            if not isinstance(sublist, list):
                return False  # every entry must itself be a list
            for item in sublist:
                if not isinstance(item, (int, float)):
                    return False  # every coordinate must be an int or float
        return True

    @staticmethod
    def load_font(font_path: str, font_size: int) -> Optional[ImageFont.FreeTypeFont]:
        """Load a TrueType font; return None when loading fails for any reason."""
        try:
            return ImageFont.truetype(font_path, font_size)
        except Exception:
            # Deliberately best-effort: text is then drawn with font=None.
            return None

    @staticmethod
    def _draw_indexed_boxes(draw, boxes, font_path, outline, text_color):
        """Draw every box with its list index as a label at its top-left corner."""
        # The label size is half the width of the first box.
        font = Outfile.load_font(font_path, (boxes[0][2] - boxes[0][0]) // 2)
        offset = 0.1
        # The label is drawn once in place and once per diagonal shift.
        shifts = (
            (0, 0),
            (-offset, -offset),
            (offset, -offset),
            (-offset, offset),
            (offset, offset),
        )
        for index, xyxy in enumerate(boxes):
            draw.rectangle(xyxy, outline=outline, width=3)
            text = str(index)
            for dx, dy in shifts:
                draw.text((xyxy[0] + dx, xyxy[1] + dy), text, font=font, fill=text_color)

    @staticmethod
    def draw_image(image_path:str, chars_xyxy:list =None, targets_xyxy:list = None, out_path=None):
        """
        Draw boxes (and optionally the question images) onto an image and save it.

        :param image_path: existing image file to draw on
        :param chars_xyxy: either a list of [x1, y1, x2, y2] boxes (red outline,
            blue label) or a list of PIL images, which are concatenated, shrunk
            to a quarter and pasted into the bottom-left corner
        :param targets_xyxy: list of [x1, y1, x2, y2] boxes (blue outline, red label)
        :param out_path: output file; defaults to ``image_path`` with ".png"
            replaced by "_out.png"
        :raises AssertionError: if the image is missing or both lists are None
        """
        assert os.path.exists(image_path), f"{image_path} not exists"
        assert chars_xyxy is not None or targets_xyxy is not None, "chars_xyxy or targets_xyxy must be not None"

        out_path = image_path.replace(".png", "_out.png") if out_path is None else out_path
        # Create the output folder so saving does not fail on a missing directory.
        out_dir = os.path.dirname(out_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        try:
            font_path = os.path.join(os.path.dirname(__file__), "simsun.ttc")
        except NameError:
            # __file__ is not defined in some interactive contexts.
            font_path = None

        # The context manager releases the file handle once drawing is done.
        with Image.open(image_path) as img:
            draw = ImageDraw.Draw(img)

            # Empty lists are skipped: the label size needs a first box.
            if Outfile.check_format(chars_xyxy) and chars_xyxy:
                Outfile._draw_indexed_boxes(draw, chars_xyxy, font_path, "red", "blue")

            if chars_xyxy and all(isinstance(i, Image.Image) for i in chars_xyxy):
                # Stitch the images in order, scale them down and paste them
                # into the bottom-left corner of the picture.
                concat_image = Outfile.concatenate_images(chars_xyxy)
                concat_image = concat_image.resize((concat_image.width // 4, concat_image.height // 4))
                img.paste(concat_image, (0, img.height - concat_image.height))

            if targets_xyxy is not None and len(targets_xyxy) > 0:
                Outfile._draw_indexed_boxes(draw, targets_xyxy, font_path, "blue", "red")

            img.save(out_path)

    @staticmethod
    def _labelme_shape(points, group_id, label, name):
        """Build one Labelme shape dict for a box."""
        shape = Shape()
        shape.set_points(points)
        shape.set_group_id(int(group_id))
        shape.set_label(label)
        shape.set_text(name)
        shape.set_description(name)
        return shape.to_dict()

    @staticmethod
    def word3to_labelme(image_path: str,
                        chars_xyxy: list,
                        targets_xyxy: list,
                        chars_name: list,
                        targets_name: list,
                        size: Tuple[int, int] = (384, 344),
                        output_dir:str = None,
                        showWarning: bool = True
                        ) -> Labelme:
        """
        Write a Labelme JSON file describing the char and target boxes.

        :param image_path: image the annotation belongs to; only its name and
            parent folder name are used
        :param chars_xyxy: char boxes, may be None or empty
        :param targets_xyxy: target boxes; one shape pair is produced per entry
        :param chars_name: names for the char boxes
        :param targets_name: names for the target boxes, same length as ``targets_xyxy``
        :param size: forwarded to ``Labelme.set_size`` as positional arguments
        :param output_dir: folder the JSON file is written to; it is passed
            straight to ``os.makedirs``, so it must be given
        :param showWarning: print a warning when both char inputs are None
        :return: the populated Labelme object
        """
        if chars_xyxy is None and chars_name is None and showWarning:
            print("Warning: chars_xyxy and chars_name are None")
        assert len(targets_xyxy) == len(targets_name), "targets_xyxy and targets_name should have the same length"
        assert isinstance(targets_xyxy, list), "targets_xyxy should be a list"
        assert isinstance(targets_name, list), "targets_name should be a list"

        os.makedirs(output_dir, exist_ok=True)

        labelme = Labelme()
        labelme.set_size(*size)
        labelme.set_imagePath(os.path.join("../", Path(image_path).parent.stem, Path(image_path).name))
        for i in range(len(targets_xyxy)):
            # Char and target of the same position share one group id.
            if chars_xyxy:
                labelme.shapes.append(Outfile._labelme_shape(chars_xyxy[i], i, "char", chars_name[i]))
            labelme.shapes.append(Outfile._labelme_shape(targets_xyxy[i], i, "target", targets_name[i]))

        new_json = os.path.join(output_dir, f"{Path(image_path).stem}.json")
        labelme.to_json_file(new_json)
        return labelme

    @staticmethod
    def to_labelme(image_path:str, info:Coordination, size: Tuple[int, int] = (384, 344), output_dir:str = None) -> Labelme:
        """
        Export a ``Coordination`` as a Labelme JSON file.

        :param image_path: image path; the image is not read, the path is only
            used to name the JSON file
        :param info: Coordination holding the boxes and names
        :param size: forwarded to ``word3to_labelme``
        :param output_dir: output folder
        """
        assert isinstance(info, Coordination), "input should be Coordination"
        return Outfile.word3to_labelme(image_path,
                                       info.chars_xyxy, info.targets_xyxy,
                                       info.chars_name, info.targets_name,
                                       size=size,
                                       output_dir=output_dir)
212 |
213 |
214 |
--------------------------------------------------------------------------------
/src/utils/siamese.py:
--------------------------------------------------------------------------------
1 | """
2 | 来自项目: _
3 | """
4 |
5 | from PIL import Image
6 | from PIL.PngImagePlugin import PngImageFile
7 | import numbers
8 | import numpy as np
9 |
10 |
def sigmoid(x):
    """Logistic function 1 / (1 + e^-x); works element-wise on arrays."""
    decay = np.exp(-x)
    return 1 / (1 + decay)
13 |
14 |
15 |
def cvtColor(image):
    """
    Return ``image`` unchanged when its shape is (h, w, 3), otherwise the
    result of ``image.convert('RGB')``.
    """
    dims = np.shape(image)
    if len(dims) != 3 or dims[2] != 3:
        image = image.convert('RGB')
    return image
22 |
def preprocess_input(x):
    """Divide ``x`` by 255 in place and return it."""
    scale = 255.0
    x /= scale
    return x
26 |
def resize(img, size, interpolation=Image.BILINEAR):
    """
    Resize a PIL image.

    :param img: PIL image
    :param size: an int scales the shorter side to that length and keeps the
        aspect ratio; a (height, width) sequence resizes to exactly that size
    :param interpolation: PIL resampling filter
    :return: the resized image, or ``img`` itself when the shorter side
        already equals an int ``size``
    """
    if not isinstance(size, int):
        # PIL expects (width, height), the sequence is (height, width).
        return img.resize(size[::-1], interpolation)

    width, height = img.size
    if min(width, height) == size:
        return img

    if width < height:
        new_w, new_h = size, int(size * height / width)
    else:
        new_w, new_h = int(size * width / height), size
    return img.resize((new_w, new_h), interpolation)
42 |
def crop(img, i, j, h, w):
    """Crop the ``h`` x ``w`` region whose top-left corner is at row ``i``, column ``j``."""
    left, upper = j, i
    return img.crop((left, upper, left + w, upper + h))
45 |
def center_crop(img, output_size):
    """
    Crop the central region of a PIL image.

    :param img: PIL image
    :param output_size: a number for a square crop, or a (height, width) pair
    :return: the cropped image
    """
    if isinstance(output_size, numbers.Number):
        side = int(output_size)
        output_size = (side, side)
    full_w, full_h = img.size
    crop_h, crop_w = output_size
    top = int(round((full_h - crop_h) / 2.))
    left = int(round((full_w - crop_w) / 2.))
    return crop(img, top, left, crop_h, crop_w)
54 |
def letterbox_image(image, size, letterbox_image):
    """
    Bring a PIL image to ``size``.

    :param image: PIL image
    :param size: target (width, height)
    :param letterbox_image: when truthy, scale the image to fit while keeping
        the aspect ratio and centre it on a grey (128, 128, 128) canvas;
        otherwise resize and centre-crop to the target
    :return: the resulting image
    """
    w, h = size
    iw, ih = image.size

    if not letterbox_image:
        # A square target scales the shorter side; otherwise resize to (h, w).
        target = h if h == w else [h, w]
        return center_crop(resize(image, target), [h, w])

    # Resize with unchanged aspect ratio and pad the remainder.
    scale = min(w/iw, h/ih)
    nw = int(iw*scale)
    nh = int(ih*scale)
    shrunk = image.resize((nw, nh), Image.BICUBIC)
    canvas = Image.new('RGB', size, (128, 128, 128))
    canvas.paste(shrunk, ((w - nw) // 2, (h - nh) // 2))
    return canvas
74 |
75 |
76 |
def detect_image(image_1:PngImageFile, image_2:PngImageFile, image_width:int =60, image_height:int = 60):
    """
    Preprocess two PIL images into model input arrays.

    :param image_1: PIL image; must be an ``Image.Image``
    :param image_2: PIL image
    :param image_width: target width
    :param image_height: target height
    :return: (photo_1, photo_2), float32 arrays of shape (1, channels, height, width)
    """
    assert isinstance(image_1, Image.Image), "image_1 should be a Image.Image"

    # Convert to RGB first so grayscale inputs do not break prediction.
    rgb_1 = cvtColor(image_1)
    rgb_2 = cvtColor(image_2)

    # Resize and centre-crop to the requested size (no padding).
    sized_1 = letterbox_image(rgb_1, [image_width, image_height], False)
    sized_2 = letterbox_image(rgb_2, [image_width, image_height], False)

    # Scale pixel values to [0, 1].
    scaled_1 = preprocess_input(np.array(sized_1, np.float32))
    scaled_2 = preprocess_input(np.array(sized_2, np.float32))

    def as_batch(pixels):
        # HWC -> CHW, then add the leading batch dimension.
        channels_first = np.transpose(pixels, (2, 0, 1))
        return np.expand_dims(channels_first, 0).astype(np.float32)

    return as_batch(scaled_1), as_batch(scaled_2)
112 |
113 |
114 |
if __name__ == '__main__':
    # Smoke test: preprocess one sample image against itself.
    sample_path = "testimg/pic_00273_99704_target0.png"
    image_1 = Image.open(sample_path)
    image_2 = Image.open(sample_path)
    image_width, image_height = 60, 60

    photo_1, photo_2 = detect_image(image_1, image_2, image_width, image_height)
124 |
125 |
126 |
127 |
--------------------------------------------------------------------------------
/src/utils/simsun.ttc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zscmmm/yzm_captcha/cff648ecfe68f65cac6bcf030cc561ad43cff0b8/src/utils/simsun.ttc
--------------------------------------------------------------------------------
/src/utils/utils.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple, Optional
2 | from PIL import Image
3 | from io import BytesIO
4 | import numpy as np
5 | import onnxruntime as ort
6 | import base64
7 | import os
8 | from pathlib import Path
def get_onnx_shape(onnx_model: str) -> tuple:
    """
    Read dimensions 2 and 3 of the first input of an ONNX model.

    A temporary CPU-only inference session is created for the lookup.

    :param onnx_model: path of the ONNX model file
    :return: ``shape[2:4]`` of the first model input (height first, then width)
    """
    session_options = ort.SessionOptions()
    session_options.enable_profiling = False
    session = ort.InferenceSession(
        onnx_model,
        sess_options=session_options,
        providers=['CPUExecutionProvider'],
    )
    first_input = session.get_inputs()[0]
    height_width = first_input.shape[2:4]  # height first, then width
    del session
    return height_width
22 |
23 |
24 | def process_similarity_matrix(similarity_matrix:np.ndarray | list[list[float]]) -> list[tuple[int, int]]:
25 | """
26 | 处理相似度矩阵, 返回最大值的索引且不重复
27 | :param similarity_matrix: 相似度矩阵
28 | :return: 行索引和列索引的列表
29 | """
30 | if not isinstance(similarity_matrix, np.ndarray):
31 | similarity_matrix = np.array(similarity_matrix)
32 | # rows, cols = similarity_matrix.shape
33 | final_indices = []
34 | while True:
35 | #无参数的时候,把矩阵按行展开成一维数组,然后返回最大值的索引,如果有多个最大值,返回第一个
36 | max_index = np.argmax(similarity_matrix)
37 | #根据索引返回行和列
38 | max_i, max_j = np.unravel_index(max_index, similarity_matrix.shape)
39 | if similarity_matrix[max_i][max_j] == -np.inf:
40 | break
41 | final_indices.append((max_i, max_j))
42 | similarity_matrix[max_i, :] = -np.inf
43 | similarity_matrix[:, max_j] = -np.inf
44 |
45 | # 对 final_indices 按照行进行排序
46 | final_indices.sort(key=lambda x: x[0])
47 | return final_indices
48 |
def is_base64(s: str) -> bool:
    """
    Tell whether ``s`` is a non-empty, strictly valid base64 string.

    Decoding is strict: characters outside the base64 alphabet make the check
    fail instead of being silently dropped, so ordinary strings such as file
    names are not mistaken for base64. Whitespace is ignored.

    :param s: str or bytes to test; any other type yields False
    :return: True when ``s`` decodes as base64, False otherwise
    """
    if isinstance(s, str):
        try:
            raw = s.encode("ascii")
        except UnicodeEncodeError:
            return False
    elif isinstance(s, (bytes, bytearray)):
        raw = bytes(s)
    else:
        return False

    # Tolerate line breaks and spaces inside wrapped base64 text.
    raw = b"".join(raw.split())
    if not raw:
        return False

    try:
        base64.b64decode(raw, validate=True)
    except ValueError:
        # binascii.Error is a ValueError subclass.
        return False
    return True
61 |
62 | #打开图片
def open_image(file: str, rmalpha:bool = False, output_path: Optional[str] = None) -> Image.Image:
    """
    Open an image from several kinds of input and return it in RGB mode.

    :param file: a PIL image, a numpy array, raw image bytes, an existing
        ``Path``, an existing path string, a base64 string, or a list of
        any of these
    :param rmalpha: when True, an RGBA image is pasted onto a white
        background using its alpha channel; otherwise it is simply converted
    :param output_path: optional file path the RGB image is saved to
    :return: the RGB image, or a list of RGB images for list input
    :raises AssertionError: when the input type is not supported
    """
    if isinstance(file, list):
        print("Warning: Multiple images provided")
        # Return the list directly; the mode handling below is per image and
        # has already been applied by the recursive calls.
        return [open_image(f, rmalpha, output_path) for f in file]

    if isinstance(file, Image.Image):
        img = file
    elif isinstance(file, np.ndarray):
        img = Image.fromarray(file)
    elif isinstance(file, bytes):
        img = Image.open(BytesIO(file))
    elif isinstance(file, Path) and file.exists():
        img = Image.open(file)
    elif isinstance(file, str) and os.path.exists(file):
        # An existing path wins over base64 so a file name that happens to
        # decode as base64 is still read from disk.
        img = Image.open(file)
    elif isinstance(file, str) and is_base64(file):
        img = Image.open(BytesIO(base64.b64decode(file)))
    else:
        raise AssertionError("file type is not supported")

    if img.mode == 'RGBA' and rmalpha:
        # Flatten the alpha channel onto a white background.
        white_bg = Image.new("RGB", img.size, (255, 255, 255))
        white_bg.paste(img, mask=img.split()[3])
        img = white_bg
    elif img.mode != 'RGB':
        img = img.convert('RGB')

    if output_path:
        img.save(output_path)
    return img
98 |
99 |
100 | #调整坐标
def adjust_coordinates(coordinates: list, image_size: Tuple[float, float], toint: bool= True) -> list:
    """
    Normalise a box to [x1, y1, x2, y2] with top-left first, inside the image.

    Accepted input formats:
      * two points: [[x1, y1], [x2, y2]]
      * four values: [x1, y1, x2, y2]

    Coordinates are clamped to the image (0..width, 0..height). If the points
    are not top-left / bottom-right they are reordered and a warning is printed.

    :param coordinates: the box in one of the formats above
    :param image_size: (width, height) of the image
    :param toint: truncate the result to ints when True
    :return: [x1, y1, x2, y2]
    :raises ValueError: when ``coordinates`` has neither 2 nor 4 entries
    """
    if len(coordinates) == 2:
        x1, y1, x2, y2 = coordinates[0][0], coordinates[0][1], coordinates[1][0], coordinates[1][1]
    elif len(coordinates) == 4:
        x1, y1, x2, y2 = coordinates
    else:
        raise ValueError("Invalid coordinates format. It should be either [[x1, y1], [x2, y2]] or [x1, y1, x2, y2]")

    width, height = image_size
    # Keep the coordinates inside the image on both sides; negative values
    # used to slip through because only the upper bound was enforced.
    x1 = max(0, min(x1, width))
    x2 = max(0, min(x2, width))
    y1 = max(0, min(y1, height))
    y2 = max(0, min(y2, height))

    # Reorder so that (x1, y1) is the top-left and (x2, y2) the bottom-right.
    if x1 > x2 or y1 > y2:
        print("Warning: Input coordinates do not match the expected format, adjusting coordinates.")
        x1, y1, x2, y2 = min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2)

    if toint:
        return [int(x1), int(y1), int(x2), int(y2)]
    return [x1, y1, x2, y2]
137 |
138 |
139 | # 根据坐标进行裁剪
def crop_and_save_image(input_path, coordinates:list, output_path:str):
    """
    Crop a box out of an image file and save the crop.

    :param input_path: image file to read
    :param coordinates: box in a format accepted by ``adjust_coordinates``
    :param output_path: file the cropped image is written to
    """
    # The context manager closes the source file once the crop is saved.
    with Image.open(input_path) as original_image:
        x1, y1, x2, y2 = adjust_coordinates(coordinates, original_image.size, toint=False)
        cropped_image = original_image.crop((x1, y1, x2, y2))
        cropped_image.save(output_path)
149 |
150 |
def find_max_probability(ichars_five_name, ichars_five_prob, itargets_five_name, itargets_five_prob):
    """
    Pick the name with the highest combined probability of two candidate lists.

    Probabilities of a name present in both lists are added up.

    :param ichars_five_name: candidate names of the char image
    :param ichars_five_prob: probabilities matching ``ichars_five_name``
    :param itargets_five_name: candidate names of the target image
    :param itargets_five_prob: probabilities matching ``itargets_five_name``
    :return: (best name, its combined probability divided by 2 and rounded
        to two decimals)
    """
    combined = dict(zip(ichars_five_name, ichars_five_prob))
    target_scores = dict(zip(itargets_five_name, itargets_five_prob))
    for label, prob in target_scores.items():
        combined[label] = combined[label] + prob if label in combined else prob

    # On ties the name inserted first wins.
    best_name, best_total = max(combined.items(), key=lambda entry: entry[1])
    return best_name, round(best_total / 2, 2)
166 |
167 |
if __name__ == '__main__':
    # Demo 1: reduce a small similarity matrix to unique row/column pairs.
    demo_matrix = [
        [0.1, 0, 0.5],
        [0.7, 1, 0.8],
        [1, 0.6, 1],
    ]
    print(process_similarity_matrix(demo_matrix))

    # Demo 2: merge two candidate lists and report the most probable name.
    char_names = ['a', 'b', 'c']
    char_probs = [0.1, 0.2, 0.3]
    target_names = ['b1', 'c1', 'd1']
    target_probs = [0.2, 0.3, 0.4]

    max_name, max_prob = find_max_probability(char_names, char_probs, target_names, target_probs)

    print("最大概率对应的名称:", max_name)
    print("最大概率:", max_prob)
193 |
194 |
195 |
196 |
197 |
198 |
--------------------------------------------------------------------------------
/src/utils/yoloclass.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 | import json
3 | from pathlib import Path
4 |
class Shape():
    """One Labelme rectangle annotation."""

    def __init__(self, points = None):
        """
        :param points: optional box as a list [x1, y1, x2, y2]; left empty
            when not given
        :raises Exception: when ``points`` is given but is not a 4-element list
        """
        # Attribute order is the key order of to_dict().
        self.label = ""
        self.points = []
        if points:
            self.set_points(points)
        self.group_id = None
        self.description = ""
        self.shape_type = "rectangle"
        self.flags = {}
        self.text = ""
        self.mask = None

    def set_group_id(self, group_id):
        """Set the group id; it must be an int."""
        assert isinstance(group_id, int), "group_id must be int"
        self.group_id = group_id

    def set_label(self, label):
        """Set the label."""
        self.label = label

    def set_points(self, points):
        """
        Store a box as its two corner points.

        :param points: list [x1, y1, x2, y2]
        :raises Exception: when ``points`` is not a list of exactly four values
        """
        # Either failure must reject the input: not a list, or wrong length.
        if not isinstance(points, list) or len(points) != 4:
            raise Exception("points must be list and len(points) == 4")
        self.points = [[float(points[0]), float(points[1])], [float(points[2]), float(points[3])]]

    def set_shape_type(self, shape_type):
        """Set the shape type."""
        self.shape_type = shape_type

    def set_description(self, description):
        """Set the description."""
        self.description = description

    def set_text(self, text):
        """Set the text."""
        self.text = text

    def to_dict(self):
        """Return the instance attribute dict itself (not a copy)."""
        return self.__dict__
40 |
41 |
42 |
class Labelme():
    """In-memory builder for a labelme-style annotation document.

    The instance attributes are the top-level keys written by
    ``to_json_file``; ``shapes`` holds plain dicts produced by
    ``Shape.to_dict()``.
    """

    def __init__(self):
        self.version = "5.4.1"
        self.flags = {}
        self.shapes = []
        self.imagePath = ""
        self.imageData = None
        self.imageHeight = 200
        self.imageWidth = 300

    def set_imagePath(self, path):
        """Set the image path stored in the document."""
        self.imagePath = path

    def set_size(self, h, w):
        """Set the image height ``h`` and width ``w`` stored in the document."""
        self.imageHeight = h
        self.imageWidth = w

    def set_poses_list(self,
                       poses: List[List[int]],
                       label:list[str],
                       text: list[str] = None,
                       description: list[str] = None,
                       ):
        """Append one shape per entry of ``poses``.

        :param poses: list of ``[x1, y1, x2, y2]`` boxes.
        :param label: one label per box, or a single-item list that is
            repeated for every box.
        :param text: optional per-box text, indexed like ``poses``.
        :param description: optional per-box description, indexed like ``poses``.
        :raises Exception: if ``poses`` or ``label`` is not a list, or their
            lengths cannot be reconciled.
        """
        if not isinstance(poses, list):
            raise Exception("poses must be list")
        if not isinstance(label, list):
            raise Exception("label must be list")

        # A single label is broadcast to every box.
        if len(poses) != len(label) and len(label) == 1:
            label = label * len(poses)

        if len(poses) != len(label):
            raise Exception("len(poses) must be equal to len(label)")

        for i, pose in enumerate(poses):
            shape = Shape(pose)
            shape.set_label(label[i])
            if text:
                shape.set_text(text[i])
            if description:
                shape.set_description(description[i])
            self.shapes.append(shape.to_dict())

    def _check_index(self, ind):
        """Raise if ``ind`` is past the end of ``self.shapes``."""
        if ind >= len(self.shapes):
            raise Exception("ind must be less than len(self.shapes)")

    def set_shape_text(self, ind:int, texts:str):
        """Set the ``text`` of the shape at index ``ind``."""
        self._check_index(ind)
        self.shapes[ind]["text"] = texts

    def set_shape_description(self, ind:int, description:str):
        """Set the ``description`` of the shape at index ``ind``."""
        self._check_index(ind)
        self.shapes[ind]["description"] = description

    def set_shape_label(self, ind:int, label:str):
        """Set the ``label`` of the shape at index ``ind``."""
        # The original compared with `>`, so ind == len(self.shapes) escaped
        # the check and failed later with a bare IndexError.
        self._check_index(ind)
        self.shapes[ind]["label"] = label

    def set_shape_points(self, ind:int, points:List[int]):
        """Set the corners of the shape at index ``ind`` from ``[x1, y1, x2, y2]``."""
        self._check_index(ind)
        self.shapes[ind]["points"] = [[float(points[0]), float(points[1])], [float(points[2]), float(points[3])]]

    def set_poses(self, poses: List[List[float]], label="icon1"):
        """Append one shape per box in ``poses``, all sharing ``label``.

        An empty ``poses`` list adds nothing (previously it raised IndexError).

        :raises Exception: if ``poses`` is not a list.
        :raises AssertionError: if the first box is not a 4-item list.
        """
        if not isinstance(poses, list):
            raise Exception("poses must be list")
        if not poses:
            return
        first = poses[0]
        # Explicit raise keeps the former assert's exception type but still
        # runs under `python -O`.
        if not (isinstance(first, list) and len(first) == 4):
            raise AssertionError("poses must be list and len(poses[0]) == 4")
        for pose in poses:
            shape = Shape(pose)
            shape.set_label(label)
            self.shapes.append(shape.to_dict())

    def to_dict(self):
        """Return the instance ``__dict__`` itself (a live reference, not a copy)."""
        return self.__dict__

    def to_json_file(self, file_path):
        """Write the document as indented JSON to ``file_path``.

        :param file_path: destination path; must end with ``.json``.
        :raises AssertionError: if the path does not end with ``.json``.
        """
        if not str(file_path).endswith(".json"):
            raise AssertionError("file_path must be end with .json")

        # ensure_ascii=False emits raw non-ASCII characters, so the file
        # encoding must be pinned instead of depending on the platform locale.
        with open(file_path, "w", encoding="utf-8") as f:
            json.dump(self.__dict__, f, ensure_ascii=False, indent=2)
128 |
if __name__ == "__main__":
    # Manual demo: build a two-box annotation with the default label and
    # write it to a JSON file in the current directory.
    demo_boxes = [[1, 2, 3, 4], [5, 6, 7, 9]]
    document = Labelme()
    document.set_imagePath("../icon4_imgs/imgs_00794_41425_0.png")
    document.set_size(200, 300)
    document.set_poses(demo_boxes)
    document.to_json_file("icon4_imgs.json")
--------------------------------------------------------------------------------
/webjs/icon4/demo_g4icon.js:
--------------------------------------------------------------------------------
1 | const NodeRSA = require('node-rsa');
2 | const crypto = require('crypto');
3 | const CryptoJS = require("crypto-js");
/**
 * Build a random 16-character lowercase hex string from four 4-digit chunks.
 * In this file the result is used as the AES key and as the suffix of the
 * proof-of-work message.
 *
 * NOTE(review): Math.random() is not a cryptographically secure source.
 *
 * @returns {string} 16 hex characters.
 */
function get_key() {
    var s4 = "";
    // `let` keeps the counter local; the original assigned to an undeclared
    // `i`, leaking a global (and throwing in strict mode / ES modules).
    for (let i = 0; i < 4; i++) {
        // (1 + r) * 65536 | 0 lies in [0x10000, 0x1ffff]; dropping the leading
        // "1" leaves exactly four zero-padded hex digits.
        s4 = s4 + ((1 + Math["random"]()) * 65536 | 0)["toString"](16)["substring"](1);
    }
    return s4;
}
/**
 * Return the MD5 digest of `word` as produced by CryptoJS's default
 * toString() on the hash result.
 */
function MD5_Encrypt(word) {
    const digest = CryptoJS.MD5(word);
    return digest.toString();
}
/**
 * AES-CBC encrypt `word` with `key` (both taken as UTF-8 text), using a fixed
 * IV of sixteen "0" characters and PKCS#7 padding.
 *
 * @returns {string} the ciphertext as a hex string.
 */
function AES_Encrypt(key, word) {
    const options = {
        iv: CryptoJS.enc.Utf8.parse("0000000000000000"),
        mode: CryptoJS.mode.CBC,
        padding: CryptoJS.pad.Pkcs7
    };
    const cipher = CryptoJS.AES.encrypt(
        CryptoJS.enc.Utf8.parse(word),
        CryptoJS.enc.Utf8.parse(key),
        options
    );
    // The cipher object stringifies to Base64; re-parse it and emit hex.
    const cipherBytes = CryptoJS.enc.Base64.parse(cipher.toString());
    return CryptoJS.enc.Hex.stringify(cipherBytes);
}
/**
 * RSA-encrypt `data` with the hard-coded public key (hex modulus, exponent
 * 0x10001) using PKCS#1 v1.5 padding.
 *
 * @returns {string} the ciphertext as a hex string.
 */
function RSA_encrypt(data) {
    const modulusHex = '00C1E3934D1614465B33053E7F48EE4EC87B14B95EF88947713D25EECBFF7E74C7977D02DC1D9451F79DD5D1C10C29ACB6A9B4D6FB7D0A0279B6719E1772565F09AF627715919221AEF91899CAE08C0D686D748B20A3603BE2318CA6BC2B59706592A9219D0BF05C9F65023A21D2330807252AE0066D59CEEFA5F2748EA80BAB81';
    const exponentHex = '10001';

    // node-rsa is only used to turn the raw (n, e) components into a key that
    // can be exported; the encryption itself is done by Node's crypto module.
    const rsa = new NodeRSA();
    rsa.importKey({
        n: Buffer.from(modulusHex, 'hex'),
        e: parseInt(exponentHex, 16),
    }, 'components-public');

    const keySpec = {
        key: rsa.exportKey('public'),
        padding: crypto.constants.RSA_PKCS1_PADDING
    };
    return crypto.publicEncrypt(keySpec, Buffer.from(data)).toString('hex');
}
37 |
38 |
39 |
40 |
/**
 * Return the SHA-256 digest of `str` as a lowercase hex string.
 */
function sha256(str) {
    return crypto.createHash('sha256').update(str).digest('hex');
}
46 |
/**
 * Build the `w` parameter for the icon-click captcha request.
 *
 * The payload object is serialised to JSON, AES-encrypted with a fresh random
 * key, and the RSA-encrypted key is appended (both parts are hex strings).
 *
 * All intermediate values are now declared locally; the original assigned to
 * undeclared names (`passtime`, `pow_msg`, `pow_sign`, `xiyu`, `w`), leaking
 * globals and failing in strict mode / ES modules.
 *
 * @param {string} gt            captcha id, embedded in the proof-of-work message
 * @param {string} lot_number    lot number, embedded in the payload and the proof-of-work message
 * @param {string} detail_time   timestamp string, embedded in the proof-of-work message
 * @param {Array}  userresponse  click coordinates, e.g. [[1554,6199],[1819,2771],[4569,3665]]
 * @returns {string} hex AES ciphertext followed by hex RSA ciphertext
 */
function get_w2(gt, lot_number, detail_time, userresponse){
    const randomkey = get_key();

    // Random pass time between 3000 and 3999.
    const passtime = 3000 + Math.floor(Math.random() * 1000);
    const pow_msg = '1' + "|" + 12 + "|" + 'sha256' + "|" + detail_time + "|" + gt + "|" + lot_number + "|" + '' + "|" + randomkey;
    const pow_sign = sha256(pow_msg);

    // Every entry of the plugin list has the same shape; only the name varies.
    const pdfPlugin = (name) => ({
        "name": name,
        "description": "Portable Document Format",
        "mimeTypes": [
            { "type": "application/pdf", "suffixes": "pdf" },
            { "type": "text/pdf", "suffixes": "pdf" }
        ]
    });

    // Key order is preserved from the original literal because it determines
    // the JSON text that gets encrypted.
    const payload = {
        "passtime": passtime,
        "userresponse": userresponse,
        "device_id": "70ad34ab80cef354efa5b79c622d5ad3",
        "lot_number": lot_number,
        "pow_msg": pow_msg,
        "pow_sign": pow_sign,
        "geetest": "captcha",
        "lang": "zh",
        "ep": "123",
        "biht": "1426265548",
        "gee_guard": {
            "env": {
                "sf": { "data": ["Arial Unicode MS", "Gill Sans", "Helvetica Neue", "Menlo"] },
                "seaof": {
                    "data": {
                        "tdf": 148.859375,
                        "elp": 148.859375,
                        "fos": 144.3125,
                        "pos": 148.859375,
                        "onm": 133.0625,
                        "nmi": 9.3125,
                        "mys": 146.09375
                    }
                },
                "aosua": { "data": 124.04344968475198 },
                "ecs": { "data": [30, 0, 0, 0] },
                "uscpo": {},
                "sal": { "data": [["zh-CN"]] },
                "hoc": { "data": 30 },
                "ydmed": { "data": 8 },
                "ncs": { "data": [900, 1440] },
                "yah": { "data": 8 },
                "eit": { "data": "Asia/Shanghai" },
                "ees": { "data": true },
                "els": { "data": true },
                "bni": { "data": true },
                "epo": { "data": false },
                "sdspc": {},
                "mlp": { "data": "MacIntel" },
                "slp": {
                    "data": [
                        pdfPlugin("PDF Viewer"),
                        pdfPlugin("Chrome PDF Viewer"),
                        pdfPlugin("Chromium PDF Viewer"),
                        pdfPlugin("Microsoft Edge PDF Viewer"),
                        pdfPlugin("WebKit built-in PDF")
                    ]
                },
                "sac": {
                    "data": {
                        "wpd": true,
                        "ytg": "1fd188f9714ca90a5a10eb2fc306b5eb",
                        "tcg": "_tcg_tcg_val",
                        "xt": "32a115bd05e0f411c5ecd7e285fd36e2"
                    }
                },
                "sstot": {
                    "data": {
                        "maxTouchPoints": 0,
                        "touchEvent": false,
                        "touchStart": false
                    }
                },
                "rev": { "data": "Google Inc." },
                "sadev": { "data": ["chrome"] },
                "doc": { "data": true },
                "drh": { "data": true },
                "lew": { "data": "Google Inc. (Apple)ANGLE (Apple, Apple M1, OpenGL 4.1)" },
                "slo": { "data": ["location"] },
                "pst": { "data": [false, false, false] }
            },
            "roe": {
                "aup": "3",
                "sep": "3",
                "egp": "3",
                "auh": "3",
                "rew": "3",
                "snh": "3",
                "snih": "3",
                "res": "3",
                "resl": "3",
                "stpn": "3"
            }
        },
        "va8R": "wG3Q",
        "em": {
            "ph": 0,
            "cp": 0,
            "ek": "11",
            "wd": 1,
            "nt": 0,
            "si": 0,
            "sc": 0
        }
    };

    // NOTE(review): replace() with a string pattern only touches the FIRST
    // match, so this removes a single space and at most one apostrophe. That
    // behaviour is kept byte-for-byte because it defines the encrypted
    // payload; confirm against the server before switching to a global replace.
    const serialized = JSON.stringify(payload).replace(" ", "").replace("'", '"');
    return AES_Encrypt(randomkey, serialized) + RSA_encrypt(randomkey);
}
287 |
288 |
289 |
290 | // lot_number = 'de023059ed154096bc535dece6904205'
291 | // captcha_id = gt = '0b2abaab0ad3f4744ab45342a2f3d409'
292 | // detail_time = '2024-03-12T13:50:04.645097+08:00'
293 | // distance = 300
294 | // passtime = 1786
295 | // track = [[34,45,67,78],[23, 45, 56, 67]]
296 | // console.log(get_w2(gt, lot_number, detail_time, distance, passtime, track))
297 |
298 |
299 |
--------------------------------------------------------------------------------
/webjs/icon4/tools.py:
--------------------------------------------------------------------------------
1 | from urllib import request
2 | import os
3 | from typing import Union
4 | from urllib.parse import urljoin
5 | import time
def download_img(url: Union[str, list], path: Union[str, list]) -> None:
    """Download one image, or a batch of images, with ``urllib.request.urlretrieve``.

    URLs that do not contain ``"https"`` are resolved against
    ``https://static.geetest.com``. Missing parent directories of every target
    path are created. The caller's ``url`` list is never modified (the
    original rewrote its entries in place and skipped directory creation for
    batches).

    :param url: a single URL, or a list of URLs.
    :param path: target file path including extension, or a list of paths
        matching ``url`` one-to-one.
    :raises AssertionError: if the two lists differ in length.

    Any other combination of argument types is silently ignored, as before.
    """
    if isinstance(url, str) and isinstance(path, str):
        pairs = [(url, path)]
    elif isinstance(url, list) and isinstance(path, list):
        # Explicit raise keeps the former assert's exception type but still
        # runs under `python -O`.
        if len(url) != len(path):
            raise AssertionError("url和path长度不一致")
        pairs = list(zip(url, path))
    else:
        return

    for one_url, one_path in pairs:
        parent = os.path.dirname(one_path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        if "https" not in one_url:
            one_url = urljoin("https://static.geetest.com", one_url)
        request.urlretrieve(one_url, one_path)
25 |
def xyxy2gtformat(xyxy):
    """Convert ``[x1, y1, x2, y2]`` boxes to scaled centre points.

    Each box is reduced to its floor-divided centre, then x is multiplied by
    32 and y by 48 (factors the original author worked out by hand).

    :param xyxy: iterable of ``[x1, y1, x2, y2]`` boxes.
    :return: list of ``[x, y]`` pairs, one per box.
    """
    x_factor, y_factor = 32, 48
    return [
        [(box[0] + box[2]) // 2 * x_factor, (box[1] + box[3]) // 2 * y_factor]
        for box in xyxy
    ]
39 |
40 |
41 |
42 |
# Millisecond Unix timestamp, computed once when this module is imported and
# reused as the DEVICEFP_SEED_TIME cookie value below.
now_str = str(int(time.time() * 1000))

# Static cookie values; only DEVICEFP_SEED_TIME changes between runs.
# NOTE(review): these look like values captured from a browser session —
# confirm they are still accepted by the target site.
cookies = {
    'aliyungf_tc': '180a86da32644284df3bb8fbeeb91f03283c3444515b4888d1f04eb2eb504862',
    '_MHYUUID': 'b337f507-855b-4c73-afd8-13b573b69469',
    'DEVICEFP_SEED_ID': 'f42cee7a2fbc6a2b',
    'DEVICEFP_SEED_TIME': now_str,
    'DEVICEFP': '38d7f7f987b09'
}

# Static request headers. 'x-rpc-device_fp' and 'x-rpc-device_id' repeat the
# DEVICEFP and _MHYUUID cookie values above, so keep them in sync when editing.
headers = {
    'Accept': 'application/json, text/plain, */*',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Origin': 'https://user.mihoyo.com',
    'Pragma': 'no-cache',
    'Referer': 'https://user.mihoyo.com/',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-site',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0',
    'x-rpc-client_type': '4',
    'x-rpc-device_fp': '38d7f7f987b09',
    'x-rpc-device_id': 'b337f507-855b-4c73-afd8-13b573b69469',
    'x-rpc-device_model': 'Microsoft%20Edge%20120.0.0.0',
    'x-rpc-device_name': 'Microsoft%20Edge',
    'x-rpc-mi_referrer': 'https://user.mihoyo.com/',
    'x-rpc-source': 'accountWebsite',
}
73 |
--------------------------------------------------------------------------------
/webjs/nine3/demo.js:
--------------------------------------------------------------------------------
1 | const NodeRSA = require('node-rsa');
2 | const crypto = require('crypto');
3 | const CryptoJS = require("crypto-js");
/**
 * Build a random 16-character lowercase hex string from four 4-digit chunks.
 * In this file the result is used as the AES key and as the suffix of the
 * proof-of-work message.
 *
 * NOTE(review): Math.random() is not a cryptographically secure source.
 *
 * @returns {string} 16 hex characters.
 */
function get_key() {
    var s4 = "";
    // `let` keeps the counter local; the original assigned to an undeclared
    // `i`, leaking a global (and throwing in strict mode / ES modules).
    for (let i = 0; i < 4; i++) {
        // (1 + r) * 65536 | 0 lies in [0x10000, 0x1ffff]; dropping the leading
        // "1" leaves exactly four zero-padded hex digits.
        s4 = s4 + ((1 + Math["random"]()) * 65536 | 0)["toString"](16)["substring"](1);
    }
    return s4;
}
/**
 * Return the MD5 digest of `word` as produced by CryptoJS's default
 * toString() on the hash result.
 */
function MD5_Encrypt(word) {
    const digest = CryptoJS.MD5(word);
    return digest.toString();
}
/**
 * AES-CBC encrypt `word` with `key` (both taken as UTF-8 text), using a fixed
 * IV of sixteen "0" characters and PKCS#7 padding.
 *
 * @returns {string} the ciphertext as a hex string.
 */
function AES_Encrypt(key, word) {
    const options = {
        iv: CryptoJS.enc.Utf8.parse("0000000000000000"),
        mode: CryptoJS.mode.CBC,
        padding: CryptoJS.pad.Pkcs7
    };
    const cipher = CryptoJS.AES.encrypt(
        CryptoJS.enc.Utf8.parse(word),
        CryptoJS.enc.Utf8.parse(key),
        options
    );
    // Convert the encrypted data: Base64 text -> parsed bytes -> hex text.
    const cipherBytes = CryptoJS.enc.Base64.parse(cipher.toString());
    return CryptoJS.enc.Hex.stringify(cipherBytes);
}
/**
 * RSA-encrypt `data` with the hard-coded public key (hex modulus, exponent
 * 0x10001) using PKCS#1 v1.5 padding.
 *
 * @returns {string} the ciphertext as a hex string.
 */
function RSA_encrypt(data) {
    const modulusHex = '00C1E3934D1614465B33053E7F48EE4EC87B14B95EF88947713D25EECBFF7E74C7977D02DC1D9451F79DD5D1C10C29ACB6A9B4D6FB7D0A0279B6719E1772565F09AF627715919221AEF91899CAE08C0D686D748B20A3603BE2318CA6BC2B59706592A9219D0BF05C9F65023A21D2330807252AE0066D59CEEFA5F2748EA80BAB81';
    const exponentHex = '10001';

    // node-rsa is only used to turn the raw (n, e) components into a key that
    // can be exported; the encryption itself is done by Node's crypto module.
    const rsa = new NodeRSA();
    rsa.importKey({
        n: Buffer.from(modulusHex, 'hex'),
        e: parseInt(exponentHex, 16),
    }, 'components-public');

    const keySpec = {
        key: rsa.exportKey('public'),
        padding: crypto.constants.RSA_PKCS1_PADDING
    };
    return crypto.publicEncrypt(keySpec, Buffer.from(data)).toString('hex');
}
38 |
/**
 * Build the `w` parameter for the nine-grid captcha request.
 *
 * The payload object is serialised to JSON, AES-encrypted with a fresh random
 * key, and the RSA-encrypted key is appended (both parts are hex strings).
 *
 * All intermediate values are now declared locally; the original assigned to
 * undeclared names (`passtime`, `romdon_key`, `pow_msg`, `xiyu`, `w`), leaking
 * globals and failing in strict mode / ES modules.
 *
 * @param {string} captchaId     captcha id, embedded in the proof-of-work message
 * @param {string} lot_number    lot number, embedded in the payload and the proof-of-work message
 * @param {string} detail_time   timestamp string, embedded in the proof-of-work message
 * @param {Array}  userresponse  selected cells as 1-based [row, column] pairs, e.g. [[2,3],[2,2],[1,2]]
 * @returns {string} hex AES ciphertext followed by hex RSA ciphertext
 */
function get_w(captchaId, lot_number, detail_time, userresponse) {
    // Random pass time between 3000 and 3999.
    const passtime = 3000 + Math.floor(Math.random() * 1000);
    const romdon_key = get_key();
    const pow_msg = "1|0|md5|" + detail_time + "|" + captchaId + "|" + lot_number + "||" + romdon_key;

    // Key order is preserved from the original literal because it determines
    // the JSON text that gets encrypted.
    const payload = {
        "passtime": passtime,
        "userresponse": userresponse,
        "device_id": "D00D",
        "lot_number": lot_number,
        "pow_msg": pow_msg,
        "pow_sign": MD5_Encrypt(pow_msg),
        "geetest": "captcha",
        "lang": "zh",
        "ep": "123",
        "biht":"1426265548",
        "gee_guard":{"roe":{"aup":"3","sep":"3","egp":"3","auh":"3","rew":"3","snh":"3","res":"3","cdc":"3"}},
        "Dqf2":"zgWV",
        "em":{"ph":0,"cp":0,"ek":"11","wd":1,"nt":0,"si":0,"sc":0}
    };

    // NOTE(review): replace() with a string pattern only touches the FIRST
    // match; kept as-is because it defines the encrypted payload.
    const serialized = JSON.stringify(payload).replace(" ", "").replace("'", '"');
    return AES_Encrypt(romdon_key, serialized) + RSA_encrypt(romdon_key);
}
64 |
65 |
66 |
--------------------------------------------------------------------------------
/webjs/nine3/utils.py:
--------------------------------------------------------------------------------
1 |
2 | import json
3 | import os
4 | import random
5 |
6 | from urllib import request
7 | from typing import Union
8 | from urllib.parse import urljoin
9 | from lxml import etree
10 | from jsonpath import jsonpath
11 | import copy
12 |
def create_directory(directory):
    """Create ``directory`` (and any missing parents); do nothing if it already exists."""
    os.makedirs(name=directory, exist_ok=True)
15 |
def write_json_file(json_data, file_path):
    """Serialise ``json_data`` to ``file_path`` as UTF-8 JSON.

    Output is indented by two spaces, keys are sorted, and non-ASCII
    characters are written as-is.
    """
    dump_options = {"indent": 2, "ensure_ascii": False, "sort_keys": True}
    with open(file_path, mode="w", encoding="utf-8") as out_file:
        json.dump(json_data, out_file, **dump_options)
19 |
def generate_filename_prefix(ii):
    """Return ``(counter, salt)`` for building file names.

    ``counter`` is ``ii`` as a string zero-padded to five characters; ``salt``
    is a random integer between 10000 and 99999 inclusive.
    """
    salt = random.randint(10000, 99999)
    return str(ii).zfill(5), salt
24 |
def generate_paths(directory, prefix, count):
    """Build the list of ``.png`` paths for ``count`` images.

    A single image is named ``<prefix>.png``; otherwise the files are named
    ``<prefix>_0.png`` ... ``<prefix>_<count-1>.png``.
    """
    if count != 1:
        return [os.path.join(directory, f"{prefix}_{idx}.png") for idx in range(count)]
    return [os.path.join(directory, f"{prefix}.png")]
30 |
31 |
32 |
33 |
def download_img(url: Union[str, list], path: Union[str, list]) -> None:
    """Download one image, or a batch of images, with ``urllib.request.urlretrieve``.

    Missing parent directories of every target path are created. A bare file
    name (no directory part) is now accepted; previously it crashed because
    ``os.makedirs("")`` raises. Batches now get their directories created too.

    :param url: a single URL, or a list of URLs.
    :param path: target file path including extension, or a list of paths
        matching ``url`` one-to-one.
    :raises AssertionError: if the two lists differ in length.

    Any other combination of argument types is silently ignored, as before.
    """
    if isinstance(url, str) and isinstance(path, str):
        pairs = [(url, path)]
    elif isinstance(url, list) and isinstance(path, list):
        # Explicit raise keeps the former assert's exception type but still
        # runs under `python -O`.
        if len(url) != len(path):
            raise AssertionError("url和path长度不一致")
        pairs = list(zip(url, path))
    else:
        return

    for one_url, one_path in pairs:
        parent = os.path.dirname(one_path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        request.urlretrieve(one_url, one_path)
48 |
def handle_json_data(resp_json: dict):
    """Decode ``resp_json['data']`` in place when it is a JSON string.

    Best effort: if the key is missing, ``resp_json`` is not subscriptable, or
    the string is not valid JSON, the input is returned unchanged.

    Only the expected failure types are swallowed; the original bare
    ``except:`` also hid ``KeyboardInterrupt`` and ``SystemExit``. The
    redundant deep copy of the payload was dropped.

    :param resp_json: response dictionary, modified in place.
    :return: the same ``resp_json`` object.
    """
    try:
        data = resp_json['data']
        if isinstance(data, str):
            resp_json['data'] = json.loads(data)
    except (KeyError, TypeError, ValueError):
        # json.JSONDecodeError is a ValueError subclass.
        pass
    return resp_json
58 |
59 | def generate_list(i:str|list) -> list:
60 | if isinstance(i, str):
61 | return [i]
62 | else:
63 | return i
def generate_url(imgs: str|list) -> tuple[list, int]:
    """Resolve relative image paths against ``https://static.geetest.com/``.

    The return annotation previously claimed ``Union[list, int]``; the
    function has always returned a 2-tuple.

    :param imgs: one relative path, or a list of them.
    :return: ``(urls, count)`` — the absolute URLs and how many there are.
    """
    imgs = generate_list(imgs)
    imgs_list = [urljoin("https://static.geetest.com/", img) for img in imgs]
    return imgs_list, len(imgs_list)
68 |
69 |
def from_json_download_imgs_icon(resp_json: dict, ii: int):
    """Archive one icon-captcha response: the JSON plus its ``imgs`` and ``ques`` images.

    Files go to ``icon4_json``, ``icon4_imgs`` and ``icon4_ques`` in the
    current directory, named with the zero-padded counter ``ii`` and a random
    salt.

    :param resp_json: response dictionary; a string ``data`` field is decoded first.
    :param ii: running counter used in the file names.
    :return: ``ii`` unchanged.
    """
    resp_json = handle_json_data(resp_json)
    json_dir, imgs_dir, ques_dir = "icon4_json", "icon4_imgs", "icon4_ques"
    for folder in (json_dir, imgs_dir, ques_dir):
        create_directory(folder)

    kk, ss = generate_filename_prefix(ii)
    write_json_file(resp_json, os.path.join(json_dir, f"res_{kk}_{ss}.json"))

    # The first JSONPath match of each key is the one that gets downloaded.
    img_urls, img_total = generate_url(jsonpath(resp_json, '$..imgs')[0])
    download_img(img_urls, generate_paths(imgs_dir, f"img_{kk}_{ss}", img_total))

    ques_urls, ques_total = generate_url(jsonpath(resp_json, '$..ques')[0])
    download_img(ques_urls, generate_paths(ques_dir, f"ques_{kk}_{ss}", ques_total))
    return ii
92 |
93 |
94 |
def from_json_download_imgs_silde(resp_json: dict, ii: int):
    """Archive one slide-captcha response: the JSON plus its ``slice`` and ``bg`` images.

    Files go to ``slide4_json``, ``slide4_slide`` and ``slide4_bg`` in the
    current directory, named with the zero-padded counter ``ii`` and a random
    salt.

    :param resp_json: response dictionary; a string ``data`` field is decoded first.
    :param ii: running counter used in the file names.
    :return: ``ii`` unchanged.
    """
    resp_json = handle_json_data(resp_json)
    json_dir, slide_dir, bg_dir = "slide4_json", "slide4_slide", "slide4_bg"
    for folder in (json_dir, slide_dir, bg_dir):
        create_directory(folder)

    kk, ss = generate_filename_prefix(ii)
    write_json_file(resp_json, os.path.join(json_dir, f"res_{kk}_{ss}.json"))

    # The first JSONPath match of each key is the one that gets downloaded.
    slice_urls, slice_total = generate_url(jsonpath(resp_json, '$..slice')[0])
    download_img(slice_urls, generate_paths(slide_dir, f"slice_{kk}_{ss}", slice_total))

    bg_urls, bg_total = generate_url(jsonpath(resp_json, '$..bg')[0])
    download_img(bg_urls, generate_paths(bg_dir, f"bg_{kk}_{ss}", bg_total))
    return ii
119 |
120 |
121 |
122 |
123 |
def from_json_download_imgs_phrase(resp_json: dict, ii: int):
    """Archive one phrase-captcha response: the JSON plus its ``imgs`` images.

    Files go to ``phrase4_json`` and ``phrase4_imgs`` in the current
    directory, named with the zero-padded counter ``ii`` and a random salt.

    :param resp_json: response dictionary; a string ``data`` field is decoded first.
    :param ii: running counter used in the file names.
    :return: ``ii`` unchanged.
    """
    resp_json = handle_json_data(resp_json)
    json_dir, imgs_dir = "phrase4_json", "phrase4_imgs"
    for folder in (json_dir, imgs_dir):
        create_directory(folder)

    kk, ss = generate_filename_prefix(ii)
    write_json_file(resp_json, os.path.join(json_dir, f"res_{kk}_{ss}.json"))

    # The first JSONPath match of the key is the one that gets downloaded.
    img_urls, img_total = generate_url(jsonpath(resp_json, '$..imgs')[0])
    download_img(img_urls, generate_paths(imgs_dir, f"img_{kk}_{ss}", img_total))
    return ii
141 |
142 |
143 |
def from_json_download_imgs_winlinze(resp_json: dict, ii: int):
    """Archive one winlinze-captcha response: the JSON plus its ``imgs`` images.

    Files go to ``winlinze4_json`` and ``winlinze4_imgs`` in the current
    directory, named with the zero-padded counter ``ii`` and a random salt.

    :param resp_json: response dictionary; a string ``data`` field is decoded first.
    :param ii: running counter used in the file names.
    :return: ``ii`` unchanged.
    """
    resp_json = handle_json_data(resp_json)
    json_dir, imgs_dir = "winlinze4_json", "winlinze4_imgs"
    for folder in (json_dir, imgs_dir):
        create_directory(folder)

    kk, ss = generate_filename_prefix(ii)
    write_json_file(resp_json, os.path.join(json_dir, f"res_{kk}_{ss}.json"))

    # The first JSONPath match of the key is the one that gets downloaded.
    img_urls, img_total = generate_url(jsonpath(resp_json, '$..imgs')[0])
    download_img(img_urls, generate_paths(imgs_dir, f"img_{kk}_{ss}", img_total))
    return ii
160 |
161 |
def from_json_download_imgs_nine(resp_json: dict, ii: int):
    """Archive one nine-grid-captcha response: the JSON plus its ``imgs`` and ``ques`` images.

    Files go to ``nine4_json``, ``nine4_imgs`` and ``nine4_ques`` in the
    current directory, named with the zero-padded counter ``ii`` and a random
    salt.

    :param resp_json: response dictionary; a string ``data`` field is decoded first.
    :param ii: running counter used in the file names.
    :return: ``ii`` unchanged.
    """
    resp_json = handle_json_data(resp_json)
    json_dir, imgs_dir, ques_dir = "nine4_json", "nine4_imgs", "nine4_ques"
    for folder in (json_dir, imgs_dir, ques_dir):
        create_directory(folder)

    kk, ss = generate_filename_prefix(ii)
    write_json_file(resp_json, os.path.join(json_dir, f"res_{kk}_{ss}.json"))

    # The first JSONPath match of each key is the one that gets downloaded.
    img_urls, img_total = generate_url(jsonpath(resp_json, '$..imgs')[0])
    download_img(img_urls, generate_paths(imgs_dir, f"img_{kk}_{ss}", img_total))

    ques_urls, ques_total = generate_url(jsonpath(resp_json, '$..ques')[0])
    download_img(ques_urls, generate_paths(ques_dir, f"ques_{kk}_{ss}", ques_total))
    return ii
183 |
184 |
185 |
186 |
def process_and_download(
        resp_json: dict,  # response JSON
        ii: int,  # running counter
        imgtype: str,  # captcha type, used as directory prefix
        imgs_key: str = 'imgs',  # JSON key holding the image path(s)
        additional_key: str = None,  # optional second JSON key to download as well
) -> int:
    """Generic archiver: save the response JSON and download the images it references.

    The JSON is written to ``<imgtype>_json``. Images found under ``imgs_key``
    go to ``<imgtype>_<imgs_key>`` and, when ``additional_key`` is given,
    images under that key go to ``<imgtype>_<additional_key>``. Image file
    names are prefixed with the key they came from.

    :return: ``ii`` unchanged.
    """
    resp_json = handle_json_data(resp_json)
    json_dir = f"{imgtype}_json"
    create_directory(json_dir)

    def _fetch(key, target_dir, stem):
        # Download the first JSONPath match of `key` into `target_dir`.
        urls, total = generate_url(jsonpath(resp_json, f'$..{key}')[0])
        download_img(urls, generate_paths(target_dir, stem, total))

    primary_dir = f"{imgtype}_{imgs_key}"
    create_directory(primary_dir)

    kk, ss = generate_filename_prefix(ii)
    # Persist the (possibly decoded) response before fetching any image.
    write_json_file(resp_json, os.path.join(json_dir, f"res_{kk}_{ss}.json"))

    _fetch(imgs_key, primary_dir, f"{imgs_key}_{kk}_{ss}")

    if additional_key:
        extra_dir = f"{imgtype}_{additional_key}"
        create_directory(extra_dir)
        _fetch(additional_key, extra_dir, f"{additional_key}_{kk}_{ss}")
    return ii
218 |
219 | # ##### 一个统一的函数
220 | # process_and_download(resp_json, ii, "icon4", "imgs", "ques")
221 | # process_and_download(resp_json, ii, "slide4", "slice", "bg")
222 | # process_and_download(resp_json, ii, "phrase4", "imgs")
223 | # process_and_download(resp_json, ii, "winlinze4", "imgs")
224 | # process_and_download(resp_json, ii, "nine4", "imgs", "ques")
225 |
--------------------------------------------------------------------------------
/webjs/word3/f1/tools.py:
--------------------------------------------------------------------------------
1 | import base64
2 | import requests
3 | from urllib.parse import urljoin
4 | from urllib import request
5 | from typing import Union
6 | import os
7 |
def send_image2server(image_path: Union[str, bytes],
                      image_id: str = "string",
                      server_url: str = "http://127.0.0.1:9100/gt3/word3"):
    """Send an image to the recognition server and return its result.

    The image is base64-encoded and POSTed as JSON to ``server_url``.

    :param image_path: either the path of an image file (``str``) or the raw
        image bytes (e.g. read with ``open(..., 'rb')`` or taken from an HTTP
        response body).
    :param image_id: identifier sent as ``imageID``. It was previously
        overridden by a duplicated ``"imageID": "string"`` entry in the
        request body, so a caller-supplied id never reached the server.
    :param server_url: endpoint of the recognition service.
    :return: ``response['data']['res']`` when the reply is JSON of that shape,
        otherwise the raw response text.
    :raises ValueError: if ``image_path`` is neither bytes nor the path of an
        existing file.
    """
    if isinstance(image_path, bytes):
        image_data = image_path
    elif isinstance(image_path, str):
        if not os.path.exists(image_path):
            # Same exception type as before, but the message now names the
            # actual problem instead of blaming the argument type.
            raise ValueError(f"image_path does not exist: {image_path}")
        with open(image_path, 'rb') as f:
            image_data = f.read()
    else:
        raise ValueError("image_path should be bytes or str")

    data = {
        "dataType": 2,
        "imageSource": [base64.b64encode(image_data).decode('utf-8')],
        "imageID": image_id,
        "extraicon": None,
        "token": "abc1"
    }
    response = requests.post(server_url, json=data)
    try:
        return response.json()['data']['res']
    except (ValueError, KeyError, TypeError):
        # Not JSON, or not the expected shape: hand back the raw body.
        return response.text
41 |
42 |
def download_img(url: Union[str, list], path: Union[str, list]) -> None:
    """Download one image, or a batch of images, with ``urllib.request.urlretrieve``.

    URLs that do not contain ``https://static.geetest.com`` are resolved
    against that host. Missing parent directories of every target path are
    created, and the caller's ``url`` list is never modified.

    Fixes over the original batch branch: it tested ``"/" in path`` against
    the *list* of paths (and would have passed that list to
    ``os.path.dirname``), so directories were never created; it also rewrote
    the caller's list in place.

    :param url: a single URL, or a list of URLs.
    :param path: target file path including extension, or a list of paths
        matching ``url`` one-to-one.
    :raises AssertionError: if the two lists differ in length.

    Any other combination of argument types is silently ignored, as before.
    """
    if isinstance(url, str) and isinstance(path, str):
        pairs = [(url, path)]
    elif isinstance(url, list) and isinstance(path, list):
        # Explicit raise keeps the former assert's exception type but still
        # runs under `python -O`.
        if len(url) != len(path):
            raise AssertionError("url和path长度不一致")
        pairs = list(zip(url, path))
    else:
        return

    for one_url, one_path in pairs:
        if 'https://static.geetest.com' not in one_url:
            one_url = urljoin('https://static.geetest.com', one_url)
        # dirname handles whichever separator the platform uses.
        parent = os.path.dirname(one_path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        request.urlretrieve(one_url, one_path)
69 |
70 |
71 |
72 |
73 |
if __name__ == "__main__":
    # Manual check against a locally running recognition server.
    image_path = os.path.join("docs", "a01.jpg")
    # send_image2server already returns the parsed result (or the raw text on
    # failure), not a Response object, so print it directly; the former
    # `response.text` raised AttributeError.
    response = send_image2server(image_path)
    print(response)
79 | # import hashlib
80 | # import base64
81 | # with open(image_path, 'rb') as f:
82 | # image_data = f.read()
83 | # imageSource = base64.b64encode(image_data).decode('utf-8')
84 | # imageSource2 = base64.b64decode(bytes(imageSource, 'utf-8'))
85 | # hash_value = hashlib.md5(imageSource2).hexdigest()
86 | # print("Image Hash (MD5):", hash_value)
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
--------------------------------------------------------------------------------
/webjs/word3/f2/loadmodel.py:
--------------------------------------------------------------------------------
1 | from src.method.GTClick import GTClick
2 | from src.method.GTnine import GTnine
3 | from conf.config import gtconf
4 |
5 |
6 |
# Solver instances are built once, when this module is imported; every
# constructor argument that names a file or setting is read from the shared
# `gtconf` mapping.
# NOTE(review): presumably each constructor loads model files from the
# configured paths — confirm in GTClick / GTnine.

# Click solver configured from the 'word' section.
gt3word = GTClick(
    pdetect = gtconf['word']['pdetect'],
    per = gtconf['word']['per'],
    pclass = gtconf['word']['pclass'],
    pclasstags = gtconf['word']['pclasstags'],
    chars_issorted = False,
    rmalpha = True,
)

# Nine-grid solver configured from the 'nine' section.
gt3nine = GTnine(pclass=gtconf['nine']['pclass'])

# Click solver configured from the 'icon4mi' section; differs from the word
# solver above in its config section and in chars_issorted=True.
gt4icon = GTClick(
    pdetect = gtconf['icon4mi']['pdetect'],
    per = gtconf['icon4mi']['per'],
    pclass = gtconf['icon4mi']['pclass'],
    pclasstags = gtconf['icon4mi']['pclasstags'],
    chars_issorted = True,
    rmalpha = True,
)
--------------------------------------------------------------------------------
/webjs/word3/f2/tools.py:
--------------------------------------------------------------------------------
1 | from urllib.parse import urljoin
2 | from urllib import request
3 | from typing import Union
4 | import os
5 |
def download_img(url: Union[str, list], path: Union[str, list]) -> None:
    """Download one image, or a batch of images, with ``urllib.request.urlretrieve``.

    URLs that do not contain ``https://static.geetest.com`` are resolved
    against that host. Missing parent directories of every target path are
    created, and the caller's ``url`` list is never modified.

    Fixes over the original batch branch: it tested ``"/" in path`` against
    the *list* of paths (and would have passed that list to
    ``os.path.dirname``), so directories were never created; it also rewrote
    the caller's list in place.

    :param url: a single URL, or a list of URLs.
    :param path: target file path including extension, or a list of paths
        matching ``url`` one-to-one.
    :raises AssertionError: if the two lists differ in length.

    Any other combination of argument types is silently ignored, as before.
    """
    if isinstance(url, str) and isinstance(path, str):
        pairs = [(url, path)]
    elif isinstance(url, list) and isinstance(path, list):
        # Explicit raise keeps the former assert's exception type but still
        # runs under `python -O`.
        if len(url) != len(path):
            raise AssertionError("url和path长度不一致")
        pairs = list(zip(url, path))
    else:
        return

    for one_url, one_path in pairs:
        if 'https://static.geetest.com' not in one_url:
            one_url = urljoin('https://static.geetest.com', one_url)
        # dirname handles whichever separator the platform uses.
        parent = os.path.dirname(one_path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        request.urlretrieve(one_url, one_path)
32 |
33 |
34 |
def poses2geetest(poses: list) -> str:
    """Convert boxes to the comma-separated ``x_y`` string sent to Geetest.

    For each ``[x1, y1, x2, y2]`` box the centre is computed, truncated to an
    integer, divided by 333.375, multiplied by 100 twice and rounded.

    :param poses: list of ``[x1, y1, x2, y2]`` boxes.
    :return: entries of the form ``"<x>_<y>"`` joined with commas; an empty
        string for an empty list.
    """
    def _scale(coord):
        # Keep the exact operation order: it determines float rounding.
        return int(round(int(coord) / 333.375 * 100 * 100, 0))

    codes = []
    for box in poses:
        centre_x = (box[0] + box[2]) / 2
        centre_y = (box[1] + box[3]) / 2
        codes.append("_".join(str(_scale(value)) for value in (centre_x, centre_y)))
    return ','.join(codes)
52 |
53 |
# No cookies are configured for these requests.
cookies = None

# Static request headers; origin and referer both point at www.bilibili.com.
headers = {
    'authority': 'passport.bilibili.com',
    'accept': '*/*',
    'accept-language': 'zh-CN,zh;q=0.9',
    'cache-control': 'no-cache',
    'origin': 'https://www.bilibili.com',
    'pragma': 'no-cache',
    'referer': 'https://www.bilibili.com',
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0',
}

# Shallow copy of `headers`: adding or replacing keys on one dict does not
# affect the other.
HD= headers.copy()
68 |
--------------------------------------------------------------------------------