├── src
├── __init__.py
├── toolify
│ ├── __init__.py
│ ├── parser.py
│ ├── detector.py
│ ├── prompt.py
│ └── core.py
├── toolify_config.py
├── exceptions.py
├── models.py
├── constants.py
├── utils.py
├── toolify_handler.py
├── config.py
├── token_updater.py
├── token_manager.py
├── response_processor.py
└── api_handler.py
├── data
├── accounts.example.txt
└── tokens.example.txt
├── requirements.txt
├── .gitignore
├── .dockerignore
├── docker-compose.yml
├── Dockerfile
├── .github
└── workflows
│ ├── docker-test.yml
│ └── docker-build-push.yml
├── .env.example
├── get_tokens.py
├── tests
└── test_tool_calling.py
├── k2think_proxy.py
└── README.md
/src/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | K2Think API Proxy 源代码包
3 | """
--------------------------------------------------------------------------------
/data/accounts.example.txt:
--------------------------------------------------------------------------------
1 | # {"email": "user1@example.com", "k2_password": "password1"}
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | fastapi
2 | uvicorn[standard]
3 | httpx
4 | pydantic
5 | python-dotenv
6 | pytz
7 | requests
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | 工具调用参考/
3 | test/
4 | utils/
5 |
6 | .vscode/
7 |
8 | .env
9 | tokens.txt
10 | accounts.txt
11 | tokens.txt.backup
12 | *.pyc
13 | *.log
--------------------------------------------------------------------------------
/src/toolify/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Toolify 插件 - 为 LLM 提供工具调用能力
3 | 从 Toolify 项目提取的核心功能模块
4 | """
5 |
6 | from .core import ToolifyCore
7 | from .parser import parse_function_calls_xml, remove_think_blocks
8 | from .detector import StreamingFunctionCallDetector
9 | from .prompt import generate_function_prompt
10 |
11 | __all__ = [
12 | 'ToolifyCore',
13 | 'parse_function_calls_xml',
14 | 'remove_think_blocks',
15 | 'StreamingFunctionCallDetector',
16 | 'generate_function_prompt',
17 | ]
18 |
19 |
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | # Git相关
2 | .git
3 | .gitignore
4 |
5 | # Python相关
6 | __pycache__/
7 | *.py[cod]
8 | *$py.class
9 | *.so
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 |
27 | # 虚拟环境
28 | env/
29 | venv/
30 | ENV/
31 | env.bak/
32 | venv.bak/
33 |
34 | # IDE相关
35 | .vscode/
36 | .idea/
37 | *.swp
38 | *.swo
39 | *~
40 |
41 | # 配置和数据文件(这些通过volume挂载)
42 | .env
43 | .env.local
44 | .env.example
45 | tokens.txt
46 | tokens.example.txt
47 | tokens.txt.backup
48 |
49 | # 日志文件
50 | *.log
51 | logs/
52 |
53 | # 测试相关
54 | .pytest_cache/
55 | .coverage
56 | htmlcov/
57 | test/
58 |
59 | # Docker相关
60 | Dockerfile*
61 | docker-compose*.yml
62 | .dockerignore
63 |
64 | # 文档
65 | README.md
66 | *.md
67 |
68 | # 其他
69 | .DS_Store
70 | Thumbs.db
--------------------------------------------------------------------------------
/data/tokens.example.txt:
--------------------------------------------------------------------------------
1 | # K2Think Token文件示例
2 | # 每行一个token,以下为示例格式(请替换为实际的token)
3 |
4 | # 注意事项:
5 | # 1. 每行只能有一个token
6 | # 2. 空行和以#开头的注释行会被忽略
7 | # 3. Token失效时会自动标记,可通过API管理界面重置
8 | # 4. 建议至少配置2-3个token以确保高可用性
9 |
10 | # 以下放置几个可用token,随时可能失效,请自行替换自己的token
11 | eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6IjlhMGE1ZDY5LWQ0ZDgtNGFiMC1hYjhjLTQ5ODNmY2NhZDM4NyIsImV4cCI6MTc1ODIwMjg4NX0.mTDsIrtO0iVTE5hhLcX1bTgmJHMydsHQqGKUsucEg_0
12 | eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6ImY2NmU3MWUwLWUyYTUtNGIwMi04MGY1LWE0Y2RiYjJjZTM1OSIsImV4cCI6MTc1ODIwMjg4Nn0.zcNU3ylq5YXFSFidgzQOXwoicqAefUnf9x1HtKFpY2I
13 | eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6IjMxZWZmMWU3LTQwNTUtNDg3ZS04MzA1LWFiMDU5MTE1OTc0OSIsImV4cCI6MTc1ODIwMjkxM30.3lgvrmPo6esDsfAbVlkl37vRsN3EKYs6BXq45bvu9-E
14 | eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6ImIwZDY2YTZmLTAwN2MtNDNkNS1hZWExLWRkMzM2NjM1ZmUyNCIsImV4cCI6MTc1ODIwMjk0NX0.urcVmh_lBivvE6tNnCmVeDW5vW52GXoYFoqFh196T1I
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: '3.8'
2 |
3 | services:
4 | k2think-api:
5 | image: julienol/k2think2api:latest
6 | container_name: k2think-api
7 | ports:
8 | - "${HOST_PORT:-8001}:8001"
9 | volumes:
10 | # 使用目录挂载而非文件挂载,避免文件锁定问题
11 | - ./data:/app/data
12 | # 或者使用命名卷(推荐用于生产环境)
13 | # - k2think_data:/app/data
14 | # 直接以root用户运行,简化权限管理
15 | env_file:
16 | - .env
17 | environment:
18 | - PYTHONUNBUFFERED=1
19 | - PYTHONIOENCODING=utf-8
20 | - PYTHONLEGACYWINDOWSSTDIO=0
21 | - LC_ALL=C.UTF-8
22 | - LANG=C.UTF-8
23 | # 更新配置文件路径指向data目录
24 | - TOKENS_FILE=/app/data/tokens.txt
25 | - ACCOUNTS_FILE=/app/data/accounts.txt
26 | restart: unless-stopped
27 | # 健康检查
28 | healthcheck:
29 | test: ["CMD", "curl", "-f", "http://localhost:8001/health"]
30 | interval: 30s
31 | timeout: 10s
32 | retries: 3
33 | start_period: 10s
34 |
35 | # 可选:使用命名卷(推荐用于生产环境)
36 | # volumes:
37 | # k2think_data:
38 | # driver: local
--------------------------------------------------------------------------------
/src/toolify_config.py:
--------------------------------------------------------------------------------
1 | """
2 | Toolify 配置模块
3 | 管理工具调用功能的配置和实例
4 | """
5 |
6 | import logging
7 | from typing import Optional
8 | from src.toolify import ToolifyCore
9 |
10 | logger = logging.getLogger(__name__)
11 |
12 | # 全局 Toolify 实例
13 | _toolify_instance: Optional[ToolifyCore] = None
14 |
15 |
def get_toolify() -> Optional[ToolifyCore]:
    """
    Return the shared Toolify instance (singleton).

    Returns:
        The ToolifyCore instance, or None when the feature is disabled.
    """
    global _toolify_instance

    # Imported lazily to avoid a circular dependency with src.config.
    from src.config import Config

    if Config.ENABLE_TOOLIFY:
        # Create the singleton lazily on first use.
        if _toolify_instance is None:
            _toolify_instance = ToolifyCore(enable_function_calling=True)
            logger.info("[TOOLIFY] 工具调用功能已启用并初始化")
        return _toolify_instance

    logger.debug("[TOOLIFY] 工具调用功能已禁用")
    return None
37 |
38 |
def is_toolify_enabled() -> bool:
    """Report whether the Toolify feature flag is turned on."""
    # Imported lazily to avoid a circular dependency with src.config.
    from src.config import Config

    return Config.ENABLE_TOOLIFY
43 |
44 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.12-slim
2 |
3 | # 安装curl用于健康检查
4 | RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
5 |
6 | # 设置环境变量 - 强化编码支持
7 | ENV PYTHONUNBUFFERED=1
8 | ENV PYTHONIOENCODING=utf-8
9 | ENV PYTHONLEGACYWINDOWSSTDIO=0
10 | ENV LC_ALL=C.UTF-8
11 | ENV LANG=C.UTF-8
12 |
13 | # 设置工作目录
14 | WORKDIR /app
15 |
16 | # 复制依赖文件并安装
17 | COPY requirements.txt .
18 | RUN pip install --no-cache-dir -r requirements.txt
19 |
20 | # 复制应用代码
21 | COPY k2think_proxy.py .
22 | COPY get_tokens.py .
23 | COPY src/ ./src/
24 |
25 | # 创建数据目录和默认文件
26 | RUN mkdir -p /app/data && \
27 | touch /app/data/tokens.txt && \
28 | echo "# Token文件将由自动更新服务生成" > /app/data/tokens.txt && \
29 | touch /app/data/accounts.txt && \
30 | echo "# 请通过volume挂载实际的accounts.txt文件" > /app/data/accounts.txt
31 |
32 | # 创建简单的启动脚本
33 | RUN echo '#!/bin/bash\n\
34 | # 确保数据目录存在\n\
35 | mkdir -p /app/data\n\
36 | # 直接运行应用\n\
37 | exec "$@"' > /app/entrypoint.sh && \
38 | chmod +x /app/entrypoint.sh
39 |
40 | # 暴露端口
41 | EXPOSE 8001
42 |
43 | # 健康检查
44 | HEALTHCHECK --interval=30s --timeout=10s --start-period=10s --retries=3 \
45 | CMD curl -f http://localhost:8001/health || exit 1
46 |
47 | # 设置entrypoint和默认命令
48 | ENTRYPOINT ["/app/entrypoint.sh"]
49 | CMD ["python", "k2think_proxy.py"]
--------------------------------------------------------------------------------
/.github/workflows/docker-test.yml:
--------------------------------------------------------------------------------
1 | name: Test Docker Build
2 |
3 | on:
4 | # 对PR进行测试构建,但不推送
5 | pull_request:
6 | branches:
7 | - main
8 | - master
9 | paths-ignore:
10 | - 'README.md'
11 | - '*.md'
12 | - '.gitignore'
13 | - 'LICENSE'
14 |
15 | jobs:
16 | test-build:
17 | runs-on: ubuntu-latest
18 |
19 | steps:
20 | - name: Checkout repository
21 | uses: actions/checkout@v4
22 |
23 | - name: Set up Docker Buildx
24 | uses: docker/setup-buildx-action@v3
25 |
26 | - name: Test Docker build
27 | uses: docker/build-push-action@v5
28 | with:
29 | context: .
30 | file: ./Dockerfile
31 | push: false
32 | tags: test:latest
33 | cache-from: type=gha
34 | cache-to: type=gha,mode=max
35 |
36 | - name: Test container startup
37 | run: |
38 | # 创建测试环境变量文件
39 | cat > .env.test << EOF
40 | VALID_API_KEY=test-key
41 | K2THINK_TOKEN=test-token
42 | TOOL_SUPPORT=true
43 | DEBUG_LOGGING=true
44 | HOST=0.0.0.0
45 | PORT=8001
46 | EOF
47 |
48 | # 启动容器
49 | docker run -d --name test-container -p 8001:8001 --env-file .env.test test:latest
50 |
51 | # 等待容器启动
52 | sleep 10
53 |
54 | # 测试健康检查
55 | curl -f http://localhost:8001/health || exit 1
56 |
57 | # 测试模型接口
58 | curl -f http://localhost:8001/v1/models || exit 1
59 |
60 | # 停止容器
61 | docker stop test-container
62 | docker rm test-container
63 |
64 | echo "✅ Docker container test passed!"
65 |
--------------------------------------------------------------------------------
/src/exceptions.py:
--------------------------------------------------------------------------------
1 | """
2 | 自定义异常类模块
3 | 统一管理所有自定义异常
4 | """
5 |
class K2ThinkProxyError(Exception):
    """Base exception for the K2Think proxy service.

    Carries a human-readable message plus the OpenAI-style error type and
    the HTTP status code to return to the client.
    """

    def __init__(self, message: str, error_type: str = "api_error", status_code: int = 500):
        super().__init__(message)
        self.message = message
        self.error_type = error_type
        self.status_code = status_code

class ConfigurationError(K2ThinkProxyError):
    """Raised when the service configuration is invalid."""

    def __init__(self, message: str):
        super().__init__(message, "configuration_error", 500)

class AuthenticationError(K2ThinkProxyError):
    """Raised when the client supplies a bad API key."""

    def __init__(self, message: str = "Invalid API key provided"):
        super().__init__(message, "authentication_error", 401)

class UpstreamError(K2ThinkProxyError):
    """Raised when the upstream K2Think service fails."""

    def __init__(self, message: str, status_code: int = 502):
        super().__init__(message, "upstream_error", status_code)

class TimeoutError(K2ThinkProxyError):
    """Raised when a request times out.

    NOTE: deliberately shares its name with the builtin TimeoutError inside
    this module; callers import it from here explicitly.
    """

    def __init__(self, message: str = "请求超时"):
        super().__init__(message, "timeout_error", 504)

class SerializationError(K2ThinkProxyError):
    """Raised when request data cannot be serialized."""

    def __init__(self, message: str = "请求数据序列化失败"):
        super().__init__(message, "serialization_error", 400)

class ToolProcessingError(K2ThinkProxyError):
    """Raised when tool-call processing fails."""

    def __init__(self, message: str):
        super().__init__(message, "tool_processing_error", 400)

class ContentProcessingError(K2ThinkProxyError):
    """Raised when message-content processing fails."""

    def __init__(self, message: str):
        super().__init__(message, "content_processing_error", 400)
--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
1 | # K2Think API 代理服务环境变量配置文件
2 | # 复制此文件为 .env 并根据实际情况修改配置值
3 | # 思考内容输出控制: MBZUAI-IFM/K2-Think(显示思考) / MBZUAI-IFM/K2-Think-nothink(不显示思考)
4 |
5 | # API认证配置
6 | VALID_API_KEY=sk-k2think # 客户端访问API时使用的密钥
7 | # K2THINK_TOKEN=your_k2think_jwt_token_here # 从K2Think官网获取的JWT Token
8 |
9 | # 服务器配置
10 | HOST=0.0.0.0 # 监听地址,0.0.0.0为所有接口,127.0.0.1为仅本地
11 | PORT=8001 # 服务监听端口
12 |
13 | # 上游API配置
14 | K2THINK_API_URL=https://www.k2think.ai/api/chat/completions # K2Think API完整URL
15 |
16 | # 工具调用配置
17 | ENABLE_TOOLIFY=true
18 |
19 | # Token管理配置
20 | # Token文件路径(每行一个token)
21 | TOKENS_FILE=data/tokens.txt
22 |
23 | # Token最大失败次数(超过后将被标记为失效)
24 | MAX_TOKEN_FAILURES=3
25 |
26 | # Token自动更新配置
27 | ENABLE_TOKEN_AUTO_UPDATE=false # 是否启用token自动更新
28 | TOKEN_UPDATE_INTERVAL=3600 # token更新间隔(秒),默认1小时
29 | ACCOUNTS_FILE=data/accounts.txt # 账户文件路径
30 | GET_TOKENS_SCRIPT=get_tokens.py # token获取脚本路径
31 |
32 | # 代理配置(用于get_tokens.py)
33 | # PROXY_URL=http://username:password@proxy_host:proxy_port # HTTP/HTTPS代理地址,留空则不使用代理
34 | PROXY_URL="" # 示例: http://admin:sk-123456@192.168.10.100:8282
35 |
36 | # 调试配置
37 | LOG_LEVEL=INFO # 调试日志级别: DEBUG/INFO/WARNING/ERROR
38 | DEBUG_LOGGING=false # 是否启用详细请求日志
39 |
40 | # 高级配置
41 | REQUEST_TIMEOUT=60 # HTTP请求超时时间(秒)
42 | MAX_KEEPALIVE_CONNECTIONS=20 # 最大保持连接数
43 | MAX_CONNECTIONS=100 # 最大连接数
44 |
45 | # 性能配置
46 | STREAM_DELAY=0.05 # 流式响应模拟延迟(秒)
47 | STREAM_CHUNK_SIZE=50 # 流式响应块大小(字符数)
48 | MAX_STREAM_TIME=6 # 流式响应块最大用时(秒)
49 |
50 | # 部署配置
51 | APP_ENV=development # 应用环境: development/production/testing
52 | ENABLE_ACCESS_LOG=true # 是否启用访问日志
53 | CORS_ORIGINS=* # CORS允许的源
54 |
55 | # 使用说明:
56 | # 1. 必须配置: VALID_API_KEY, TOKENS_FILE (tokens.txt文件,每行一个token)
57 | # 2. 推荐配置: HOST, PORT
# 3. 可选配置: ENABLE_TOOLIFY, DEBUG_LOGGING
59 | # 4. Token自动更新: 设置ENABLE_TOKEN_AUTO_UPDATE=true并提供accounts.txt文件
60 |
--------------------------------------------------------------------------------
/.github/workflows/docker-build-push.yml:
--------------------------------------------------------------------------------
1 | name: Build and Push Docker Image
2 |
3 | on:
4 | # 自动触发:当推送到main分支时
5 | push:
6 | branches:
7 | - main
8 | - master
9 | # 忽略README等文档文件的更改
10 | paths-ignore:
11 | - 'README.md'
12 | - '*.md'
13 | - '.gitignore'
14 | - 'LICENSE'
15 |
16 | # 手动触发
17 | workflow_dispatch:
18 | inputs:
19 | tag:
20 | description: 'Docker image tag (default: latest)'
21 | required: false
22 | default: 'latest'
23 |
24 | env:
25 | REGISTRY: docker.io
26 | IMAGE_NAME: julienol/k2think2api
27 |
28 | jobs:
29 | build-and-push:
30 | runs-on: ubuntu-latest
31 |
32 | steps:
33 | - name: Checkout repository
34 | uses: actions/checkout@v4
35 |
36 | - name: Set up Docker Buildx
37 | uses: docker/setup-buildx-action@v3
38 |
39 | - name: Log in to Docker Hub
40 | uses: docker/login-action@v3
41 | with:
42 | username: ${{ secrets.DOCKER_USERNAME }}
43 | password: ${{ secrets.DOCKER_PASSWORD }}
44 |
45 | - name: Extract metadata
46 | id: meta
47 | uses: docker/metadata-action@v5
48 | with:
49 | images: ${{ env.IMAGE_NAME }}
50 | tags: |
51 | type=ref,event=branch
52 | type=ref,event=pr
53 | type=sha,prefix={{branch}}-
54 | type=raw,value=latest,enable={{is_default_branch}}
55 | type=raw,value=${{ github.event.inputs.tag }},enable=${{ github.event_name == 'workflow_dispatch' }}
56 |
57 | - name: Build and push Docker image
58 | uses: docker/build-push-action@v5
59 | with:
60 | context: .
61 | file: ./Dockerfile
62 | push: true
63 | tags: ${{ steps.meta.outputs.tags }}
64 | labels: ${{ steps.meta.outputs.labels }}
65 | platforms: linux/amd64,linux/arm64
66 | cache-from: type=gha
67 | cache-to: type=gha,mode=max
68 |
69 | - name: Update Docker Hub description
70 | uses: peter-evans/dockerhub-description@v4
71 | with:
72 | username: ${{ secrets.DOCKER_USERNAME }}
73 | password: ${{ secrets.DOCKER_PASSWORD }}
74 | repository: ${{ env.IMAGE_NAME }}
75 | readme-filepath: ./README.md
--------------------------------------------------------------------------------
/src/models.py:
--------------------------------------------------------------------------------
1 | """
2 | 数据模型定义
3 | 定义所有API请求和响应的数据模型
4 | """
5 | from pydantic import BaseModel
6 | from typing import List, Dict, Optional, Union, Any
7 |
class ImageUrl(BaseModel):
    """Image URL model for vision content."""
    url: str
    detail: Optional[str] = None or "auto"  # image detail level; defaults to "auto"

class ContentPart(BaseModel):
    """Content part model for OpenAI's list-style content format."""
    type: str  # content part type, e.g. "text" or "image_url"
    text: Optional[str] = None
    image_url: Optional[ImageUrl] = None

class Message(BaseModel):
    """A single chat message (OpenAI-compatible)."""
    role: str
    content: Optional[Union[str, List[ContentPart]]] = None
    tool_call_id: Optional[str] = None  # present on "tool" role messages
    tool_calls: Optional[List[Dict[str, Any]]] = None  # present on assistant messages
24 |
class FunctionParameters(BaseModel):
    """JSON-schema style parameters block for a function."""
    type: str = "object"
    # NOTE: pydantic copies mutable field defaults per instance, so {} is safe here.
    properties: Dict[str, Any] = {}
    required: Optional[List[str]] = None

class FunctionDefinition(BaseModel):
    """Function definition (name, description, parameter schema)."""
    name: str
    description: Optional[str] = None
    parameters: Optional[FunctionParameters] = None

class ToolDefinition(BaseModel):
    """Tool definition wrapping a function definition."""
    type: str = "function"
    function: FunctionDefinition

class ToolChoice(BaseModel):
    """Tool choice specification."""
    type: str = "function"
    function: Dict[str, str]  # {"name": "tool_name"}
46 |
class ChatCompletionRequest(BaseModel):
    """OpenAI-compatible chat-completions request body."""
    model: str = "MBZUAI-IFM/K2-Think"
    messages: List[Message]
    stream: bool = False
    temperature: float = 0.7
    max_tokens: Optional[int] = None
    top_p: Optional[float] = None
    frequency_penalty: Optional[float] = None
    presence_penalty: Optional[float] = None
    stop: Optional[Union[str, List[str]]] = None
    # Tool-calling fields
    tools: Optional[List[ToolDefinition]] = None
    tool_choice: Optional[Union[str, ToolChoice]] = None  # "auto", "none", or a specific tool

class ModelInfo(BaseModel):
    """One entry of the model listing endpoint."""
    id: str
    object: str = "model"
    created: int
    owned_by: str
    permission: List[Dict] = []
    root: str
    parent: Optional[str] = None

class ModelsResponse(BaseModel):
    """Response envelope for the model listing endpoint."""
    object: str = "list"
    data: List[ModelInfo]
--------------------------------------------------------------------------------
/src/constants.py:
--------------------------------------------------------------------------------
1 | """
2 | 常量定义模块
3 | 统一管理所有魔法数字和硬编码字符串
4 | """
5 |
6 | # API相关常量
class APIConstants:
    """API-level constants: model identity, HTTP status codes, auth prefix."""
    MODEL_ID = "MBZUAI-IFM/K2-Think"
    MODEL_ID_NOTHINK = "MBZUAI-IFM/K2-Think-nothink"
    MODEL_OWNER = "MBZUAI"
    MODEL_ROOT = "mbzuai-k2-think-2508"

    # HTTP status codes
    HTTP_OK = 200
    HTTP_UNAUTHORIZED = 401
    HTTP_NOT_FOUND = 404
    HTTP_INTERNAL_ERROR = 500
    HTTP_GATEWAY_TIMEOUT = 504

    # Authentication
    BEARER_PREFIX = "Bearer "
    BEARER_PREFIX_LENGTH = 7  # len("Bearer ")

# Response-related constants
class ResponseConstants:
    """Constants describing OpenAI-compatible response objects."""
    CHAT_COMPLETION_OBJECT = "chat.completion"
    CHAT_COMPLETION_CHUNK_OBJECT = "chat.completion.chunk"
    MODEL_OBJECT = "model"
    LIST_OBJECT = "list"

    # Finish reasons
    FINISH_REASON_STOP = "stop"
    FINISH_REASON_ERROR = "error"

    # Streaming (SSE) markers
    STREAM_DONE_MARKER = "data: [DONE]\n\n"
    STREAM_DATA_PREFIX = "data: "
38 |
39 | # 内容处理相关常量
class ContentConstants:
    """Constants used when processing message content."""

    # XML tags wrapping the model's reasoning and final answer sections.
    # Restored as real tag literals — empty strings here would make any
    # split/strip on them misbehave.
    THINK_START_TAG = "<think>"
    THINK_END_TAG = "</think>"
    ANSWER_START_TAG = "<answer>"
    ANSWER_END_TAG = "</answer>"

    # Content part types (OpenAI content format)
    TEXT_TYPE = "text"
    IMAGE_URL_TYPE = "image_url"

    # Placeholder substituted for image parts
    IMAGE_PLACEHOLDER = "[图像内容]"

    # Default user metadata values
    DEFAULT_USER_NAME = "User"
    DEFAULT_USER_LOCATION = "Unknown"
    DEFAULT_USER_LANGUAGE = "en-US"
    DEFAULT_TIMEZONE = "Asia/Shanghai"
59 |
60 | # 错误消息常量
class ErrorMessages:
    """Canonical error strings and error-type identifiers."""
    INVALID_API_KEY = "Invalid API key provided"
    AUTHENTICATION_ERROR = "authentication_error"
    UPSTREAM_ERROR = "upstream_error"
    TIMEOUT_ERROR = "timeout_error"
    API_ERROR = "api_error"

    # Chinese user-facing error messages
    REQUEST_TIMEOUT = "请求超时"
    SERIALIZATION_FAILED = "请求数据序列化失败"
    UPSTREAM_SERVICE_ERROR = "上游服务错误"

# Log message constants
class LogMessages:
    """Log message templates (placeholders filled via str.format)."""
    MESSAGE_RECEIVED = "📥 接收到的原始消息数: {}"
    ROLE_DISTRIBUTION = "📊 {}消息角色分布: {}"
    JSON_VALIDATION_SUCCESS = "✅ K2Think请求体JSON序列化验证通过"
    JSON_VALIDATION_FAILED = "❌ K2Think请求体JSON序列化失败: {}"
    JSON_FIXED = "🔧 使用default=str修复了序列化问题"

    # Dynamic chunk-size calculation log
    DYNAMIC_CHUNK_CALC = "动态chunk_size计算: 内容长度={}, 计算值={}, 最终值={}"

# HTTP header constants
class HeaderConstants:
    """HTTP header names and common header values."""
    AUTHORIZATION = "Authorization"
    CONTENT_TYPE = "Content-Type"
    ACCEPT = "Accept"
    ORIGIN = "Origin"
    REFERER = "Referer"
    USER_AGENT = "User-Agent"
    CACHE_CONTROL = "Cache-Control"
    CONNECTION = "Connection"
    X_ACCEL_BUFFERING = "X-Accel-Buffering"

    # Header values
    APPLICATION_JSON = "application/json"
    TEXT_EVENT_STREAM = "text/event-stream"
    EVENT_STREAM_JSON = "text/event-stream,application/json"
    NO_CACHE = "no-cache"
    KEEP_ALIVE = "keep-alive"
    NO_BUFFERING = "no"

    # Browser-like User-Agent value
    DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36 Edg/140.0.0.0"

# Time-related constants
class TimeConstants:
    """strftime formats and time conversion factors."""
    # Time formats
    DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S"
    DATE_FORMAT = "%Y-%m-%d"
    TIME_FORMAT = "%H:%M:%S"
    WEEKDAY_FORMAT = "%A"

    # Seconds-to-microseconds conversion
    MICROSECONDS_MULTIPLIER = 1000000

# Numeric constants
class NumericConstants:
    """Miscellaneous numeric limits and defaults."""
    # Lower bound for chunk size
    MIN_CHUNK_SIZE = 50

    # Content preview truncation
    CONTENT_PREVIEW_LENGTH = 200
    CONTENT_PREVIEW_SUFFIX = "..."

    # Default token usage values
    DEFAULT_PROMPT_TOKENS = 0
    DEFAULT_COMPLETION_TOKENS = 0
    DEFAULT_TOTAL_TOKENS = 0
--------------------------------------------------------------------------------
/src/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | 工具函数模块
3 | 提供通用的工具函数
4 | """
5 | import logging
6 | import sys
7 |
def safe_log_error(logger: logging.Logger, message: str, exception: Exception = None):
    """
    Log an error message defensively, shielding the caller from any
    encoding problem in the message or the exception text.

    Args:
        logger: destination logger
        message: error message
        exception: optional exception whose text is appended to the message
    """
    try:
        # Normalise the message to a string first.
        if not isinstance(message, str):
            message = str(message)

        if exception:
            # Render the exception text without letting encoding issues escape.
            try:
                detail = str(exception)
                if isinstance(detail, bytes):
                    detail = detail.decode('utf-8', errors='replace')
                else:
                    detail = detail.encode('utf-8', errors='replace').decode('utf-8')
            except Exception:
                detail = repr(exception)
            full_message = f"{message}: {detail}"
        else:
            full_message = message

        # Re-sanitise the combined message before handing it to the logger.
        try:
            if isinstance(full_message, bytes):
                safe_message = full_message.decode('utf-8', errors='replace')
            else:
                safe_message = full_message.encode('utf-8', errors='replace').decode('utf-8')
        except Exception:
            safe_message = repr(full_message)

        logger.error(safe_message)

    except Exception as e:
        # The sanitising path itself failed — fall back to cruder logging.
        try:
            logger.error(f"Logging error: {repr(e)}, Original: {repr(message)}")
        except Exception:
            # Last resort — write straight to stderr.
            try:
                print(f"CRITICAL LOGGING FAILURE: {repr(message)}", file=sys.stderr)
            except Exception:
                pass  # even print failed; give up
60 |
def safe_log_info(logger: logging.Logger, message: str):
    """
    Log an info message defensively, shielding the caller from any
    encoding problem in the message.

    Args:
        logger: destination logger
        message: message to log
    """
    try:
        if not isinstance(message, str):
            message = str(message)

        # Round-trip through UTF-8 so un-encodable characters are replaced.
        try:
            if isinstance(message, bytes):
                safe_message = message.decode('utf-8', errors='replace')
            else:
                safe_message = message.encode('utf-8', errors='replace').decode('utf-8')
        except Exception:
            safe_message = repr(message)

        logger.info(safe_message)

    except Exception as e:
        try:
            logger.info(f"Logging info error: {repr(e)}, Original: {repr(message)}")
        except Exception:
            # Last resort — write straight to stderr.
            try:
                print(f"CRITICAL INFO LOGGING FAILURE: {repr(message)}", file=sys.stderr)
            except Exception:
                pass
94 |
def safe_log_warning(logger: logging.Logger, message: str):
    """
    Log a warning message defensively, shielding the caller from any
    encoding problem in the message.

    Args:
        logger: destination logger
        message: warning message
    """
    try:
        if not isinstance(message, str):
            message = str(message)

        # Round-trip through UTF-8 so un-encodable characters are replaced.
        try:
            if isinstance(message, bytes):
                safe_message = message.decode('utf-8', errors='replace')
            else:
                safe_message = message.encode('utf-8', errors='replace').decode('utf-8')
        except Exception:
            safe_message = repr(message)

        logger.warning(safe_message)

    except Exception as e:
        try:
            logger.warning(f"Logging warning error: {repr(e)}, Original: {repr(message)}")
        except Exception:
            # Last resort — write straight to stderr.
            try:
                print(f"CRITICAL WARNING LOGGING FAILURE: {repr(message)}", file=sys.stderr)
            except Exception:
                pass
128 |
def safe_str(obj) -> str:
    """
    Convert *obj* to a string without raising on encoding problems.

    Args:
        obj: object to convert

    Returns:
        str: safe string representation (un-decodable bytes are replaced)
    """
    try:
        if isinstance(obj, bytes):
            return obj.decode('utf-8', errors='replace')
        text = obj if isinstance(obj, str) else str(obj)
        return text.encode('utf-8', errors='replace').decode('utf-8')
    except Exception:
        return repr(obj)
--------------------------------------------------------------------------------
/src/toolify/parser.py:
--------------------------------------------------------------------------------
1 | """
2 | Toolify XML 解析器
3 | 解析模型响应中的工具调用XML格式
4 | """
5 |
6 | import re
7 | import json
8 | import logging
9 | from typing import List, Dict, Any, Optional
10 |
11 | logger = logging.getLogger(__name__)
12 |
13 |
14 | def remove_think_blocks(text: str) -> str:
15 | """
16 | 临时移除所有 ... 块用于XML解析
17 | 支持嵌套think标签
18 | 注意:此函数仅用于临时解析,不影响返回给用户的原始内容
19 | """
20 | while '' in text and '' in text:
21 | start_pos = text.find('')
22 | if start_pos == -1:
23 | break
24 |
25 | pos = start_pos + 7
26 | depth = 1
27 |
28 | while pos < len(text) and depth > 0:
29 | if text[pos:pos+7] == '':
30 | depth += 1
31 | pos += 7
32 | elif text[pos:pos+8] == '':
33 | depth -= 1
34 | pos += 8
35 | else:
36 | pos += 1
37 |
38 | if depth == 0:
39 | text = text[:start_pos] + text[pos:]
40 | else:
41 | break
42 |
43 | return text
44 |
45 |
46 | def parse_function_calls_xml(xml_string: str, trigger_signal: str) -> Optional[List[Dict[str, Any]]]:
47 | """
48 | 增强型XML解析函数,支持动态触发信号
49 |
50 | 1. 保留 ... 块(它们应正常返回给用户)
51 | 2. 解析时临时移除think块,防止干扰XML解析
52 | 3. 查找触发信号的最后一次出现
53 | 4. 从最后一个触发信号开始解析function_calls
54 |
55 | Args:
56 | xml_string: 包含XML的响应字符串
57 | trigger_signal: 触发信号字符串
58 |
59 | Returns:
60 | 解析出的工具调用列表,格式为 [{"name": "tool_name", "args": {...}}, ...]
61 | 如果没有找到工具调用,返回None
62 | """
63 | logger.debug(f"[TOOLIFY] 开始解析XML,输入长度: {len(xml_string) if xml_string else 0}")
64 | logger.debug(f"[TOOLIFY] 使用触发信号: {trigger_signal[:20]}...")
65 |
66 | if not xml_string or trigger_signal not in xml_string:
67 | logger.debug(f"[TOOLIFY] 输入为空或不包含触发信号")
68 | return None
69 |
70 | # 临时移除think块用于解析
71 | cleaned_content = remove_think_blocks(xml_string)
72 | logger.debug(f"[TOOLIFY] 移除think块后内容长度: {len(cleaned_content)}")
73 |
74 | # 查找所有触发信号位置
75 | signal_positions = []
76 | start_pos = 0
77 | while True:
78 | pos = cleaned_content.find(trigger_signal, start_pos)
79 | if pos == -1:
80 | break
81 | signal_positions.append(pos)
82 | start_pos = pos + 1
83 |
84 | if not signal_positions:
85 | logger.debug(f"[TOOLIFY] 在清理后的内容中未找到触发信号")
86 | return None
87 |
88 | logger.debug(f"[TOOLIFY] 找到 {len(signal_positions)} 个触发信号位置: {signal_positions}")
89 |
90 | # 使用最后一个触发信号位置
91 | last_signal_pos = signal_positions[-1]
92 | content_after_signal = cleaned_content[last_signal_pos:]
93 | logger.debug(f"[TOOLIFY] 从最后触发信号开始的内容: {repr(content_after_signal[:100])}")
94 |
95 | # 查找function_calls标签
96 | calls_content_match = re.search(r"([\s\S]*?)", content_after_signal)
97 | if not calls_content_match:
98 | logger.warning(f"[TOOLIFY] 未找到function_calls标签!内容: {repr(content_after_signal[:300])}")
99 | # 检查是否有不完整的function_calls开始标签
100 | if "([\s\S]*?)", calls_content)
110 | logger.debug(f"[TOOLIFY] 找到 {len(call_blocks)} 个function_call块")
111 |
112 | for i, block in enumerate(call_blocks):
113 | logger.debug(f"[TOOLIFY] 处理function_call #{i+1}: {repr(block)}")
114 |
115 | # 提取tool名称
116 | tool_match = re.search(r"(.*?)", block)
117 | if not tool_match:
118 | logger.debug(f"[TOOLIFY] 块 #{i+1} 中未找到tool标签")
119 | continue
120 |
121 | name = tool_match.group(1).strip()
122 | args = {}
123 |
124 | # 提取args块
125 | args_block_match = re.search(r"([\s\S]*?)", block)
126 | if args_block_match:
127 | args_content = args_block_match.group(1)
128 | # 支持包含连字符的参数标签名(如-i, -A);匹配任何非空格、非'>'、非'/'字符
129 | arg_matches = re.findall(r"<([^\s>/]+)>([\s\S]*?)\1>", args_content)
130 |
131 | def _coerce_value(v: str):
132 | """尝试将字符串值转换为JSON对象"""
133 | try:
134 | return json.loads(v)
135 | except Exception:
136 | pass
137 | return v
138 |
139 | for k, v in arg_matches:
140 | args[k] = _coerce_value(v)
141 |
142 | result = {"name": name, "args": args}
143 | results.append(result)
144 | logger.debug(f"[TOOLIFY] 添加工具调用: {result}")
145 |
146 | logger.debug(f"[TOOLIFY] 最终解析结果: {results}")
147 | return results if results else None
148 |
149 |
--------------------------------------------------------------------------------
/src/toolify_handler.py:
--------------------------------------------------------------------------------
1 | """
2 | Toolify 请求和响应处理模块
3 | 处理工具调用相关的请求预处理和响应解析
4 | """
5 |
6 | import json
7 | import logging
8 | import uuid
9 | from typing import Dict, Any, List, Optional
10 |
11 | from src.toolify_config import get_toolify, is_toolify_enabled
12 | from src.toolify.prompt import generate_function_prompt, safe_process_tool_choice
13 | from src.toolify.parser import parse_function_calls_xml
14 |
15 | logger = logging.getLogger(__name__)
16 |
17 |
def should_enable_toolify(request_dict: Dict[str, Any]) -> bool:
    """
    Decide whether tool calling should be enabled for this request.

    Args:
        request_dict: request dictionary

    Returns:
        True when the Toolify feature flag is on AND the request carries at
        least one tool definition; otherwise False.
    """
    if not is_toolify_enabled():
        return False

    # bool(...) so the function honours its declared return type instead of
    # leaking None (missing key) or [] (empty list) to callers.
    return bool(request_dict.get("tools"))
35 |
36 |
def prepare_toolify_request(request_dict: Dict[str, Any], messages: List[Dict[str, Any]]) -> tuple[List[Dict[str, Any]], bool]:
    """
    Prepare a request for tool calling.

    Preprocesses the messages, builds a function-calling system prompt from
    the request's tool definitions, and injects it at the head of the
    message list.

    Args:
        request_dict: original request dictionary
        messages: message list

    Returns:
        (processed message list, whether tool calling was enabled)
    """
    toolify = get_toolify()
    if not toolify:
        return messages, False

    tools = request_dict.get("tools")
    if not tools or len(tools) == 0:
        return messages, False

    logger.info(f"[TOOLIFY] 检测到 {len(tools)} 个工具定义,启用工具调用功能")

    # Preprocess messages (converts tool / tool_calls entries).
    processed_messages = toolify.preprocess_messages(messages)
    logger.debug(f"[TOOLIFY] 消息预处理完成: {len(messages)} -> {len(processed_messages)}")

    # Build the function-calling prompt.
    # Imported here to avoid a circular dependency with src.config.
    from src.config import Config
    # NOTE(review): trigger_signal is unpacked but never used below — verify
    # whether it should be propagated to the caller.
    function_prompt, trigger_signal = generate_function_prompt(
        tools,
        toolify.trigger_signal,
        Config.TOOLIFY_CUSTOM_PROMPT
    )

    # Handle tool_choice (may append a constraint to the prompt).
    tool_choice = request_dict.get("tool_choice")
    tool_choice_prompt = safe_process_tool_choice(tool_choice)
    if tool_choice_prompt:
        function_prompt += tool_choice_prompt

    # Inject the system prompt at the head of the message list.
    system_message = {"role": "system", "content": function_prompt}
    processed_messages.insert(0, system_message)

    logger.debug(f"[TOOLIFY] 已注入工具调用系统提示词,消息数: {len(processed_messages)}")

    return processed_messages, True
83 |
84 |
def parse_toolify_response(content: str, model: str) -> Optional[Dict[str, Any]]:
    """
    Parse tool calls out of a model response.

    Args:
        content: response text
        model: model name (kept for interface compatibility)

    Returns:
        An assistant-message dict carrying "tool_calls" when tool calls are
        detected; otherwise None.
    """
    toolify = get_toolify()
    if not toolify:
        return None

    logger.debug(f"[TOOLIFY] 开始解析响应中的工具调用,内容长度: {len(content)}")

    # Parse XML-formatted tool calls from the response text.
    parsed_tools = parse_function_calls_xml(content, toolify.trigger_signal)
    if not parsed_tools:
        logger.debug("[TOOLIFY] 未检测到工具调用")
        return None

    logger.info(f"[TOOLIFY] 检测到 {len(parsed_tools)} 个工具调用")

    # Hand back an OpenAI-format assistant message.
    return {
        "tool_calls": toolify.convert_parsed_tools_to_openai_format(parsed_tools),
        "content": None,
        "role": "assistant",
    }
119 |
120 |
def format_toolify_response_for_stream(parsed_tools: List[Dict[str, Any]], model: str, chat_id: str) -> List[str]:
    """
    Format parsed tool calls as SSE chunks for a streaming response.

    Args:
        parsed_tools: Tool calls extracted from the model output.
        model: Model name echoed back in each chunk.
        chat_id: Completion id echoed back in each chunk.

    Returns:
        SSE "data:" lines ending with the [DONE] sentinel; empty when the
        toolify plugin is disabled.
    """
    import time

    toolify = get_toolify()
    if not toolify:
        return []

    tool_calls = toolify.convert_parsed_tools_to_openai_format(parsed_tools)

    # Per the OpenAI spec, "created" is a Unix timestamp in seconds.
    # The previous `int(uuid.uuid4().time_low)` was just 32 random bits
    # (and differed between the two chunks of the same completion).
    created_ts = int(time.time())
    chunks: List[str] = []

    # First chunk: role plus the complete tool_calls delta.
    initial_chunk = {
        "id": chat_id,
        "object": "chat.completion.chunk",
        "created": created_ts,
        "model": model,
        "choices": [{
            "index": 0,
            "delta": {
                "role": "assistant",
                "content": None,
                "tool_calls": tool_calls
            },
            "finish_reason": None
        }],
    }
    chunks.append(f"data: {json.dumps(initial_chunk)}\n\n")

    # Final chunk: empty delta, finish_reason signals tool calls.
    final_chunk = {
        "id": chat_id,
        "object": "chat.completion.chunk",
        "created": created_ts,
        "model": model,
        "choices": [{
            "index": 0,
            "delta": {},
            "finish_reason": "tool_calls"
        }],
    }
    chunks.append(f"data: {json.dumps(final_chunk)}\n\n")
    chunks.append("data: [DONE]\n\n")

    return chunks
174 |
175 |
--------------------------------------------------------------------------------
/src/toolify/detector.py:
--------------------------------------------------------------------------------
1 | """
2 | Toolify 流式检测器
3 | 用于在流式响应中检测工具调用
4 | """
5 |
6 | import logging
7 | from typing import Optional, List, Dict, Any
8 | from .parser import parse_function_calls_xml
9 |
10 | logger = logging.getLogger(__name__)
11 |
12 |
class StreamingFunctionCallDetector:
    """
    Streaming function-call detector supporting a dynamic trigger signal,
    designed to avoid false positives inside think blocks.

    Core behaviors:
    1. Never triggers tool-call detection inside <think> blocks
    2. Streams think-block content through to the user untouched
    3. Supports nested think tags

    NOTE(review): the tag literals '<think>', '</think>' and
    '<function_calls>' were empty strings in the checked-in file —
    evidently stripped by an HTML/Markdown rendering step. They are
    restored here based on the hard-coded skip widths (return 7 / 8 ==
    len('<think>') / len('</think>')) and the parser entry point
    parse_function_calls_xml; confirm the wrapper tag name against
    src/toolify/parser.py.
    """

    def __init__(self, trigger_signal: str):
        self.trigger_signal = trigger_signal
        self.reset()

    def reset(self):
        """Reset all detector state for a fresh stream."""
        self.content_buffer = ""
        self.state = "detecting"  # detecting, signal_detected, tool_parsing
        self.in_think_block = False
        self.think_depth = 0
        self.signal = self.trigger_signal
        self.signal_len = len(self.signal)
        self.signal_position = -1  # position of the trigger signal (-1 = none)

    def process_chunk(self, delta_content: str) -> tuple[bool, str]:
        """
        Process one streamed content delta.

        Args:
            delta_content: Newly received content.

        Returns:
            (is_tool_call_detected, content_to_yield): whether a tool call
            was confirmed, and the content that is safe to emit.
        """
        if not delta_content:
            return False, ""

        self.content_buffer += delta_content
        content_to_yield = ""

        if self.state == "tool_parsing":
            # Already parsing a tool call: keep accumulating silently.
            logger.debug(f"[TOOLIFY-DETECTOR] 状态已是tool_parsing,继续累积,缓冲区长度: {len(self.content_buffer)}")
            return False, ""

        if self.state == "signal_detected":
            # Trigger signal seen; wait for the <function_calls> tag to confirm.
            logger.debug(f"[TOOLIFY-DETECTOR] 状态是signal_detected,检查是否有,缓冲区长度: {len(self.content_buffer)}")
            if "<function_calls>" in self.content_buffer:
                logger.debug(f"[TOOLIFY-DETECTOR] 确认有标签,进入tool_parsing状态")
                self.state = "tool_parsing"
                return True, ""
            elif len(self.content_buffer) > 300:
                # No tag within 300 chars of the signal: treat as false positive.
                logger.debug(f"[TOOLIFY-DETECTOR] 触发信号后300字符内未发现,视为误判,恢复正常输出")
                self.state = "detecting"
                # Flush everything we had buffered.
                output = self.content_buffer
                self.content_buffer = ""
                self.signal_position = -1
                return False, output
            else:
                # Keep waiting for more content.
                return False, ""

        if delta_content:
            logger.debug(f"[TOOLIFY-DETECTOR] 处理块: {repr(delta_content[:50])}{'...' if len(delta_content) > 50 else ''}, 缓冲区长度: {len(self.content_buffer)}, think状态: {self.in_think_block}")

        i = 0
        while i < len(self.content_buffer):
            # Track <think>/</think> nesting; think content passes through.
            skip_chars = self._update_think_state(i)
            if skip_chars > 0:
                for j in range(skip_chars):
                    if i + j < len(self.content_buffer):
                        content_to_yield += self.content_buffer[i + j]
                i += skip_chars
                continue

            # Only look for the trigger signal outside think blocks.
            if not self.in_think_block and self._can_detect_signal_at(i):
                if self.content_buffer[i:i+self.signal_len] == self.signal:
                    # Trigger signal found.
                    logger.debug(f"[TOOLIFY-DETECTOR] 在非think块中检测到触发信号! 信号: {self.signal[:20]}...")
                    logger.debug(f"[TOOLIFY-DETECTOR] 触发信号位置: {i}, think状态: {self.in_think_block}, think深度: {self.think_depth}")

                    # Emit content before the signal; keep the signal and
                    # everything after it buffered pending verification.
                    self.state = "signal_detected"
                    self.signal_position = 0  # signal is now at buffer start
                    self.content_buffer = self.content_buffer[i:]
                    logger.debug(f"[TOOLIFY-DETECTOR] 进入signal_detected状态,等待标签")
                    return False, content_to_yield

            # Not enough lookahead to decide; keep the tail buffered.
            remaining_len = len(self.content_buffer) - i
            if remaining_len < self.signal_len or remaining_len < 8:
                break

            content_to_yield += self.content_buffer[i]
            i += 1

        self.content_buffer = self.content_buffer[i:]
        return False, content_to_yield

    def _update_think_state(self, pos: int):
        """Update think-tag nesting state; returns chars to skip (0 if none)."""
        remaining = self.content_buffer[pos:]

        if remaining.startswith('<think>'):
            self.think_depth += 1
            self.in_think_block = True
            logger.debug(f"[TOOLIFY-DETECTOR] 进入think块,深度: {self.think_depth}")
            return 7  # len('<think>')

        elif remaining.startswith('</think>'):
            self.think_depth = max(0, self.think_depth - 1)
            self.in_think_block = self.think_depth > 0
            logger.debug(f"[TOOLIFY-DETECTOR] 退出think块,深度: {self.think_depth}")
            return 8  # len('</think>')

        return 0

    def _can_detect_signal_at(self, pos: int) -> bool:
        """True when there are enough buffered chars to compare the signal."""
        return (pos + self.signal_len <= len(self.content_buffer) and
                not self.in_think_block)

    def finalize(self) -> tuple[Optional[List[Dict[str, Any]]], str]:
        """
        Final processing when the stream ends.

        Returns:
            (parsed_tools, remaining_content): tool calls parsed from the
            buffer (or None) and any not-yet-emitted plain content.
        """
        logger.debug(f"[TOOLIFY-DETECTOR] finalize() - 当前状态: {self.state}, 缓冲区长度: {len(self.content_buffer)}")

        if self.state == "tool_parsing":
            logger.debug(f"[TOOLIFY-DETECTOR] finalize() - 缓冲区内容前500字符: {repr(self.content_buffer[:500])}")
            result = parse_function_calls_xml(self.content_buffer, self.trigger_signal)
            logger.debug(f"[TOOLIFY-DETECTOR] finalize() - 解析结果: {result}")
            return result, ""

        elif self.state == "signal_detected":
            # Stream ended while still waiting for the tag: the model emitted
            # the trigger signal but no complete tool call.
            logger.debug(f"[TOOLIFY-DETECTOR] finalize() - 流结束但状态是signal_detected,可能是不完整的工具调用")
            logger.debug(f"[TOOLIFY-DETECTOR] finalize() - 缓冲区内容: {repr(self.content_buffer[:300])}")
            # Try to parse anyway; on failure hand the buffer back as text.
            result = parse_function_calls_xml(self.content_buffer, self.trigger_signal)
            if result:
                logger.debug(f"[TOOLIFY-DETECTOR] finalize() - 成功解析出工具调用: {result}")
                return result, ""
            else:
                logger.debug(f"[TOOLIFY-DETECTOR] finalize() - 解析失败,返回缓冲区内容作为普通文本")
                return None, self.content_buffer

        # detecting state: no tool call found; return whatever is buffered.
        if self.content_buffer:
            logger.debug(f"[TOOLIFY-DETECTOR] finalize() - 状态是detecting,返回缓冲区内容: {repr(self.content_buffer[:100])}")
        else:
            logger.debug(f"[TOOLIFY-DETECTOR] finalize() - 状态是detecting,缓冲区为空")
        return None, self.content_buffer
175 |
176 |
--------------------------------------------------------------------------------
/src/toolify/prompt.py:
--------------------------------------------------------------------------------
1 | """
2 | Toolify 提示词生成器
3 | 生成工具调用的系统提示词
4 | """
5 |
6 | import json
7 | import logging
8 | from typing import List, Dict, Any
9 |
10 | logger = logging.getLogger(__name__)
11 |
12 |
def get_function_call_prompt_template(trigger_signal: str, custom_template: str = None) -> str:
    """
    Build the tool-calling prompt template around a dynamic trigger signal.

    Args:
        trigger_signal: The literal marker the model must emit before a tool call.
        custom_template: Optional user-supplied template; may reference
            {trigger_signal} and {tools_list} placeholders.

    Returns:
        Template string still containing a literal "{tools_list}" placeholder
        that generate_function_prompt() substitutes later.
    """
    if custom_template:
        logger.info("[TOOLIFY] 使用配置中的自定义提示词模板")
        # Substitute the signal now but re-emit "{tools_list}" verbatim so the
        # caller can fill it in with the rendered tool list.
        return custom_template.format(
            trigger_signal=trigger_signal,
            tools_list="{tools_list}"
        )

    # NOTE(review): the XML tag literals that this prompt once showed
    # (e.g. the <function_calls>/<invoke> wrapper in rule 2-4 and the
    # worked example below) appear to have been stripped from the file by
    # an HTML/Markdown rendering step — several sentences reference a tag
    # that is no longer visible. The text is kept byte-identical here;
    # restore the tags from src/toolify/parser.py's expected format.
    return f"""
你可以访问以下可用工具来帮助解决问题:

{{tools_list}}

**重要上下文说明:**
1. 如果需要,你可以在单次响应中调用多个工具。
2. 对话上下文中可能已包含之前函数调用的工具执行结果。请仔细查看对话历史,避免不必要的重复工具调用。
3. 当工具执行结果出现在上下文中时,它们将使用 ... 这样的XML标签格式化,便于识别。
4. 这是你可以使用的唯一工具调用格式,任何偏差都将导致失败。

当你需要使用工具时,你**必须**严格遵循以下格式。不要在工具调用语法的第一行和第二行包含任何额外的文本、解释或对话:

1. 开始工具调用时,在新行上准确输出:
{trigger_signal}
不要有前导或尾随空格,完全按照上面显示的输出。触发信号必须单独占一行,且只出现一次。

2. 从第二行开始,**立即**紧跟完整的 XML块。

3. 对于多个工具调用,在同一个 包装器中包含多个 块。

4. 在结束标签 后不要添加任何文本或解释。

严格的参数键规则:
- 你必须使用**完全相同**的参数键(区分大小写和标点符号)。不要重命名、添加或删除字符。
- 如果键以连字符开头(例如 -i, -C),你必须在标签名中保留连字符。例如:<-i>true-i>, <-C>2-C>。
- 永远不要将 "-i" 转换为 "i" 或将 "-C" 转换为 "C"。不要复数化、翻译或给参数键起别名。
- 标签必须包含列表中某个工具的确切名称。任何其他工具名称都是无效的。
- 必须包含该工具的所有必需参数。

正确示例(多个工具调用,包括带连字符的键):
...响应内容(可选)...
{trigger_signal}


search

["Python Document", "how to use python"]



现在请准备好严格遵循以上规范。
"""
75 |
76 |
def generate_function_prompt(tools: List[Dict[str, Any]], trigger_signal: str, custom_template: str = None) -> tuple[str, str]:
    """
    Generate the injected system prompt from the client's tool definitions.

    Args:
        tools: Tool definitions (OpenAI function-calling format).
        trigger_signal: Trigger signal literal.
        custom_template: Optional custom template.

    Returns:
        (prompt_content, trigger_signal): rendered prompt and the signal.
    """
    tools_list_str = []
    for i, tool in enumerate(tools):
        func = tool.get("function", {})
        name = func.get("name", "")
        description = func.get("description", "")

        # Read the JSON Schema fields.
        schema: Dict[str, Any] = func.get("parameters", {}) or {}
        props: Dict[str, Any] = schema.get("properties", {}) or {}
        required_list: List[str] = schema.get("required", []) or []

        # One-line summary: name (type)
        params_summary = ", ".join([
            f"{p_name} ({(p_info or {}).get('type', 'any')})" for p_name, p_info in props.items()
        ]) or "None"

        # Build the detailed per-parameter specification.
        detail_lines: List[str] = []
        for p_name, p_info in props.items():
            p_info = p_info or {}
            p_type = p_info.get("type", "any")
            is_required = "Yes" if p_name in required_list else "No"
            p_desc = p_info.get("description")
            enum_vals = p_info.get("enum")
            default_val = p_info.get("default")
            examples_val = p_info.get("examples") or p_info.get("example")

            # Common constraints and hints.
            constraints: Dict[str, Any] = {}
            for key in [
                "minimum", "maximum", "exclusiveMinimum", "exclusiveMaximum",
                "minLength", "maxLength", "pattern", "format",
                "minItems", "maxItems", "uniqueItems"
            ]:
                if key in p_info:
                    constraints[key] = p_info.get(key)

            # Array item type hint.
            if p_type == "array":
                items = p_info.get("items") or {}
                if isinstance(items, dict):
                    itype = items.get("type")
                    if itype:
                        constraints["items.type"] = itype

            # Assemble the detail lines.
            detail_lines.append(f"- {p_name}:")
            detail_lines.append(f"  - type: {p_type}")
            detail_lines.append(f"  - required: {is_required}")
            if p_desc:
                detail_lines.append(f"  - description: {p_desc}")
            if enum_vals is not None:
                try:
                    detail_lines.append(f"  - enum: {json.dumps(enum_vals, ensure_ascii=False)}")
                except Exception:
                    detail_lines.append(f"  - enum: {enum_vals}")
            if default_val is not None:
                try:
                    detail_lines.append(f"  - default: {json.dumps(default_val, ensure_ascii=False)}")
                except Exception:
                    detail_lines.append(f"  - default: {default_val}")
            if examples_val is not None:
                try:
                    detail_lines.append(f"  - examples: {json.dumps(examples_val, ensure_ascii=False)}")
                except Exception:
                    detail_lines.append(f"  - examples: {examples_val}")
            if constraints:
                try:
                    detail_lines.append(f"  - constraints: {json.dumps(constraints, ensure_ascii=False)}")
                except Exception:
                    detail_lines.append(f"  - constraints: {constraints}")

        detail_block = "\n".join(detail_lines) if detail_lines else "(无参数详情)"

        desc_block = f"```\n{description}\n```" if description else "None"

        # BUG FIX: `name` was computed but never rendered — the heading was
        # just `f"{i + 1}. \n"`, so the model never saw the tool's name
        # (its tag was apparently stripped from the original source).
        tools_list_str.append(
            f"{i + 1}. 工具名称: {name}\n"
            f"   描述:\n{desc_block}\n"
            f"   参数摘要: {params_summary}\n"
            f"   必需参数: {', '.join(required_list) if required_list else 'None'}\n"
            f"   参数详情:\n{detail_block}"
        )

    prompt_template = get_function_call_prompt_template(trigger_signal, custom_template)
    prompt_content = prompt_template.replace("{tools_list}", "\n\n".join(tools_list_str))

    return prompt_content, trigger_signal
177 |
178 |
def safe_process_tool_choice(tool_choice) -> str:
    """
    Safely translate the tool_choice field into an extra prompt snippet.

    Accepts the OpenAI wire formats: None, the strings "none"/"auto"/...,
    a dict {"type": "function", "function": {"name": ...}}, or an object
    with a .function.name attribute (e.g. a pydantic model).

    Args:
        tool_choice: tool_choice parameter (string, dict, or object).

    Returns:
        Additional prompt text ("" when nothing needs to be appended).
    """
    try:
        if tool_choice is None:
            return ""

        if isinstance(tool_choice, str):
            if tool_choice == "none":
                return "\n\n**重要提示:** 本轮你被禁止使用任何工具。请像普通聊天助手一样响应,直接回答用户的问题。"
            else:
                logger.debug(f"[TOOLIFY] 未知的tool_choice字符串值: {tool_choice}")
                return ""

        # BUG FIX: dict form is what JSON request bodies actually deliver
        # (see tests/test_tool_calling.py's forced-tool example); the old
        # hasattr() check silently ignored it.
        if isinstance(tool_choice, dict):
            func = tool_choice.get("function") or {}
            name = func.get("name") if isinstance(func, dict) else None
            if name:
                return f"\n\n**重要提示:** 本轮你必须**仅**使用名为 `{name}` 的工具。生成必要的参数并按指定的XML格式输出。"
            return ""

        # Object form (attribute access), e.g. a parsed pydantic model.
        if hasattr(tool_choice, 'function') and hasattr(tool_choice.function, 'name'):
            required_tool_name = tool_choice.function.name
            return f"\n\n**重要提示:** 本轮你必须**仅**使用名为 `{required_tool_name}` 的工具。生成必要的参数并按指定的XML格式输出。"

        logger.debug(f"[TOOLIFY] 不支持的tool_choice类型: {type(tool_choice)}")
        return ""

    except Exception as e:
        logger.error(f"[TOOLIFY] 处理tool_choice时出错: {e}")
        return ""
211 |
212 |
--------------------------------------------------------------------------------
/src/config.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | 配置管理模块
4 | 统一管理所有环境变量和配置项
5 | """
6 | import os
7 | import logging
8 | from typing import List
9 | from dotenv import load_dotenv
10 | from src.token_manager import TokenManager
11 | from src.token_updater import TokenUpdater
12 |
13 | # 加载环境变量
14 | load_dotenv()
15 |
class Config:
    """Application configuration, sourced from environment variables.

    All settings are read once at import time; the token manager/updater
    are lazily-created class-level singletons.
    """

    # --- API authentication ---
    VALID_API_KEY: str = os.getenv("VALID_API_KEY", "")
    # The hard-coded K2THINK_TOKEN was removed; tokens now come from the token manager.
    K2THINK_API_URL: str = os.getenv("K2THINK_API_URL", "https://www.k2think.ai/api/chat/completions")

    # --- Token pool management ---
    TOKENS_FILE: str = os.getenv("TOKENS_FILE", "tokens.txt")
    MAX_TOKEN_FAILURES: int = int(os.getenv("MAX_TOKEN_FAILURES", "3"))

    # --- Automatic token refresh ---
    ENABLE_TOKEN_AUTO_UPDATE: bool = os.getenv("ENABLE_TOKEN_AUTO_UPDATE", "false").lower() == "true"
    TOKEN_UPDATE_INTERVAL: int = int(os.getenv("TOKEN_UPDATE_INTERVAL", "86400")) # default 24 hours
    ACCOUNTS_FILE: str = os.getenv("ACCOUNTS_FILE", "accounts.txt")
    GET_TOKENS_SCRIPT: str = os.getenv("GET_TOKENS_SCRIPT", "get_tokens.py")

    # Singleton instances (lazily initialized via the classmethods below)
    _token_manager: TokenManager = None
    _token_updater: TokenUpdater = None

    # --- Server ---
    HOST: str = os.getenv("HOST", "0.0.0.0")
    PORT: int = int(os.getenv("PORT", "8001"))

    # --- Feature switches ---
    DEBUG_LOGGING: bool = os.getenv("DEBUG_LOGGING", "false").lower() == "true"
    ENABLE_ACCESS_LOG: bool = os.getenv("ENABLE_ACCESS_LOG", "true").lower() == "true"

    # --- Performance tuning ---
    REQUEST_TIMEOUT: float = float(os.getenv("REQUEST_TIMEOUT", "60"))
    MAX_KEEPALIVE_CONNECTIONS: int = int(os.getenv("MAX_KEEPALIVE_CONNECTIONS", "20"))
    MAX_CONNECTIONS: int = int(os.getenv("MAX_CONNECTIONS", "100"))
    STREAM_DELAY: float = float(os.getenv("STREAM_DELAY", "0.05"))
    STREAM_CHUNK_SIZE: int = int(os.getenv("STREAM_CHUNK_SIZE", "50"))
    MAX_STREAM_TIME: float = float(os.getenv("MAX_STREAM_TIME", "10.0"))

    # --- Logging ---
    LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO").upper()

    # --- CORS ---
    # "*" keeps the wildcard as a single-element list; otherwise split on commas.
    CORS_ORIGINS: List[str] = (
        os.getenv("CORS_ORIGINS", "*").split(",")
        if os.getenv("CORS_ORIGINS", "*") != "*"
        else ["*"]
    )

    # --- Tool calling (Toolify) ---
    ENABLE_TOOLIFY: bool = os.getenv("ENABLE_TOOLIFY", "true").lower() == "true"
    TOOLIFY_CUSTOM_PROMPT: str = os.getenv("TOOLIFY_CUSTOM_PROMPT", "")

    @classmethod
    def validate(cls) -> None:
        """Validate required configuration; raises ValueError on any problem.

        Side effect: when auto-update is enabled and the token file is
        missing, an empty token file is created for the updater to fill.
        """
        if not cls.VALID_API_KEY:
            raise ValueError("错误:VALID_API_KEY 环境变量未设置。请在 .env 文件中提供一个安全的API密钥。")

        # Check that the token file exists.
        if not os.path.exists(cls.TOKENS_FILE):
            if cls.ENABLE_TOKEN_AUTO_UPDATE:
                # Auto-update enabled: the accounts file and fetch script must exist.
                if not os.path.exists(cls.ACCOUNTS_FILE):
                    raise ValueError(f"错误:启用了token自动更新,但账户文件 {cls.ACCOUNTS_FILE} 不存在。请创建账户文件或禁用自动更新。")
                if not os.path.exists(cls.GET_TOKENS_SCRIPT):
                    raise ValueError(f"错误:启用了token自动更新,但脚本文件 {cls.GET_TOKENS_SCRIPT} 不存在。")

                # Create an empty token file and let the updater service populate it.
                print(f"Token文件 {cls.TOKENS_FILE} 不存在,已启用自动更新。创建空token文件,等待更新服务生成...")
                try:
                    with open(cls.TOKENS_FILE, 'w', encoding='utf-8') as f:
                        f.write("# Token文件将由自动更新服务生成\n")
                    print("空token文件已创建,服务启动后将自动更新token池。")
                except Exception as e:
                    raise ValueError(f"错误:无法创建token文件 {cls.TOKENS_FILE}: {e}")
            else:
                # Without auto-update, a manually provided token file is mandatory.
                raise ValueError(f"错误:Token文件 {cls.TOKENS_FILE} 不存在。请手动创建token文件或启用自动更新功能(设置 ENABLE_TOKEN_AUTO_UPDATE=true)。")

        # Range-check the numeric settings.
        if cls.PORT < 1 or cls.PORT > 65535:
            raise ValueError(f"错误:PORT 值 {cls.PORT} 不在有效范围内 (1-65535)")

        if cls.REQUEST_TIMEOUT <= 0:
            raise ValueError(f"错误:REQUEST_TIMEOUT 必须大于0,当前值: {cls.REQUEST_TIMEOUT}")

        if cls.STREAM_DELAY < 0:
            raise ValueError(f"错误:STREAM_DELAY 不能为负数,当前值: {cls.STREAM_DELAY}")

    @classmethod
    def setup_logging(cls) -> None:
        """Configure root logging to stdout at the configured level."""
        import sys

        level_map = {
            "DEBUG": logging.DEBUG,
            "INFO": logging.INFO,
            "WARNING": logging.WARNING,
            "ERROR": logging.ERROR
        }

        # Unknown LOG_LEVEL values fall back to INFO.
        log_level = level_map.get(cls.LOG_LEVEL, logging.INFO)

        # Ensure log output goes to stdout.
        logging.basicConfig(
            level=log_level,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            handlers=[
                logging.StreamHandler(sys.stdout)
            ]
        )

        # Ensure the standard streams use UTF-8 encoding (Python 3.7+).
        if hasattr(sys.stdout, 'reconfigure'):
            sys.stdout.reconfigure(encoding='utf-8')
        if hasattr(sys.stderr, 'reconfigure'):
            sys.stderr.reconfigure(encoding='utf-8')

    @classmethod
    def get_token_manager(cls) -> TokenManager:
        """Return the TokenManager singleton, creating it on first use."""
        if cls._token_manager is None:
            cls._token_manager = TokenManager(
                tokens_file=cls.TOKENS_FILE,
                max_failures=cls.MAX_TOKEN_FAILURES,
                allow_empty=cls.ENABLE_TOKEN_AUTO_UPDATE # auto-update mode tolerates an empty file
            )
            # Wire up the force-refresh callback when auto-update is enabled.
            if cls.ENABLE_TOKEN_AUTO_UPDATE:
                cls._setup_force_refresh_callback()
        return cls._token_manager

    @classmethod
    def get_token_updater(cls) -> TokenUpdater:
        """Return the TokenUpdater singleton, creating it on first use."""
        if cls._token_updater is None:
            cls._token_updater = TokenUpdater(
                update_interval=cls.TOKEN_UPDATE_INTERVAL,
                get_tokens_script=cls.GET_TOKENS_SCRIPT,
                accounts_file=cls.ACCOUNTS_FILE,
                tokens_file=cls.TOKENS_FILE
            )
            # If the manager already exists and auto-update is on, connect them.
            if cls._token_manager is not None and cls.ENABLE_TOKEN_AUTO_UPDATE:
                cls._setup_force_refresh_callback()
        return cls._token_updater

    @classmethod
    def reload_tokens(cls) -> None:
        """Reload the token pool from disk (no-op if the manager isn't built yet)."""
        if cls._token_manager is not None:
            cls._token_manager.reload_tokens()

    @classmethod
    def _setup_force_refresh_callback(cls) -> None:
        """Install the callback that force-refreshes tokens after repeated failures."""
        if cls._token_manager is not None and cls._token_updater is None:
            # Make sure the updater is initialized first.
            cls.get_token_updater()

        if cls._token_manager is not None and cls._token_updater is not None:
            # Register the force-refresh callback on the manager.
            def force_refresh_callback():
                try:
                    logging.getLogger(__name__).info("🔄 检测到token问题,启动自动刷新")
                    success = cls._token_updater.force_update()
                    if success:
                        # On success, reload the pool and clear the failure counter.
                        cls._token_manager.reload_tokens()
                        cls._token_manager.reset_consecutive_failures()
                        logging.getLogger(__name__).info("✅ 自动刷新完成,tokens.txt已更新,token池已重新加载")
                    else:
                        logging.getLogger(__name__).error("❌ 自动刷新失败,请检查accounts.txt文件或手动更新token")
                except Exception as e:
                    logging.getLogger(__name__).error(f"❌ 自动刷新回调执行失败: {e}")

            cls._token_manager.set_force_refresh_callback(force_refresh_callback)
            logging.getLogger(__name__).info("已设置连续失效自动强制刷新机制")
--------------------------------------------------------------------------------
/get_tokens.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | import os
5 | import sys
6 | import requests
7 | import json
8 | import time
9 | import threading
10 | from concurrent.futures import ThreadPoolExecutor, as_completed
11 | from typing import Optional
12 | import re
13 | from dotenv import load_dotenv
14 |
15 | # 确保使用UTF-8编码
16 | os.environ.setdefault('PYTHONIOENCODING', 'utf-8')
17 | os.environ.setdefault('PYTHONLEGACYWINDOWSSTDIO', '0')
18 |
19 | # 强制设置UTF-8编码
20 | import locale
21 | try:
22 | locale.setlocale(locale.LC_ALL, 'C.UTF-8')
23 | except locale.Error:
24 | try:
25 | locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
26 | except locale.Error:
27 | pass # 如果设置失败,继续使用默认设置
28 |
29 | # 重新配置标准输入输出流
30 | if hasattr(sys.stdout, 'reconfigure'):
31 | sys.stdout.reconfigure(encoding='utf-8', errors='replace')
32 | if hasattr(sys.stderr, 'reconfigure'):
33 | sys.stderr.reconfigure(encoding='utf-8', errors='replace')
34 | if hasattr(sys.stdin, 'reconfigure'):
35 | sys.stdin.reconfigure(encoding='utf-8', errors='replace')
36 |
37 | # 加载环境变量
38 | load_dotenv()
39 |
class K2ThinkTokenExtractor:
    """Logs K2Think accounts in concurrently and harvests session tokens
    from the signin response's Set-Cookie header into tokens.txt."""

    def __init__(self):
        self.base_url = "https://www.k2think.ai"
        self.login_url = f"{self.base_url}/api/v1/auths/signin"

        # Optional proxy from the environment (applied to both schemes).
        proxy_url = os.getenv("PROXY_URL", "")
        self.proxies = {}
        if proxy_url:
            self.proxies = {
                'http': proxy_url,
                'https': proxy_url
            }
            print(f"使用代理: {proxy_url}")
        else:
            print("未配置代理,直接连接")

        # Browser-like headers captured from the site's devtools traffic.
        self.headers = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip, deflate, br, zstd',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Content-Type': 'application/json',
            'Origin': 'https://www.k2think.ai',
            'Priority': 'u=1, i',
            'Referer': 'https://www.k2think.ai/auth?mode=signin',
            'Sec-Ch-Ua': '"Chromium";v="140", "Not=A?Brand";v="24", "Microsoft Edge";v="140"',
            'Sec-Ch-Ua-Mobile': '?0',
            'Sec-Ch-Ua-Platform': '"Windows"',
            'Sec-Fetch-Dest': 'empty',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Site': 'same-origin',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36 Edg/140.0.0.0'
        }

        # Serializes token-file appends across worker threads.
        self.lock = threading.Lock()

    def extract_token_from_set_cookie(self, response: requests.Response) -> Optional[str]:
        """Extract the token value from the response's Set-Cookie header(s).

        NOTE(review): requests' headers object has no get_list(); the hasattr
        fallback reads the (possibly folded) single Set-Cookie header instead.
        """
        set_cookie_headers = response.headers.get_list('Set-Cookie') if hasattr(response.headers, 'get_list') else [response.headers.get('Set-Cookie')]

        # Scan every Set-Cookie header for a token=... pair.
        if set_cookie_headers:
            for cookie_header in set_cookie_headers:
                if cookie_header and 'token=' in cookie_header:
                    # Capture everything up to the next cookie attribute.
                    match = re.search(r'token=([^;]+)', cookie_header)
                    if match:
                        return match.group(1)

        return None

    def login_and_get_token(self, email: str, password: str, retry_count: int = 3) -> Optional[str]:
        """Sign in and return the session token, retrying on network errors.

        Returns None when all attempts fail or no token cookie is present.
        """
        login_data = {
            "email": email,
            "password": password
        }

        for attempt in range(retry_count):
            try:
                session = requests.Session()
                session.headers.update(self.headers)

                response = session.post(
                    self.login_url,
                    json=login_data,
                    proxies=self.proxies if self.proxies else None,
                    timeout=30
                )

                if response.status_code == 200:
                    token = self.extract_token_from_set_cookie(response)
                    if token:
                        return token

            except Exception as e:
                if attempt == retry_count - 1:
                    return None
                time.sleep(2)  # back off 2s between retries
                continue

        return None

    def load_accounts(self, file_path: str = "./accounts.txt"):
        """Load accounts from a file of JSON lines: {"email": ..., "k2_password": ...}.

        Malformed lines are skipped; a missing/unreadable file yields [].
        """
        accounts = []
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue

                    try:
                        account_data = json.loads(line)
                        if 'email' in account_data and 'k2_password' in account_data:
                            accounts.append({
                                'email': account_data['email'],
                                'password': account_data['k2_password']
                            })
                    # BUG FIX: was a bare `except:` which also swallowed
                    # KeyboardInterrupt/SystemExit; only JSON decoding and
                    # non-dict membership tests can fail here.
                    except (json.JSONDecodeError, TypeError):
                        continue

            return accounts

        except FileNotFoundError:
            return []
        except Exception:
            return []

    def save_token(self, token: str, file_path: str = "./tokens.txt"):
        """Append one token per line; lock-guarded since workers share the file."""
        try:
            with self.lock:
                with open(file_path, 'a', encoding='utf-8') as f:
                    f.write(token + '\n')
        except Exception:
            pass

    def clear_tokens_file(self, file_path: str = "./tokens.txt"):
        """Truncate the tokens file before a fresh extraction run."""
        try:
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write('')  # truncate
            print(f"已清空tokens文件: {file_path}")
        except Exception as e:
            print(f"清空tokens文件失败: {e}")

    def process_account(self, account, tokens_file: str = "./tokens.txt"):
        """Fetch and persist one account's token; True on success."""
        token = self.login_and_get_token(account['email'], account['password'])
        if token:
            self.save_token(token, tokens_file)
            return True
        return False

    def process_all_accounts(self, accounts_file: str = "./accounts.txt", tokens_file: str = "./tokens.txt"):
        """Process every account with a 4-worker thread pool.

        Returns True when at least one token was obtained.
        """
        accounts = self.load_accounts(accounts_file)
        if not accounts:
            print("没有账户需要处理或accounts.txt文件不存在")
            return False

        # Start from a clean token file.
        self.clear_tokens_file(tokens_file)

        print(f"开始处理 {len(accounts)} 个账户,4线程并发...")
        success_count = 0
        failed_count = 0

        # Smoke-test one account first (result is not saved; the pool
        # below processes this account again and persists its token).
        test_account = accounts[0]
        print(f"测试账户: {test_account['email']}")

        try:
            token = self.login_and_get_token(test_account['email'], test_account['password'])
            if token:
                print(f"测试成功,获取token: {token[:50]}...")
            else:
                print("测试失败,无法获取token")
        except Exception as e:
            print(f"测试异常: {e}")

        with ThreadPoolExecutor(max_workers=4) as executor:
            # Submit every account to the pool.
            future_to_account = {executor.submit(self.process_account, account, tokens_file): account for account in accounts}

            # Tally results as they complete.
            for future in as_completed(future_to_account):
                account = future_to_account[future]
                try:
                    if future.result():
                        success_count += 1
                        print(f"✓ {account['email']}")
                    else:
                        failed_count += 1
                        print(f"✗ {account['email']}")
                except Exception as e:
                    failed_count += 1
                    print(f"✗ {account['email']} - {e}")

        print(f"\n处理完成: 成功 {success_count}, 失败 {failed_count}")

        # True when at least one token was harvested.
        return success_count > 0
226 |
227 |
def main():
    """CLI entry point: fetch tokens for all accounts; exit 0 on any success."""
    # `sys` is already imported at module top level; the previous local
    # `import sys` inside main() was redundant shadowing.

    # Optional CLI overrides: argv[1] = accounts file, argv[2] = tokens file.
    accounts_file = sys.argv[1] if len(sys.argv) > 1 else "./accounts.txt"
    tokens_file = sys.argv[2] if len(sys.argv) > 2 else "./tokens.txt"

    extractor = K2ThinkTokenExtractor()
    success = extractor.process_all_accounts(accounts_file, tokens_file)

    # Exit code tells callers whether at least one token was obtained.
    sys.exit(0 if success else 1)


if __name__ == "__main__":
    main()
244 |
--------------------------------------------------------------------------------
/tests/test_tool_calling.py:
--------------------------------------------------------------------------------
1 | """
2 | K2Think API Proxy 工具调用示例
3 | 演示如何使用工具调用功能
4 | """
5 | import json
6 | from openai import OpenAI
7 |
8 | # 配置客户端
9 | client = OpenAI(
10 | base_url="http://localhost:8001/v1",
11 | api_key="sk-123456"
12 | )
13 |
14 | # 定义工具
15 | tools = [
16 | {
17 | "type": "function",
18 | "function": {
19 | "name": "get_weather",
20 | "description": "获取指定城市的天气信息",
21 | "parameters": {
22 | "type": "object",
23 | "properties": {
24 | "city": {
25 | "type": "string",
26 | "description": "城市名称,例如:北京、上海、深圳"
27 | },
28 | "unit": {
29 | "type": "string",
30 | "enum": ["celsius", "fahrenheit"],
31 | "description": "温度单位",
32 | "default": "celsius"
33 | }
34 | },
35 | "required": ["city"]
36 | }
37 | }
38 | },
39 | {
40 | "type": "function",
41 | "function": {
42 | "name": "search_web",
43 | "description": "在互联网上搜索信息",
44 | "parameters": {
45 | "type": "object",
46 | "properties": {
47 | "query": {
48 | "type": "string",
49 | "description": "搜索关键词"
50 | },
51 | "num_results": {
52 | "type": "integer",
53 | "description": "返回结果数量",
54 | "default": 5,
55 | "minimum": 1,
56 | "maximum": 10
57 | }
58 | },
59 | "required": ["query"]
60 | }
61 | }
62 | },
63 | {
64 | "type": "function",
65 | "function": {
66 | "name": "calculate",
67 | "description": "执行数学计算",
68 | "parameters": {
69 | "type": "object",
70 | "properties": {
71 | "expression": {
72 | "type": "string",
73 | "description": "数学表达式,例如:2+2, 10*5, sqrt(16)"
74 | }
75 | },
76 | "required": ["expression"]
77 | }
78 | }
79 | }
80 | ]
81 |
def example_basic_tool_call():
    """Basic tool-call demo: ask about the weather, mock the tool run."""
    print("\n=== 基础工具调用示例 ===\n")

    reply = client.chat.completions.create(
        model="MBZUAI-IFM/K2-Think",
        messages=[
            {"role": "user", "content": "北京今天天气怎么样?"}
        ],
        tools=tools,
        tool_choice="auto"
    ).choices[0].message

    # No tool calls: the model answered directly.
    if not reply.tool_calls:
        print("模型直接回答:")
        print(reply.content)
        return

    print("模型请求调用工具:")
    for call in reply.tool_calls:
        fn = call.function
        print(f"\n工具名称: {fn.name}")
        print(f"工具参数: {fn.arguments}")

        # Decode arguments and fake an execution result.
        args = json.loads(fn.arguments)

        if fn.name == "get_weather":
            result = {
                "city": args.get("city"),
                "temperature": 22,
                "condition": "晴天",
                "humidity": 45,
                "unit": args.get("unit", "celsius")
            }
        else:
            result = {"status": "success", "data": "模拟数据"}

        print(f"工具执行结果: {json.dumps(result, ensure_ascii=False)}")
124 |
125 |
def example_multi_turn_conversation():
    """Multi-turn demo: tool calls, mocked results, then the final answer."""
    print("\n=== 多轮对话示例 ===\n")

    history = [
        {"role": "user", "content": "查一下上海的天气,然后搜索关于上海的旅游景点"}
    ]

    first = client.chat.completions.create(
        model="MBZUAI-IFM/K2-Think",
        messages=history,
        tools=tools,
        tool_choice="auto"
    ).choices[0].message

    if not first.tool_calls:
        return

    print("第一轮 - 模型请求调用工具:")
    history.append(first)  # keep the assistant turn in the transcript

    # Canned tool outputs keyed by tool name.
    canned = {
        "get_weather": '{"temperature": 25, "condition": "多云", "city": "上海"}',
        "search_web": '{"results": ["外滩", "东方明珠", "豫园", "南京路"]}',
    }

    for call in first.tool_calls:
        print(f"\n调用工具: {call.function.name}")
        print(f"参数: {call.function.arguments}")

        # Feed the mocked result back into the conversation history.
        history.append({
            "role": "tool",
            "tool_call_id": call.id,
            "content": canned.get(call.function.name, '{"status": "success"}')
        })

    # Second round: hand the tool results back for a final answer.
    print("\n第二轮 - 发送工具结果给模型...")

    final_response = client.chat.completions.create(
        model="MBZUAI-IFM/K2-Think",
        messages=history,
        tools=tools
    )

    print("\n模型的最终回答:")
    print(final_response.choices[0].message.content)
180 |
181 |
def example_forced_tool_call():
    """Force the model to call one specific tool via tool_choice."""
    print("\n=== 强制工具调用示例 ===\n")

    forced_choice = {"type": "function", "function": {"name": "calculate"}}
    response = client.chat.completions.create(
        model="MBZUAI-IFM/K2-Think",
        messages=[{"role": "user", "content": "计算 123 * 456"}],
        tools=tools,
        tool_choice=forced_choice,
    )

    message = response.choices[0].message

    if not message.tool_calls:
        return

    print("模型被强制使用工具:")
    for call in message.tool_calls:
        print(f"工具: {call.function.name}")
        print(f"参数: {call.function.arguments}")
205 |
206 |
def example_stream_with_tools():
    """Streaming example: print content deltas and watch for tool-call chunks."""
    print("\n=== 流式工具调用示例 ===\n")

    stream = client.chat.completions.create(
        model="MBZUAI-IFM/K2-Think",
        messages=[{"role": "user", "content": "帮我搜索一下人工智能的最新发展"}],
        tools=tools,
        stream=True,
    )

    print("流式响应:")
    for chunk in stream:
        choice = chunk.choices[0]

        if choice.delta.content:
            print(choice.delta.content, end="", flush=True)

        # Tool-call fragments may arrive on the delta while streaming.
        if getattr(choice.delta, 'tool_calls', None):
            print("\n检测到工具调用:")
            for call in choice.delta.tool_calls:
                if hasattr(call, 'function'):
                    print(f"\n工具: {call.function.name if hasattr(call.function, 'name') else '未知'}")

        # Stop on either terminal finish reason.
        if choice.finish_reason == "tool_calls":
            print("\n[流结束 - 需要工具调用]")
            break
        elif choice.finish_reason == "stop":
            print("\n[流结束]")
            break

    print()
241 |
242 |
def example_disable_tools():
    """Show that tool_choice="none" keeps the model from invoking tools."""
    print("\n=== 禁用工具调用示例 ===\n")

    response = client.chat.completions.create(
        model="MBZUAI-IFM/K2-Think",
        messages=[{"role": "user", "content": "北京今天天气怎么样?"}],
        tools=tools,
        tool_choice="none",  # tools stay declared but must not be called
    )

    print("模型直接回答(未使用工具):")
    print(response.choices[0].message.content)
258 |
259 |
if __name__ == "__main__":
    banner = "=" * 60
    print(banner)
    print("K2Think API Proxy - 工具调用功能示例")
    print(banner)

    try:
        # Run each demo in turn; a single failure aborts the rest.
        for demo in (
            example_basic_tool_call,
            example_forced_tool_call,
            example_stream_with_tools,
            example_disable_tools,
            example_multi_turn_conversation,
        ):
            demo()

        print("\n" + banner)
        print("示例运行完成!")
        print(banner)

    except Exception as e:
        print(f"\n错误: {e}")
        print("\n请确保:")
        print("1. K2Think API Proxy 服务正在运行(http://localhost:8001)")
        print("2. 环境变量 ENABLE_TOOLIFY=true")
        print("3. API密钥配置正确")
283 |
284 |
--------------------------------------------------------------------------------
/src/toolify/core.py:
--------------------------------------------------------------------------------
1 | """
2 | Toolify 核心功能模块
3 | 提供工具调用的主要功能:请求处理、响应解析、格式转换
4 | """
5 |
6 | import uuid
7 | import json
8 | import secrets
9 | import string
10 | import logging
11 | from typing import List, Dict, Any, Optional
12 | from collections import OrderedDict
13 | import time
14 | import threading
15 |
16 | logger = logging.getLogger(__name__)
17 |
18 |
def generate_random_trigger_signal() -> str:
    """Generate a random, self-closing trigger signal tag.

    Returns:
        A self-closing marker such as ``<toolify_trigger_aB3x/>`` whose
        4-character random suffix makes it unlikely to collide with model
        output.

    Note: the original return statement was ``return f""`` with the random
    suffix unused — the tag markup was evidently stripped from this source,
    so the exact tag text here is reconstructed; the function contract
    (random self-closing signal) is per the original docstring.
    """
    chars = string.ascii_letters + string.digits
    random_str = ''.join(secrets.choice(chars) for _ in range(4))
    return f"<toolify_trigger_{random_str}/>"
24 |
25 |
class ToolCallMappingManager:
    """
    Tool-call mapping store with TTL, a size cap and LRU eviction.

    Guarantees:
    1. Automatic expiry - entries are dropped after their TTL elapses.
    2. Bounded size - the store never exceeds ``max_size`` entries.
    3. LRU eviction - the least-recently-used entry goes first when full.
    4. Thread safety - all operations hold a re-entrant lock.
    5. Periodic purge - a daemon thread removes expired entries regularly.
    """

    def __init__(self, max_size: int = 1000, ttl_seconds: int = 3600, cleanup_interval: int = 300):
        """
        Create the mapping manager.

        Args:
            max_size: maximum number of stored entries
            ttl_seconds: entry time-to-live in seconds
            cleanup_interval: seconds between background purges
        """
        self.max_size = max_size
        self.ttl_seconds = ttl_seconds
        self.cleanup_interval = cleanup_interval

        # _data keeps insertion/usage order for LRU; _timestamps drives TTL.
        self._data: OrderedDict[str, Dict[str, Any]] = OrderedDict()
        self._timestamps: Dict[str, float] = {}
        self._lock = threading.RLock()

        # Daemon thread: dies with the process, no explicit shutdown needed.
        self._cleanup_thread = threading.Thread(target=self._periodic_cleanup, daemon=True)
        self._cleanup_thread.start()

        logger.debug(f"[TOOLIFY] 工具调用映射管理器已启动 - 最大条目: {max_size}, TTL: {ttl_seconds}s")

    def store(self, tool_call_id: str, name: str, args: dict, description: str = "") -> None:
        """Insert (or refresh) the mapping for a tool-call id."""
        with self._lock:
            now = time.time()

            # Re-inserting an existing id: drop the stale entry first so the
            # new one lands at the most-recently-used end.
            if self._data.pop(tool_call_id, None) is not None:
                del self._timestamps[tool_call_id]

            # Evict from the least-recently-used end until there is room.
            while len(self._data) >= self.max_size:
                evicted, _ = self._data.popitem(last=False)
                del self._timestamps[evicted]
                logger.debug(f"[TOOLIFY] 因大小限制移除最旧条目: {evicted}")

            self._data[tool_call_id] = {
                "name": name,
                "args": args,
                "description": description,
                "created_at": now,
            }
            self._timestamps[tool_call_id] = now

            logger.debug(f"[TOOLIFY] 存储工具调用映射: {tool_call_id} -> {name}")

    def get(self, tool_call_id: str) -> Optional[Dict[str, Any]]:
        """Return the mapping for an id, or None if absent or expired.

        A successful lookup refreshes the entry's LRU position.
        """
        with self._lock:
            now = time.time()

            entry = self._data.get(tool_call_id)
            if entry is None:
                logger.debug(f"[TOOLIFY] 未找到工具调用映射: {tool_call_id}")
                return None

            # Lazily expire on access, independent of the background purge.
            if now - self._timestamps[tool_call_id] > self.ttl_seconds:
                logger.debug(f"[TOOLIFY] 工具调用映射已过期: {tool_call_id}")
                del self._data[tool_call_id]
                del self._timestamps[tool_call_id]
                return None

            self._data.move_to_end(tool_call_id)

            logger.debug(f"[TOOLIFY] 找到工具调用映射: {tool_call_id} -> {entry['name']}")
            return entry

    def cleanup_expired(self) -> int:
        """Remove every expired entry; return how many were removed."""
        with self._lock:
            now = time.time()
            stale = [key for key, ts in self._timestamps.items()
                     if now - ts > self.ttl_seconds]

            for key in stale:
                del self._data[key]
                del self._timestamps[key]

            if stale:
                logger.debug(f"[TOOLIFY] 清理了 {len(stale)} 个过期条目")

            return len(stale)

    def _periodic_cleanup(self) -> None:
        """Daemon loop: sleep for the interval, then purge expired entries."""
        while True:
            try:
                time.sleep(self.cleanup_interval)
                self.cleanup_expired()
            except Exception as e:
                logger.error(f"[TOOLIFY] 后台清理线程异常: {e}")
133 |
134 |
class ToolifyCore:
    """Toolify core - drives the tool-calling workflow.

    Rewrites OpenAI-style tool messages into plain text the upstream model
    can consume, and converts parsed tool invocations back into OpenAI
    ``tool_calls`` structures, tracking ids via a TTL/LRU mapping manager.
    """

    def __init__(self, enable_function_calling: bool = True):
        """
        Initialize the Toolify core.

        Args:
            enable_function_calling: whether function calling is enabled
        """
        self.enable_function_calling = enable_function_calling
        self.mapping_manager = ToolCallMappingManager()
        # Random per-instance marker used to delimit tool-call sections.
        self.trigger_signal = generate_random_trigger_signal()

        logger.info(f"[TOOLIFY] 核心已初始化 - 功能启用: {enable_function_calling}")
        logger.debug(f"[TOOLIFY] 触发信号: {self.trigger_signal}")

    def store_tool_call_mapping(self, tool_call_id: str, name: str, args: dict, description: str = ""):
        """Remember the content of a tool call under its generated id."""
        self.mapping_manager.store(tool_call_id, name, args, description)

    def get_tool_call_mapping(self, tool_call_id: str) -> Optional[Dict[str, Any]]:
        """Look up a stored tool call by id (None if unknown or expired)."""
        return self.mapping_manager.get(tool_call_id)

    def format_tool_result_for_ai(self, tool_call_id: str, result_content: str) -> str:
        """Format a tool execution result as plain text for the model."""
        logger.debug(f"[TOOLIFY] 格式化工具调用结果: tool_call_id={tool_call_id}")
        tool_info = self.get_tool_call_mapping(tool_call_id)
        if not tool_info:
            # Unknown/expired id: fall back to a generic wrapper.
            logger.debug(f"[TOOLIFY] 未找到工具调用映射,使用默认格式")
            return f"Tool execution result:\n\n{result_content}\n"

        formatted_text = f"""Tool execution result:
- Tool name: {tool_info['name']}
- Execution result:

{result_content}
"""

        logger.debug(f"[TOOLIFY] 格式化完成,工具名: {tool_info['name']}")
        return formatted_text

    def format_assistant_tool_calls_for_ai(self, tool_calls: List[Dict[str, Any]]) -> str:
        """Render an assistant message's tool_calls as an XML-like text block."""
        logger.debug(f"[TOOLIFY] 格式化助手工具调用. 数量: {len(tool_calls)}")

        xml_calls_parts = []
        for tool_call in tool_calls:
            function_info = tool_call.get("function", {})
            name = function_info.get("name", "")
            arguments_json = function_info.get("arguments", "{}")

            try:
                args_dict = json.loads(arguments_json)
            except (json.JSONDecodeError, TypeError):
                # Keep unparsable arguments verbatim instead of dropping them.
                args_dict = {"raw_arguments": arguments_json}

            args_parts = []
            for key, value in args_dict.items():
                json_value = json.dumps(value, ensure_ascii=False)
                # Fix: the closing tag was written as "{key}>" (missing "</"),
                # producing malformed markup like "<q>v q>" instead of "<q>v</q>".
                args_parts.append(f"<{key}>{json_value}</{key}>")

            args_content = "\n".join(args_parts)

            # NOTE(review): the wrapper markup around name/args appears to have
            # been stripped from this source; cross-check the expected tag
            # structure against parser.py before changing this template.
            xml_call = f"\n{name}\n\n{args_content}\n\n"
            xml_calls_parts.append(xml_call)

        all_calls = "\n".join(xml_calls_parts)
        final_str = f"{self.trigger_signal}\n\n{all_calls}\n"

        logger.debug("[TOOLIFY] 助手工具调用格式化成功")
        return final_str

    def preprocess_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Preprocess messages, rewriting tool-related ones into plain content.

        Args:
            messages: OpenAI-format message list

        Returns:
            The processed message list
        """
        processed_messages = []

        for message in messages:
            if isinstance(message, dict):
                # "tool" role: replace with a user message carrying the
                # formatted execution result.
                if message.get("role") == "tool":
                    tool_call_id = message.get("tool_call_id")
                    content = message.get("content")

                    if tool_call_id and content:
                        formatted_content = self.format_tool_result_for_ai(tool_call_id, content)
                        processed_message = {
                            "role": "user",
                            "content": formatted_content
                        }
                        processed_messages.append(processed_message)
                        logger.debug(f"[TOOLIFY] 转换tool消息为user消息: tool_call_id={tool_call_id}")
                    else:
                        # Missing id or content: nothing useful to forward.
                        logger.debug(f"[TOOLIFY] 跳过无效tool消息: tool_call_id={tool_call_id}")

                # assistant role with tool_calls: fold the calls into content.
                elif message.get("role") == "assistant" and "tool_calls" in message and message["tool_calls"]:
                    tool_calls = message.get("tool_calls", [])
                    formatted_tool_calls_str = self.format_assistant_tool_calls_for_ai(tool_calls)

                    # Merge with any original content.
                    original_content = message.get("content") or ""
                    final_content = f"{original_content}\n{formatted_tool_calls_str}".strip()

                    processed_message = {
                        "role": "assistant",
                        "content": final_content
                    }
                    # Copy all remaining fields except tool_calls.
                    for key, value in message.items():
                        if key not in ["role", "content", "tool_calls"]:
                            processed_message[key] = value

                    processed_messages.append(processed_message)
                    logger.debug(f"[TOOLIFY] 转换assistant的tool_calls为content")
                else:
                    processed_messages.append(message)
            else:
                # Non-dict messages pass through untouched.
                processed_messages.append(message)

        return processed_messages

    def convert_parsed_tools_to_openai_format(self, parsed_tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Convert parsed tool invocations into OpenAI-format tool_calls.

        Args:
            parsed_tools: parsed tools, e.g. [{"name": "tool_name", "args": {...}}, ...]

        Returns:
            tool_calls list in OpenAI format
        """
        tool_calls = []
        for tool in parsed_tools:
            tool_call_id = f"call_{uuid.uuid4().hex}"
            # Remember the call so later tool-result messages can be matched.
            self.store_tool_call_mapping(
                tool_call_id,
                tool["name"],
                tool["args"],
                f"调用工具 {tool['name']}"
            )
            tool_calls.append({
                "id": tool_call_id,
                "type": "function",
                "function": {
                    "name": tool["name"],
                    "arguments": json.dumps(tool["args"])
                }
            })

        logger.debug(f"[TOOLIFY] 转换了 {len(tool_calls)} 个工具调用")
        return tool_calls
296 |
297 |
--------------------------------------------------------------------------------
/src/token_updater.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Token更新服务模块
4 | 定期运行get_tokens.py来更新token池
5 | """
6 | import os
7 | import time
8 | import logging
9 | import threading
10 | import subprocess
11 | import shutil
12 | from typing import Optional
13 | from datetime import datetime, timedelta
14 | from src.utils import safe_log_error, safe_log_info, safe_log_warning
15 | # 移除循环导入,Config在需要时动态导入
16 |
17 | logger = logging.getLogger(__name__)
18 |
class TokenUpdater:
    """Token update service - periodically refreshes the token pool.

    Runs get_tokens.py in a subprocess from a background daemon thread and
    swaps the generated tokens file into place via a temp file + backup, so
    the proxy keeps serving while tokens are regenerated.
    """

    def __init__(self,
                 update_interval: int = 86400,  # default: refresh once every 24 hours
                 get_tokens_script: str = "get_tokens.py",
                 accounts_file: str = "accounts.txt",
                 tokens_file: str = "tokens.txt"):
        """
        Initialize the token updater.

        Args:
            update_interval: update interval in seconds
            get_tokens_script: path of the get_tokens.py script
            accounts_file: path of the accounts file
            tokens_file: path of the tokens file
        """
        self.update_interval = update_interval
        self.get_tokens_script = get_tokens_script
        self.accounts_file = accounts_file
        self.tokens_file = tokens_file

        # Runtime state and counters exposed through get_status().
        self.is_running = False
        self.update_thread: Optional[threading.Thread] = None
        self.last_update: Optional[datetime] = None
        self.update_count = 0
        self.error_count = 0
        self.is_updating = False
        self.last_error: Optional[str] = None

        safe_log_info(logger, f"Token更新器初始化完成 - 更新间隔: {update_interval}秒")

        # Remove temp/backup files possibly left behind by a previous run.
        self.cleanup_all_temp_files()

    def _check_files_exist(self) -> bool:
        """Return True only when both the update script and the accounts file exist."""
        if not os.path.exists(self.get_tokens_script):
            safe_log_error(logger, f"get_tokens.py脚本不存在: {self.get_tokens_script}")
            return False

        if not os.path.exists(self.accounts_file):
            safe_log_error(logger, f"账户文件不存在: {self.accounts_file}")
            return False

        return True

    def _run_token_update(self) -> bool:
        """Run the token update script with atomic-style file replacement.

        Returns True on success. Never raises: failures are logged and
        recorded in ``last_error`` / ``error_count``. Concurrent calls are
        rejected via the ``is_updating`` flag.
        """
        if self.is_updating:
            safe_log_warning(logger, "Token更新已在进行中,跳过此次更新")
            return False

        self.is_updating = True
        self.last_error = None
        temp_tokens_file = f"{self.tokens_file}.tmp"

        try:
            safe_log_info(logger, "开始更新token池...")

            # Write into a temp file first so the live tokens file is never
            # half-written while the service reads it.
            # NOTE(review): invokes "python" from PATH — sys.executable would
            # pin the current interpreter; confirm the deployment environment.
            result = subprocess.run(
                ["python", self.get_tokens_script, self.accounts_file, temp_tokens_file],
                capture_output=True,
                encoding='utf-8',
                text=True,
                timeout=300  # 5-minute timeout
            )

            if result.returncode == 0:
                # Accept the temp file only if it was created and is non-empty.
                if os.path.exists(temp_tokens_file) and os.path.getsize(temp_tokens_file) > 0:
                    try:
                        # Atomic-style replacement: avoid renaming a file in use.
                        if os.path.exists(self.tokens_file):
                            # Back up the current file (copy rather than rename
                            # to avoid file-locking problems).
                            backup_file = f"{self.tokens_file}.backup"
                            if os.path.exists(backup_file):
                                os.remove(backup_file)  # drop the stale backup

                            # Copy the current file to the backup location.
                            shutil.copy2(self.tokens_file, backup_file)
                            logger.debug(f"已备份当前tokens文件到: {backup_file}")

                        # Swap the temp file in (Windows needs a safer dance).
                        if os.name == 'nt':  # Windows
                            # On Windows, delete the target first, then rename.
                            if os.path.exists(self.tokens_file):
                                os.remove(self.tokens_file)
                            os.rename(temp_tokens_file, self.tokens_file)
                        else:
                            # Unix/Linux rename overwrites atomically.
                            os.rename(temp_tokens_file, self.tokens_file)

                        safe_log_info(logger, "Token更新成功,文件已原子性替换")
                        logger.debug(f"更新输出: {result.stdout}")
                        self.update_count += 1
                        self.last_update = datetime.now()

                        # Ask the token manager to pick up the new file.
                        self._notify_token_reload()

                        return True
                    except Exception as rename_error:
                        error_msg = f"文件重命名失败: {rename_error}"
                        safe_log_error(logger, error_msg)
                        self.last_error = error_msg
                        self._cleanup_temp_file(temp_tokens_file)
                        self.error_count += 1
                        return False
                else:
                    error_msg = "Token更新失败 - 临时文件为空或不存在"
                    safe_log_error(logger, error_msg)
                    self.last_error = error_msg
                    self._cleanup_temp_file(temp_tokens_file)
                    self.error_count += 1
                    return False
            else:
                error_msg = f"Token更新失败 - 返回码: {result.returncode}, 错误: {result.stderr}"
                safe_log_error(logger, error_msg)
                self.last_error = error_msg
                self._cleanup_temp_file(temp_tokens_file)
                self.error_count += 1
                return False

        except subprocess.TimeoutExpired:
            error_msg = "Token更新超时"
            safe_log_error(logger, error_msg)
            self.last_error = error_msg
            self._cleanup_temp_file(temp_tokens_file)
            self.error_count += 1
            return False
        except Exception as e:
            error_msg = f"Token更新异常: {e}"
            safe_log_error(logger, error_msg)
            self.last_error = error_msg
            self._cleanup_temp_file(temp_tokens_file)
            self.error_count += 1
            return False
        finally:
            self.is_updating = False

    def _cleanup_temp_file(self, temp_file: str) -> None:
        """Best-effort removal of a single temp file (failures only logged)."""
        try:
            if os.path.exists(temp_file):
                os.remove(temp_file)
                logger.debug(f"已清理临时文件: {temp_file}")
        except Exception as e:
            safe_log_warning(logger, f"清理临时文件失败: {e}")

    def cleanup_all_temp_files(self):
        """Remove every temp/backup file this service may have left behind.

        Returns:
            The number of files actually removed.
        """
        temp_patterns = [
            f"{self.tokens_file}.tmp",
            f"{self.tokens_file}.backup"
        ]

        cleaned_count = 0
        for pattern in temp_patterns:
            try:
                if os.path.exists(pattern):
                    os.remove(pattern)
                    safe_log_info(logger, f"已清理遗留文件: {pattern}")
                    cleaned_count += 1
            except Exception as e:
                safe_log_warning(logger, f"清理遗留文件失败 {pattern}: {e}")

        if cleaned_count > 0:
            safe_log_info(logger, f"共清理了 {cleaned_count} 个遗留文件")
        else:
            logger.debug("没有发现需要清理的遗留文件")

        return cleaned_count

    def _notify_token_reload(self) -> None:
        """Tell the token manager (if one was created) to reload its tokens."""
        try:
            # Imported here to break the circular import with src.config.
            from src.config import Config
            if Config._token_manager is not None:
                Config._token_manager.reload_tokens()
                safe_log_info(logger, "Token管理器已重新加载")
        except Exception as e:
            safe_log_warning(logger, f"通知token重新加载失败: {e}")


    def _update_loop(self):
        """Background loop: optional initial refresh, then periodic updates."""
        safe_log_info(logger, "Token更新服务启动")

        # On first start: if tokens.txt holds no usable token (every line is
        # blank or starts with '#'), trigger one update immediately.
        if os.path.exists(self.tokens_file):
            try:
                # Read the whole file and close the handle right away.
                with open(self.tokens_file, "r", encoding="utf-8") as f:
                    content = f.read()

                # Process the content after the handle is closed.
                lines = content.splitlines()
                valid_lines = [line.strip() for line in lines if line.strip() and not line.strip().startswith("#")]

                if len(valid_lines) < 1:
                    # Imported lazily to avoid a circular import.
                    from src.config import Config
                    if Config.ENABLE_TOKEN_AUTO_UPDATE:
                        safe_log_info(logger, "首次启动时,tokens.txt中没有token(非#开头),立即更新一次")
                        # Small delay so the file handle is fully released.

                        time.sleep(0.1)
                        self._run_token_update()
            except Exception as e:
                safe_log_warning(logger, f"检查tokens文件时出错: {e}")

        while self.is_running:
            try:
                time.sleep(self.update_interval)

                if not self.is_running:
                    break

                if self._check_files_exist():
                    self._run_token_update()
                else:
                    safe_log_warning(logger, "跳过此次更新 - 必要文件不存在")

            except Exception as e:
                safe_log_error(logger, "更新循环异常", e)
                time.sleep(60)  # back off for a minute after an error

    def start(self) -> bool:
        """Start the background update service.

        Returns:
            False when already running or required files are missing,
            True once the daemon thread has been started.
        """
        if self.is_running:
            safe_log_warning(logger, "Token更新服务已在运行")
            return False

        if not self._check_files_exist():
            safe_log_error(logger, "启动失败 - 必要文件不存在")
            return False

        self.is_running = True
        self.update_thread = threading.Thread(target=self._update_loop, daemon=True)
        self.update_thread.start()

        safe_log_info(logger, "Token更新服务已启动")
        return True

    def stop(self):
        """Stop the background update service (waits up to 5s for the thread)."""
        if not self.is_running:
            safe_log_warning(logger, "Token更新服务未在运行")
            return

        self.is_running = False
        if self.update_thread and self.update_thread.is_alive():
            self.update_thread.join(timeout=5)

        safe_log_info(logger, "Token更新服务已停止")

    def force_update(self) -> bool:
        """Run one token update immediately (synchronous); True on success."""
        if not self._check_files_exist():
            safe_log_error(logger, "强制更新失败 - 必要文件不存在")
            return False

        safe_log_info(logger, "执行强制token更新")
        return self._run_token_update()

    async def force_update_async(self) -> bool:
        """Run force_update() in an executor so the event loop is not blocked."""
        import asyncio
        # NOTE(review): asyncio.get_event_loop() is deprecated inside
        # coroutines since Python 3.10 — get_running_loop() is the modern call.
        loop = asyncio.get_event_loop()
        return await loop.run_in_executor(None, self.force_update)

    def get_status(self) -> dict:
        """Return a JSON-serializable snapshot of the updater's state."""
        return {
            "is_running": self.is_running,
            "is_updating": self.is_updating,
            "update_interval": self.update_interval,
            "last_update": self.last_update.isoformat() if self.last_update else None,
            "update_count": self.update_count,
            "error_count": self.error_count,
            "last_error": self.last_error,
            "next_update": (
                (self.last_update + timedelta(seconds=self.update_interval)).isoformat()
                if self.last_update else None
            ),
            "files": {
                "get_tokens_script": os.path.exists(self.get_tokens_script),
                "accounts_file": os.path.exists(self.accounts_file),
                "tokens_file": os.path.exists(self.tokens_file)
            }
        }
--------------------------------------------------------------------------------
/k2think_proxy.py:
--------------------------------------------------------------------------------
1 | """
2 | K2Think API 代理服务 - 重构版本
3 | 提供OpenAI兼容的API接口,代理到K2Think服务
4 | """
5 | import os
6 | import sys
7 | import time
8 | import logging
9 | from contextlib import asynccontextmanager
10 | from fastapi import FastAPI, Request
11 | from fastapi.middleware.cors import CORSMiddleware
12 | from fastapi.responses import JSONResponse, Response
13 |
# Force UTF-8 for Python I/O before anything else touches the streams
# (Windows consoles may otherwise default to a legacy code page).
os.environ.setdefault('PYTHONIOENCODING', 'utf-8')
os.environ.setdefault('PYTHONLEGACYWINDOWSSTDIO', '0')

# Force a UTF-8 locale; try the common variants, fall back silently.
import locale
try:
    locale.setlocale(locale.LC_ALL, 'C.UTF-8')
except locale.Error:
    try:
        locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
    except locale.Error:
        pass  # keep the platform default locale if neither variant exists

# Reconfigure the standard streams to UTF-8, replacing undecodable chars.
if hasattr(sys.stdout, 'reconfigure'):
    sys.stdout.reconfigure(encoding='utf-8', errors='replace')
if hasattr(sys.stderr, 'reconfigure'):
    sys.stderr.reconfigure(encoding='utf-8', errors='replace')
if hasattr(sys.stdin, 'reconfigure'):
    sys.stdin.reconfigure(encoding='utf-8', errors='replace')
35 |
36 | from src.config import Config
37 | from src.constants import APIConstants
38 | from src.exceptions import K2ThinkProxyError
39 | from src.models import ChatCompletionRequest
40 | from src.api_handler import APIHandler
41 |
# Initialize configuration before the app is built; abort on invalid config.
try:
    Config.validate()
    Config.setup_logging()
except Exception as e:
    print(f"配置错误: {e}")
    # Fix: use sys.exit instead of the site-module builtin exit(), which is
    # not guaranteed to exist (e.g. under `python -S` or embedded interpreters).
    sys.exit(1)

logger = logging.getLogger(__name__)
51 |
# Application lifespan: start/stop background services with the app.
@asynccontextmanager
async def lifespan(app: FastAPI):
    logger.info("K2Think API Proxy 启动中...")

    # Bring up the token auto-update service when enabled.
    if not Config.ENABLE_TOKEN_AUTO_UPDATE:
        logger.info("Token自动更新服务未启用")
    else:
        token_updater = Config.get_token_updater()
        if token_updater.start():
            logger.info(f"Token自动更新服务已启动 - 更新间隔: {Config.TOKEN_UPDATE_INTERVAL}秒")
        else:
            logger.error("Token自动更新服务启动失败")

    yield

    # Tear the updater down on application shutdown.
    if Config.ENABLE_TOKEN_AUTO_UPDATE and Config._token_updater:
        Config._token_updater.stop()
        logger.info("Token自动更新服务已停止")

    logger.info("K2Think API Proxy 关闭中...")
75 |
# Create the FastAPI application wired to the lifespan hooks above.
app = FastAPI(
    title="K2Think API Proxy",
    description="OpenAI兼容的K2Think API代理服务",
    version="2.0.0",
    lifespan=lifespan
)

# CORS configuration (allowed origins come from Config.CORS_ORIGINS).
app.add_middleware(
    CORSMiddleware,
    allow_origins=Config.CORS_ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Single API handler instance shared by all endpoints below.
api_handler = APIHandler(Config)
95 |
@app.get("/")
async def homepage():
    """Root endpoint: report service status and advertise available routes."""
    admin_routes = {
        "token_stats": "/admin/tokens/stats",
        "reset_token": "/admin/tokens/reset/{token_index}",
        "reset_all": "/admin/tokens/reset-all",
        "reload_tokens": "/admin/tokens/reload",
        "consecutive_failures": "/admin/tokens/consecutive-failures",
        "reset_consecutive": "/admin/tokens/reset-consecutive",
        "updater_status": "/admin/tokens/updater/status",
        "force_update": "/admin/tokens/updater/force-update",
        "cleanup_temp_files": "/admin/tokens/updater/cleanup-temp",
    }
    payload = {
        "status": "success",
        "message": "K2Think API Proxy is running",
        "service": "K2Think API Gateway",
        "model": APIConstants.MODEL_ID,
        "version": "2.1.0",
        "features": [
            "Token轮询和负载均衡",
            "自动失效检测和重试",
            "Token池管理",
            "OpenAI Function Calling 工具调用",
        ],
        "endpoints": {
            "chat": "/v1/chat/completions",
            "models": "/v1/models",
            "health": "/health",
            "admin": admin_routes,
        },
    }
    return JSONResponse(content=payload)
128 |
@app.get("/health")
async def health_check():
    """Health probe: report config flags and token-pool counters."""
    manager = Config.get_token_manager()
    stats = manager.get_token_stats()

    payload = {
        "status": "healthy",
        "timestamp": int(time.time()),
        "config": {
            "debug_logging": Config.DEBUG_LOGGING,
            "toolify_enabled": Config.ENABLE_TOOLIFY,
            "note": "思考内容输出现在通过模型名控制",
        },
        "tokens": {
            "total": stats["total_tokens"],
            "active": stats["active_tokens"],
            "inactive": stats["inactive_tokens"],
            "consecutive_failures": manager.get_consecutive_failures(),
            "auto_update_enabled": Config.ENABLE_TOKEN_AUTO_UPDATE,
        },
    }
    return JSONResponse(content=payload)
151 |
@app.get("/favicon.ico")
async def favicon():
    """Serve an empty favicon so browsers don't trigger 404 log noise."""
    return Response(content="", media_type="image/x-icon")
156 |
@app.get("/v1/models")
async def get_models():
    """List available models (delegates entirely to the API handler)."""
    return await api_handler.get_models()
161 |
@app.post("/v1/chat/completions")
async def chat_completions(request: ChatCompletionRequest, auth_request: Request):
    """Handle an OpenAI-compatible chat completion request.

    ``auth_request`` carries the raw request so the handler can inspect headers.
    """
    return await api_handler.chat_completions(request, auth_request)
166 |
@app.get("/admin/tokens/stats")
async def get_token_stats():
    """Admin: token-pool statistics plus failure/upstream-error counters."""
    manager = Config.get_token_manager()
    stats = manager.get_token_stats()
    # Enrich the base stats with consecutive-failure and upstream-error info.
    stats.update({
        "consecutive_failures": manager.get_consecutive_failures(),
        "consecutive_failure_threshold": manager.consecutive_failure_threshold,
        "consecutive_upstream_errors": manager.get_consecutive_upstream_errors(),
        "upstream_error_threshold": manager.upstream_error_threshold,
    })
    return JSONResponse(content={"status": "success", "data": stats})
182 |
@app.post("/admin/tokens/reset/{token_index}")
async def reset_token(token_index: int):
    """Admin: reset the token at the given pool index."""
    manager = Config.get_token_manager()
    if not manager.reset_token(token_index):
        # Index out of range (or otherwise rejected by the manager).
        return JSONResponse(
            status_code=400,
            content={
                "status": "error",
                "message": f"无效的token索引: {token_index}",
            },
        )
    return JSONResponse(content={
        "status": "success",
        "message": f"Token {token_index} 已重置",
    })
201 |
@app.post("/admin/tokens/reset-all")
async def reset_all_tokens():
    """Admin: reset every token in the pool."""
    Config.get_token_manager().reset_all_tokens()
    return JSONResponse(content={
        "status": "success",
        "message": "所有token已重置",
    })
211 |
@app.post("/admin/tokens/reload")
async def reload_tokens():
    """Admin: re-read the tokens file and report the refreshed stats."""
    try:
        Config.reload_tokens()
        token_manager = Config.get_token_manager()
        stats = token_manager.get_token_stats()
    except Exception as e:
        return JSONResponse(
            status_code=500,
            content={
                "status": "error",
                "message": f"重新加载失败: {str(e)}",
            },
        )
    else:
        return JSONResponse(content={
            "status": "success",
            "message": "Token文件已重新加载",
            "data": stats,
        })
232 |
@app.get("/admin/tokens/consecutive-failures")
async def get_consecutive_failures():
    """Admin: consecutive token-failure and upstream-error diagnostics."""
    manager = Config.get_token_manager()
    last_error_at = manager.last_upstream_error_time
    data = {
        "consecutive_failures": manager.get_consecutive_failures(),
        "threshold": manager.consecutive_failure_threshold,
        "consecutive_upstream_errors": manager.get_consecutive_upstream_errors(),
        "upstream_error_threshold": manager.upstream_error_threshold,
        "last_upstream_error_time": last_error_at.isoformat() if last_error_at else None,
        "token_pool_size": len(manager.tokens),
        "auto_refresh_enabled": Config.ENABLE_TOKEN_AUTO_UPDATE and len(manager.tokens) > 2,
        "last_check": "实时检测",
    }
    return JSONResponse(content={"status": "success", "data": data})
250 |
@app.post("/admin/tokens/reset-consecutive")
async def reset_consecutive_failures():
    """Admin: zero the consecutive-failure counter, reporting the old value."""
    manager = Config.get_token_manager()
    previous = manager.get_consecutive_failures()
    manager.reset_consecutive_failures()
    return JSONResponse(content={
        "status": "success",
        "message": f"连续失效计数已重置: {previous} -> 0",
        "data": {
            "previous_count": previous,
            "current_count": 0,
        },
    })
265 |
@app.get("/admin/tokens/updater/status")
async def get_updater_status():
    """Admin: status snapshot of the token auto-update service."""
    if not Config.ENABLE_TOKEN_AUTO_UPDATE:
        return JSONResponse(content={
            "status": "disabled",
            "message": "Token自动更新未启用",
        })

    status = Config.get_token_updater().get_status()
    return JSONResponse(content={"status": "success", "data": status})
281 |
@app.post("/admin/tokens/updater/force-update")
async def force_update_tokens():
    """Admin: trigger an immediate token refresh, then reload the pool."""
    if not Config.ENABLE_TOKEN_AUTO_UPDATE:
        return JSONResponse(
            status_code=400,
            content={
                "status": "error",
                "message": "Token自动更新未启用",
            },
        )

    updater = Config.get_token_updater()
    if not await updater.force_update_async():
        return JSONResponse(
            status_code=500,
            content={
                "status": "error",
                "message": "Token强制更新失败",
            },
        )

    # The update succeeded: reload the token manager and report fresh stats.
    Config.reload_tokens()
    stats = Config.get_token_manager().get_token_stats()
    return JSONResponse(content={
        "status": "success",
        "message": "Token强制更新成功",
        "data": stats,
    })
316 |
@app.post("/admin/tokens/updater/cleanup-temp")
async def cleanup_temp_files():
    """Delete leftover temporary files produced by the token updater."""
    if not Config.ENABLE_TOKEN_AUTO_UPDATE:
        return JSONResponse(
            status_code=400,
            content={"status": "error", "message": "Token自动更新未启用"}
        )

    removed = Config.get_token_updater().cleanup_all_temp_files()

    return JSONResponse(content={
        "status": "success",
        "message": f"临时文件清理完成,共清理 {removed} 个文件",
        "data": {"cleaned_files": removed}
    })
339 |
@app.exception_handler(K2ThinkProxyError)
async def proxy_exception_handler(request: Request, exc: K2ThinkProxyError):
    """Translate custom proxy exceptions into an OpenAI-style error body."""
    body = {"error": {"message": exc.message, "type": exc.error_type}}
    return JSONResponse(status_code=exc.status_code, content=body)
352 |
@app.exception_handler(404)
async def not_found_handler(request: Request, exc):
    """Return a minimal JSON body for unknown routes."""
    return JSONResponse(status_code=404, content={"error": "Not Found"})
360 |
if __name__ == "__main__":
    import uvicorn

    logger.info(f"启动服务器: {Config.HOST}:{Config.PORT}")
    logger.info("思考内容输出: 通过模型名控制 (MBZUAI-IFM/K2-Think vs MBZUAI-IFM/K2-Think-nothink)")

    # Log verbosity tracks the DEBUG_LOGGING switch.
    uvicorn.run(
        app,
        host=Config.HOST,
        port=Config.PORT,
        access_log=Config.ENABLE_ACCESS_LOG,
        log_level="debug" if Config.DEBUG_LOGGING else "info"
    )
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # K2Think API Proxy
2 |
3 | 基于 FastAPI 构建的 K2Think AI 模型代理服务,提供 OpenAI 兼容的 API 接口。
4 |
5 | ## 核心功能特性
6 |
7 | - 🧠 **MBZUAI K2-Think 模型**: 支持 MBZUAI 开发的 K2-Think 推理模型
8 | - 🔄 **OpenAI 兼容**: 完全兼容 OpenAI API 格式,无缝对接现有应用
9 | - ⚡ **流式响应**: 支持实时流式聊天响应,支持控制thinking输出
10 | - 🛠️ **工具调用**: 支持 OpenAI Function Calling,可集成外部工具和API
11 | - 📊 **文件上传**: 支持文件、图像上传
12 |
13 | ## 智能Token管理系统
14 |
15 | ### 🔄 Token轮询与负载均衡
16 |
17 | - 多token轮流使用,自动故障转移
18 | - 支持大规模token池(支持数百个token)
19 |
20 | ### 🛡️ 智能失效检测与自愈
21 |
22 | - **自动失效检测**: 三次失败后自动禁用失效token
23 | - **连续失效自动刷新**: 当连续两个token失效时,自动触发强制刷新(仅在token池数量>2时生效)
24 | - **智能重试机制**: 失效token会被跳过,确保服务连续性
25 |
26 | ### 📈 Token池管理
27 |
28 | - 完整的管理API查看状态、重置token等
29 | - 实时监控token使用情况和失效统计
30 | - 支持手动重置和重新加载
31 |
32 | ### 🔄 Token自动更新
33 |
34 | - 定期从账户文件自动生成新的token池
35 | - **原子性更新**: 零停机时间,更新过程中服务保持可用
36 | - **智能触发**: 支持定时更新和连续失效触发的强制更新
37 |
38 | ### 🌐 网络适应性
39 |
40 | - 支持HTTP/HTTPS代理配置,适应不同网络环境
41 | - 🚀 **高性能**: 异步处理架构,支持高并发请求
42 | - 🐳 **容器化**: 支持 Docker 部署
43 |
44 | ## 快速开始
45 |
46 | ### 本地运行
47 |
48 | 1. **安装依赖**
49 |
50 | ```bash
51 | pip install -r requirements.txt
52 | ```
53 |
54 | 2. **配置环境变量**
55 |
56 | ```bash
57 | cp .env.example .env
58 | # 编辑 .env 文件,配置你的API密钥和其他选项
59 | ```
60 |
61 | 3. **准备Token文件**
62 |
63 | 有两种方式管理Token:
64 |
65 | **方式一:手动管理(传统方式)**
66 |
67 | ```bash
68 | # 复制token示例文件并编辑
69 | cd data
70 | cp tokens.example.txt tokens.txt
71 | # 编辑tokens.txt文件,添加你的实际K2Think tokens
72 | ```
73 |
74 | **方式二:自动更新(推荐)**
75 |
76 | ```bash
77 | # 准备账户文件
78 | echo '{"email": "your-email@example.com", "k2_password": "your-password"}' > accounts.txt
79 | # 可以添加多个账户,每行一个JSON对象
80 | ```
81 |
82 | 4. **启动服务**
83 |
84 | ```bash
85 | python k2think_proxy.py
86 | ```
87 |
88 | 服务将在 `http://localhost:8001` 启动。
89 |
90 | ### Docker 部署
91 |
92 | #### 使用 docker-compose(推荐)
93 |
94 | ```bash
95 | # 准备配置文件
96 | cp .env.example .env
97 | cd data
98 | cp accounts.example.txt accounts.txt
99 |
100 | # 编辑配置
101 | # 编辑 .env 文件配置API密钥等
102 | # 编辑 accounts.txt 添加K2Think账户信息,格式:{"email": "xxx@yyy.zzz", "k2_password": "xxx"},一行一个
103 |
104 | # 启动服务
105 | docker-compose up -d
106 |
107 | # 检查服务状态
108 | docker-compose logs -f k2think-api
109 | ```
110 |
111 | #### 手动构建部署
112 |
113 | ```bash
114 | # 构建镜像
115 | docker build -t k2think-api .
116 |
117 | # 运行容器
118 | docker run -d \
119 | --name k2think-api \
120 | -p 8001:8001 \
121 | -v $(pwd)/tokens.txt:/app/tokens.txt \
122 | -v $(pwd)/accounts.txt:/app/accounts.txt:ro \
123 | -v $(pwd)/.env:/app/.env:ro \
124 | k2think-api
125 | ```
126 |
127 | ## API 接口
128 |
129 | ### 聊天补全
130 |
131 | **POST** `/v1/chat/completions`
132 |
133 | ```bash
134 | curl -X POST http://localhost:8001/v1/chat/completions \
135 | -H "Content-Type: application/json" \
136 | -H "Authorization: Bearer sk-k2think" \
137 | -d '{
138 | "model": "MBZUAI-IFM/K2-Think",
139 | "messages": [
140 | {"role": "user", "content": "你擅长什么?"}
141 | ],
142 | "stream": false
143 | }'
144 | ```
145 |
146 | ### 模型列表
147 |
148 | **GET** `/v1/models`
149 |
150 | ```bash
151 | curl http://localhost:8001/v1/models \
152 | -H "Authorization: Bearer sk-k2think"
153 | ```
154 |
155 | ### Token管理接口
156 |
157 | 查看token池状态:
158 |
159 | ```bash
160 | curl http://localhost:8001/admin/tokens/stats
161 | ```
162 |
163 | 查看连续失效状态:
164 |
165 | ```bash
166 | curl http://localhost:8001/admin/tokens/consecutive-failures
167 | ```
168 |
169 | 重置连续失效计数:
170 |
171 | ```bash
172 | curl -X POST http://localhost:8001/admin/tokens/reset-consecutive
173 | ```
174 |
175 | 重置指定token:
176 |
177 | ```bash
178 | curl -X POST http://localhost:8001/admin/tokens/reset/0
179 | ```
180 |
181 | 重置所有token:
182 |
183 | ```bash
184 | curl -X POST http://localhost:8001/admin/tokens/reset-all
185 | ```
186 |
187 | 重新加载token文件:
188 |
189 | ```bash
190 | curl -X POST http://localhost:8001/admin/tokens/reload
191 | ```
192 |
193 | 查看token更新器状态(仅在启用自动更新时可用):
194 |
195 | ```bash
196 | curl http://localhost:8001/admin/tokens/updater/status
197 | ```
198 |
199 | 强制更新tokens(仅在启用自动更新时可用):
200 |
201 | ```bash
202 | curl -X POST http://localhost:8001/admin/tokens/updater/force-update
203 | ```
204 |
205 | ### 健康检查
206 |
207 | ```bash
208 | curl http://localhost:8001/health
209 | ```
210 |
211 | ## 环境变量配置
212 |
213 | ### 基础配置
214 |
215 | | 变量名 | 默认值 | 说明 |
216 | | ------------------- | ------------------------------------------- | -------------------- |
217 | | `VALID_API_KEY` | 无默认值 | API 访问密钥(必需) |
218 | | `K2THINK_API_URL` | https://www.k2think.ai/api/chat/completions | K2Think API端点 |
219 |
220 | ### Token管理配置
221 |
222 | | 变量名 | 默认值 | 说明 |
223 | | ---------------------- | -------------- | ----------------- |
224 | | `TOKENS_FILE` | `tokens.txt` | Token文件路径 |
225 | | `MAX_TOKEN_FAILURES` | `3` | Token最大失败次数 |
226 |
227 | ### Token自动更新配置
228 |
229 | | 变量名 | 默认值 | 说明 |
230 | | ---------------------------- | ----------------- | --------------------------------------- |
231 | | `ENABLE_TOKEN_AUTO_UPDATE` | `false` | 是否启用token自动更新 |
232 | | `TOKEN_UPDATE_INTERVAL` | `86400` | token更新间隔(秒),默认24小时 |
233 | | `ACCOUNTS_FILE` | `accounts.txt` | 账户文件路径 |
234 | | `GET_TOKENS_SCRIPT` | `get_tokens.py` | token获取脚本路径 |
235 | | `PROXY_URL` | 空 | HTTP/HTTPS代理地址(用于get_tokens.py) |
236 |
237 | ### 服务器配置
238 |
239 | | 变量名 | 默认值 | 说明 |
240 | | -------- | ----------- | ------------ |
241 | | `HOST` | `0.0.0.0` | 服务监听地址 |
242 | | `PORT` | `8001` | 服务端口 |
243 |
244 | ### 工具调用配置
245 |
246 | | 变量名 | 默认值 | 说明 |
247 | | ------------------------- | -------- | -------------------------------- |
248 | | `ENABLE_TOOLIFY` | `true` | 是否启用工具调用功能 |
249 | | `TOOLIFY_CUSTOM_PROMPT` | `""` | 自定义工具调用提示词模板(可选) |
250 |
251 | 详细配置说明请参考 `.env.example` 文件。
252 |
253 | ## 智能Token管理系统详解
254 |
255 | ### 连续失效自动刷新机制
256 |
257 | 这是系统的核心自愈功能,当检测到连续的token失效时,自动触发强制刷新:
258 |
259 | #### 工作原理
260 |
261 | 1. **连续失效检测**
262 |
263 | - 系统跟踪连续失效的token数量
264 | - 当连续两个token失效时触发自动刷新
265 | - 仅在token池数量大于2时启用(避免小规模token池误触发)
266 | 2. **智能触发条件**
267 |
268 | - 连续失效阈值:2个token
269 | - 最小token池大小:3个token
270 | - 自动更新必须启用:`ENABLE_TOKEN_AUTO_UPDATE=true`
271 | 3. **自动刷新过程**
272 |
273 | - 异步执行,不阻塞当前API请求
274 | - 使用原子性更新机制
275 | - 刷新成功后自动重新加载token池
276 | - 重置连续失效计数器
277 |
278 | #### 监控和管理
279 |
280 | ```bash
281 | # 查看连续失效状态
282 | curl http://localhost:8001/admin/tokens/consecutive-failures
283 |
284 | # 响应示例
285 | {
286 | "status": "success",
287 | "data": {
288 | "consecutive_failures": 1,
289 | "threshold": 2,
290 | "token_pool_size": 710,
291 | "auto_refresh_enabled": true,
292 | "last_check": "实时检测"
293 | }
294 | }
295 |
296 | # 手动重置连续失效计数
297 | curl -X POST http://localhost:8001/admin/tokens/reset-consecutive
298 | ```
299 |
300 | ### Token自动更新机制
301 |
302 | #### 功能说明
303 |
304 | Token自动更新机制允许系统定期从账户文件自动生成新的token池,无需手动维护tokens.txt文件。
305 |
306 | #### 配置步骤
307 |
308 | 1. **准备账户文件**
309 |
310 | 创建 `accounts.txt` 文件,每行一个JSON格式的账户信息:
311 |
312 | ```json
313 | {"email": "user1@example.com", "k2_password": "password1"}
314 | {"email": "user2@example.com", "k2_password": "password2"}
315 | {"email": "user3@example.com", "k2_password": "password3"}
316 | ```
317 |
318 | 2. **启用自动更新**
319 |
320 | 在 `.env` 文件中配置:
321 |
322 | ```bash
323 | # 启用token自动更新
324 | ENABLE_TOKEN_AUTO_UPDATE=true
325 |
326 | # 设置更新间隔(秒)
327 | TOKEN_UPDATE_INTERVAL=86400 # 每24小时更新一次
328 |
329 | # 配置文件路径
330 | ACCOUNTS_FILE=accounts.txt
331 | TOKENS_FILE=tokens.txt
332 | GET_TOKENS_SCRIPT=get_tokens.py
333 |
334 | # 可选:配置代理(如果需要)
335 | PROXY_URL=http://username:password@proxy_host:proxy_port
336 | ```
337 |
338 | 3. **更新触发方式**
339 |
340 | 系统支持多种更新触发方式:
341 |
342 | - **定时更新**: 按照设置的间隔定期更新
343 | - **连续失效触发**: 当连续两个token失效时自动触发
344 | - **手动强制更新**: 通过API手动触发更新
345 | - **启动时更新**: 如果token文件为空或无效,启动时立即更新
346 |
347 | #### 原子性更新机制
348 |
349 | 为了确保token更新过程中服务的连续性,系统采用了原子性更新机制:
350 |
351 | 1. **临时文件生成**: 新token首先写入 `tokens.txt.tmp` 临时文件
352 | 2. **验证检查**: 确认临时文件存在且不为空
353 | 3. **备份当前文件**: 将现有 `tokens.txt` 重命名为 `tokens.txt.backup`
354 | 4. **原子性替换**: 将临时文件重命名为 `tokens.txt`
355 | 5. **重新加载**: 通知token管理器重新加载新的token池
356 |
357 | #### 更新状态监控
358 |
359 | 通过管理接口可以实时监控更新状态:
360 |
361 | ```bash
362 | # 查看详细更新状态
363 | curl http://localhost:8001/admin/tokens/updater/status
364 |
365 | # 响应示例
366 | {
367 | "status": "success",
368 | "data": {
369 | "is_running": true,
370 | "is_updating": false,
371 | "update_interval": 86400,
372 | "last_update": "2024-01-01T12:00:00",
373 | "update_count": 5,
374 | "error_count": 0,
375 | "last_error": null,
376 |     "next_update": "2024-01-02T12:00:00",
377 | "files": {
378 | "get_tokens_script": true,
379 | "accounts_file": true,
380 | "tokens_file": true
381 | }
382 | }
383 | }
384 | ```
385 |
386 | #### 服务保障特性
387 |
388 | - ✅ **零停机时间**: 更新过程中API服务保持可用
389 | - ✅ **请求不中断**: 正在处理的请求不会受到影响
390 | - ✅ **自动恢复**: 连续失效时自动触发刷新
391 | - ✅ **回滚机制**: 更新失败时保留原有token文件
392 | - ✅ **状态透明**: 可实时查看更新进度和状态
393 | - ✅ **错误处理**: 更新失败时记录详细错误信息
394 |
395 | ## 工具调用功能
396 |
397 | K2Think API 代理支持 OpenAI Function Calling 规范的工具调用功能。
398 |
399 | ### 功能特性
400 |
401 | - ✅ 支持 OpenAI 标准的 `tools` 和 `tool_choice` 参数
402 | - ✅ 自动工具提示注入和消息处理
403 | - ✅ 流式和非流式响应中的工具调用检测
404 | - ✅ 智能 JSON 解析和工具调用提取
405 | - ✅ 支持多种工具调用格式(JSON 代码块、内联 JSON、自然语言)
406 |
407 | ### 使用示例
408 |
409 | ```python
410 | import openai
411 |
412 | client = openai.OpenAI(
413 | base_url="http://localhost:8001/v1",
414 | api_key="sk-k2think"
415 | )
416 |
417 | # 定义工具
418 | tools = [
419 | {
420 | "type": "function",
421 | "function": {
422 | "name": "get_weather",
423 | "description": "获取指定城市的天气信息",
424 | "parameters": {
425 | "type": "object",
426 | "properties": {
427 | "city": {
428 | "type": "string",
429 | "description": "城市名称,例如:北京、上海"
430 | },
431 | "unit": {
432 | "type": "string",
433 | "enum": ["celsius", "fahrenheit"],
434 | "description": "温度单位"
435 | }
436 | },
437 | "required": ["city"]
438 | }
439 | }
440 | }
441 | ]
442 |
443 | # 发送工具调用请求
444 | response = client.chat.completions.create(
445 | model="MBZUAI-IFM/K2-Think",
446 | messages=[
447 | {"role": "user", "content": "北京今天天气怎么样?"}
448 | ],
449 | tools=tools,
450 | tool_choice="auto" # auto, none, required 或指定特定工具
451 | )
452 |
453 | # 处理响应
454 | if response.choices[0].message.tool_calls:
455 | for tool_call in response.choices[0].message.tool_calls:
456 | function_name = tool_call.function.name
457 | function_args = tool_call.function.arguments
458 | print(f"调用工具: {function_name}")
459 | print(f"参数: {function_args}")
460 | ```
461 |
462 | ### tool_choice 参数说明
463 |
464 | - `"auto"`: 让模型自动决定是否使用工具(推荐)
465 | - `"none"`: 禁用工具调用
466 | - `"required"`: 强制模型使用工具
467 | - `{"type": "function", "function": {"name": "tool_name"}}`: 强制使用特定工具
468 |
469 | ## Python SDK 使用示例
470 |
471 | ```python
472 | import openai
473 |
474 | # 配置客户端
475 | client = openai.OpenAI(
476 | base_url="http://localhost:8001/v1",
477 | api_key="sk-k2think"
478 | )
479 |
480 | # 发送聊天请求
481 | response = client.chat.completions.create(
482 | model="MBZUAI-IFM/K2-Think",
483 | messages=[
484 | {"role": "user", "content": "解释一下量子计算的基本原理"}
485 | ],
486 | stream=False
487 | )
488 |
489 | print(response.choices[0].message.content)
490 |
491 | # 流式聊天
492 | stream = client.chat.completions.create(
493 | model="MBZUAI-IFM/K2-Think",
494 | messages=[
495 | {"role": "user", "content": "写一首关于人工智能的诗"}
496 | ],
497 | stream=True
498 | )
499 |
500 | for chunk in stream:
501 | if chunk.choices[0].delta.content is not None:
502 | print(chunk.choices[0].delta.content, end="")
503 | ```
504 |
505 | ## 模型特性
506 |
507 | K2-Think 模型具有以下特点:
508 |
509 | - **推理能力**: 模型会先进行思考过程,然后给出答案
510 | - **响应格式**: 使用 `<think>` 和 `<answer>` 标签结构化输出
511 | - **思考内容控制**:
512 | - `MBZUAI-IFM/K2-Think`: 包含完整的思考过程
513 | - `MBZUAI-IFM/K2-Think-nothink`: 仅输出最终答案
514 | - **多语言支持**: 支持中文、英文等多种语言
515 | - **专业领域**: 在数学、科学、编程等领域表现优秀
516 |
517 | ## 完整配置示例
518 |
519 | ### .env 文件示例
520 |
521 | ```bash
522 | # 基础配置
523 | VALID_API_KEY=sk-k2think
524 | HOST=0.0.0.0
525 | PORT=8001
526 |
527 | # Token管理
528 | TOKENS_FILE=tokens.txt
529 | MAX_TOKEN_FAILURES=3
530 |
531 | # Token自动更新(推荐)
532 | ENABLE_TOKEN_AUTO_UPDATE=true
533 | TOKEN_UPDATE_INTERVAL=86400 # 24小时
534 | ACCOUNTS_FILE=accounts.txt
535 | GET_TOKENS_SCRIPT=get_tokens.py
536 |
537 | # 代理配置(可选)
538 | PROXY_URL=http://username:password@proxy.example.com:8080
539 |
540 | # 功能开关
541 | ENABLE_TOOLIFY=true
542 | DEBUG_LOGGING=false
543 |
544 | # 工具调用配置(可选)
545 | # TOOLIFY_CUSTOM_PROMPT="自定义提示词模板"
546 | ```
547 |
548 | ### accounts.txt 文件示例
549 |
550 | ```json
551 | {"email": "user1@example.com", "k2_password": "password1"}
552 | {"email": "user2@example.com", "k2_password": "password2"}
553 | ```
554 |
555 | ## 故障排除
556 |
557 | ### 常见问题
558 |
559 | 1. **Token 相关问题**
560 |
561 | - **所有token失效**: 访问 `/admin/tokens/stats` 查看token状态,使用 `/admin/tokens/reset-all` 重置所有token
562 | - **连续失效**: 查看 `/admin/tokens/consecutive-failures` 了解连续失效状态,系统会自动触发刷新
563 | - **添加新token**:
564 | - 手动模式:编辑 `tokens.txt` 文件添加新token,然后访问 `/admin/tokens/reload` 重新加载
565 | - 自动模式:编辑 `accounts.txt` 添加新账户,然后访问 `/admin/tokens/updater/force-update` 强制更新
566 | - **查看token状态**: 访问 `/health` 端点查看简要统计,或 `/admin/tokens/stats` 查看详细信息
567 | - **自动更新问题**:
568 | - 访问 `/admin/tokens/updater/status` 查看更新器状态和错误信息
569 | - 检查 `is_updating` 字段确认是否正在更新中
570 | - 查看 `last_error` 字段了解最近的错误信息
571 | 2. **端口冲突**
572 |
573 | - 修改 `PORT` 环境变量
574 | - 或使用 Docker 端口映射
575 |
576 | ### 日志查看
577 |
578 | ```bash
579 | # Docker 容器日志
580 | docker logs k2think-api
581 |
582 | # docker-compose日志
583 | docker-compose logs -f k2think-api
584 |
585 | # 本地运行日志
586 | # 日志会直接输出到控制台
587 | ```
588 |
589 | ### 配置检查
590 |
591 | 使用配置检查脚本验证你的环境变量设置:
592 |
593 | ```bash
594 | # 检查当前配置
595 | python check_config_simple.py
596 |
597 | # 查看配置示例
598 | python check_config_simple.py --example
599 | ```
600 |
601 | ### Docker部署注意事项
602 |
603 | 1. **文件映射**
604 |
605 | - `tokens.txt` 通过volume映射到容器内,支持动态更新
606 | - 如果启用自动更新,`tokens.txt` 不能设置为只读(`:ro`)
607 | - `accounts.txt` 映射为只读,包含账户信息用于自动更新
608 | - `.env` 文件包含所有环境变量配置
609 | 2. **健康检查**
610 |
611 | - Docker容器包含健康检查机制
612 | - 可通过 `docker ps` 查看健康状态
613 | 3. **安全考虑**
614 |
615 | - 容器以非root用户运行
616 | - 敏感文件通过volume挂载而非打包到镜像中
617 |
618 | ## 许可证
619 |
620 | MIT License
621 |
622 | ## 贡献
623 |
624 | 欢迎提交 Issue 和 Pull Request!
625 |
--------------------------------------------------------------------------------
/src/token_manager.py:
--------------------------------------------------------------------------------
1 | """
2 | Token管理模块
3 | 负责管理K2Think的token池,实现轮询、负载均衡和失效标记
4 | """
5 | import os
6 | import json
7 | import logging
8 | import threading
9 |
10 | from typing import List, Dict, Optional, Tuple
11 | from datetime import datetime, timedelta
12 |
13 | logger = logging.getLogger(__name__)
14 |
# Import the shared safe logging helpers; fall back to minimal local
# versions when running outside the package layout (direct execution).
try:
    from src.utils import safe_log_error, safe_log_info, safe_log_warning
except ImportError:
    def safe_log_error(logger, msg, exc=None):
        """Best-effort error logging that never raises."""
        try:
            if exc:
                logger.error(f"{msg}: {str(exc)}")
            else:
                logger.error(msg)
        # Fix: a bare `except:` would also swallow KeyboardInterrupt/SystemExit.
        except Exception:
            print(f"Log error: {msg}")

    def safe_log_info(logger, msg):
        """Best-effort info logging that never raises."""
        try:
            logger.info(msg)
        except Exception:
            print(f"Log info: {msg}")

    def safe_log_warning(logger, msg):
        """Best-effort warning logging that never raises."""
        try:
            logger.warning(msg)
        except Exception:
            print(f"Log warning: {msg}")
40 |
class TokenManager:
    """Token pool manager.

    Provides round-robin rotation over the tokens loaded from a file,
    failure and upstream-auth-error bookkeeping, deactivation of tokens
    that fail too often, and automatic triggering of a force-refresh
    callback when failures come in consecutive runs. All pool mutations
    happen under ``self.lock`` so the manager can be shared across
    request threads.
    """

    def __init__(self, tokens_file: str = "tokens.txt", max_failures: int = 3, allow_empty: bool = False):
        """
        Initialize the token manager.

        Args:
            tokens_file: path to the token file (one token per line).
            max_failures: failures after which a token is deactivated.
            allow_empty: tolerate an empty token file (auto-update mode).

        Raises:
            ValueError: no usable token was loaded and allow_empty is False.
        """
        self.tokens_file = tokens_file
        self.max_failures = max_failures
        self.tokens: List[Dict] = []
        self.current_index = 0
        self.lock = threading.Lock()
        self.allow_empty = allow_empty

        # Consecutive token-failure detection.
        self.consecutive_failures = 0
        self.consecutive_failure_threshold = 2  # failures in a row before a refresh
        self.force_refresh_callback = None  # invoked when a forced refresh is needed

        # Consecutive upstream auth error (401/403) detection.
        self.consecutive_upstream_errors = 0
        self.upstream_error_threshold = 2  # upstream errors in a row before a refresh
        self.last_upstream_error_time = None

        # Load the initial pool.
        self.load_tokens()

        if not self.tokens and not allow_empty:
            raise ValueError(f"未找到有效的token,请检查文件: {tokens_file}")

    def load_tokens(self) -> None:
        """Load (or reload) the token pool from ``self.tokens_file``.

        Blank lines and lines starting with '#' are ignored. The parsed pool
        is swapped in under the lock and the round-robin cursor is reset.
        Fix: previously a reload to a *smaller* pool left ``current_index``
        possibly out of range, and ``get_next_token`` could then raise
        IndexError when indexing ``self.tokens``.

        Raises:
            FileNotFoundError: the token file does not exist.
        """
        try:
            if not os.path.exists(self.tokens_file):
                raise FileNotFoundError(f"Token文件不存在: {self.tokens_file}")

            with open(self.tokens_file, 'r', encoding='utf-8') as f:
                lines = f.readlines()

            new_tokens = []
            for line in lines:
                token = line.strip()
                # Skip empty lines and comment lines.
                if token and not token.startswith('#'):
                    new_tokens.append({
                        'token': token,
                        'failures': 0,
                        'is_active': True,
                        'last_used': None,
                        'last_failure': None,
                        'index': len(new_tokens)
                    })

            with self.lock:
                self.tokens = new_tokens
                # Keep the round-robin cursor in range for the new pool.
                self.current_index = 0

            safe_log_info(logger, f"成功加载 {len(self.tokens)} 个token")

        except Exception as e:
            safe_log_error(logger, "加载token文件失败", e)
            raise

    def get_next_token(self) -> Optional[str]:
        """
        Return the next usable token using round-robin selection.

        Returns:
            The token string, or None when no active token is available.
        """
        with self.lock:
            active_tokens = [t for t in self.tokens if t['is_active']]

            if not active_tokens:
                if self.allow_empty:
                    safe_log_warning(logger, "没有可用的token,可能正在等待自动更新")
                else:
                    safe_log_warning(logger, "没有可用的token")
                return None

            # Walk at most one full lap starting at the current cursor.
            attempts = 0
            while attempts < len(self.tokens):
                token_info = self.tokens[self.current_index]

                if token_info['is_active']:
                    token_info['last_used'] = datetime.now()
                    token = token_info['token']

                    # Advance the cursor past the token just handed out.
                    self.current_index = (self.current_index + 1) % len(self.tokens)

                    logger.debug(f"分配token (索引: {token_info['index']}, 失败次数: {token_info['failures']})")
                    return token

                # Inactive token: step over it and keep looking.
                self.current_index = (self.current_index + 1) % len(self.tokens)
                attempts += 1

            safe_log_warning(logger, "所有token都已失效")
            return None

    def mark_token_failure(self, token: str, error_message: str = "") -> bool:
        """
        Record a failed use of *token* and update the consecutive counters.

        Args:
            token: the token that failed.
            error_message: error text, inspected to classify upstream errors.

        Returns:
            True when the token was deactivated by this failure, else False.
        """
        with self.lock:
            for token_info in self.tokens:
                if token_info['token'] == token:
                    token_info['failures'] += 1
                    token_info['last_failure'] = datetime.now()

                    # Classify: upstream auth error (401/403 etc.) vs generic failure.
                    is_upstream_error = self._is_upstream_error(error_message)

                    if is_upstream_error:
                        self.consecutive_upstream_errors += 1
                        self.last_upstream_error_time = datetime.now()

                        safe_log_warning(logger, f"🔒 上游服务认证错误 (索引: {token_info['index']}, "
                                       f"失败次数: {token_info['failures']}/{self.max_failures}, "
                                       f"连续上游错误: {self.consecutive_upstream_errors}): {error_message}")

                        # 401 triggers a forced refresh immediately, without
                        # waiting for the consecutive-error threshold.
                        if "401" in error_message and self.force_refresh_callback:
                            safe_log_warning(logger, f"🚨 检测到401认证错误,立即触发token强制刷新")
                            self._trigger_force_refresh("401认证失败")
                            # Reset so the threshold check cannot double-fire.
                            self.consecutive_upstream_errors = 0
                        else:
                            # Other upstream errors go through the threshold check.
                            self._check_consecutive_upstream_errors()
                    else:
                        self.consecutive_failures += 1

                        safe_log_warning(logger, f"Token失败 (索引: {token_info['index']}, "
                                       f"失败次数: {token_info['failures']}/{self.max_failures}, "
                                       f"连续失效: {self.consecutive_failures}): {error_message}")

                        # May trigger a forced refresh on a run of failures.
                        self._check_consecutive_failures()

                    # Deactivate the token once it hit the failure ceiling.
                    if token_info['failures'] >= self.max_failures:
                        token_info['is_active'] = False
                        safe_log_error(logger, f"Token已失效 (索引: {token_info['index']}, "
                                     f"失败次数: {token_info['failures']})")
                        return True

                    return False

            safe_log_warning(logger, "未找到匹配的token进行失败标记")
            return False

    def mark_token_success(self, token: str) -> None:
        """
        Record a successful use of *token* (clears its failure count).

        Args:
            token: the token that succeeded.
        """
        with self.lock:
            for token_info in self.tokens:
                if token_info['token'] == token:
                    if token_info['failures'] > 0:
                        safe_log_info(logger, f"Token恢复 (索引: {token_info['index']}, "
                                    f"重置失败次数: {token_info['failures']} -> 0)")
                    token_info['failures'] = 0

                    # A success also clears the upstream-error streak.
                    if self.consecutive_upstream_errors > 0:
                        safe_log_info(logger, f"重置上游服务连续错误计数: {self.consecutive_upstream_errors} -> 0")
                        self.consecutive_upstream_errors = 0

                    # Deliberately NOT resetting consecutive_failures here:
                    # only a manual reset or a successful forced refresh does.
                    return

    def get_token_stats(self) -> Dict:
        """
        Return pool statistics.

        Returns:
            Dict with totals, active/inactive counts, cursor position and a
            failure-count histogram.
        """
        with self.lock:
            total = len(self.tokens)
            active = sum(1 for t in self.tokens if t['is_active'])
            inactive = total - active

            failure_distribution = {}
            for token_info in self.tokens:
                failures = token_info['failures']
                failure_distribution[failures] = failure_distribution.get(failures, 0) + 1

            return {
                'total_tokens': total,
                'active_tokens': active,
                'inactive_tokens': inactive,
                'current_index': self.current_index,
                'failure_distribution': failure_distribution,
                'max_failures': self.max_failures
            }

    def reset_token(self, token_index: int) -> bool:
        """
        Reset the token at *token_index* (clear failures, reactivate).

        Args:
            token_index: position of the token in the pool.

        Returns:
            True on success, False for an out-of-range index.
        """
        with self.lock:
            if 0 <= token_index < len(self.tokens):
                token_info = self.tokens[token_index]
                old_failures = token_info['failures']
                old_active = token_info['is_active']

                token_info['failures'] = 0
                token_info['is_active'] = True
                token_info['last_failure'] = None

                safe_log_info(logger, f"Token重置 (索引: {token_index}, "
                            f"失败次数: {old_failures} -> 0, "
                            f"状态: {old_active} -> True)")
                return True

            safe_log_warning(logger, f"无效的token索引: {token_index}")
            return False

    def reset_all_tokens(self) -> None:
        """Reset every token: clear failure counts and reactivate all."""
        with self.lock:
            reset_count = 0
            for token_info in self.tokens:
                if token_info['failures'] > 0 or not token_info['is_active']:
                    token_info['failures'] = 0
                    token_info['is_active'] = True
                    token_info['last_failure'] = None
                    reset_count += 1

            safe_log_info(logger, f"重置了 {reset_count} 个token,当前活跃token数: {len(self.tokens)}")

    def reload_tokens(self) -> None:
        """Re-read the token file and replace the pool (cursor is reset by load_tokens)."""
        safe_log_info(logger, "重新加载token文件...")
        old_count = len(self.tokens)
        self.load_tokens()
        new_count = len(self.tokens)

        safe_log_info(logger, f"Token重新加载完成: {old_count} -> {new_count}")

    def get_token_by_index(self, index: int) -> Optional[Dict]:
        """Return a copy of the token record at *index*, or None when out of range."""
        with self.lock:
            if 0 <= index < len(self.tokens):
                return self.tokens[index].copy()
            return None

    def set_force_refresh_callback(self, callback):
        """
        Register the force-refresh callback.

        Args:
            callback: synchronous callable invoked (on a worker thread) when
                a forced token refresh is needed.
        """
        self.force_refresh_callback = callback
        safe_log_info(logger, "已设置强制刷新回调函数")

    def _is_upstream_error(self, error_message: str) -> bool:
        """
        Classify *error_message* as an upstream service/auth error.

        Args:
            error_message: error text to inspect (case-insensitive).

        Returns:
            True when the message matches a known auth-error indicator or a
            401/403 status-code pattern.
        """
        # Common textual markers of upstream auth failures.
        upstream_error_indicators = [
            "上游服务错误: 401",
            "上游服务错误: 403",
            "401",
            "403",
            "unauthorized",
            "forbidden",
            "invalid token",
            "authentication failed",
            "token expired",
            "authentication error",
            "invalid_request_error",
            "authentication_error"
        ]

        error_lower = error_message.lower()
        is_upstream = any(indicator.lower() in error_lower for indicator in upstream_error_indicators)

        # Also match status-code phrasings such as "上游服务错误: 401".
        import re
        status_code_pattern = r'(?:上游服务错误|http状态错误|状态码):\s*(?:40[13])'
        if re.search(status_code_pattern, error_lower):
            is_upstream = True

        if is_upstream:
            safe_log_info(logger, f"检测到上游服务认证错误: {error_message}")

        return is_upstream

    def _check_consecutive_upstream_errors(self):
        """Trigger a forced refresh once the upstream-error streak hits the threshold."""
        if self.consecutive_upstream_errors >= self.upstream_error_threshold:
            safe_log_warning(logger, f"🚨 检测到连续{self.consecutive_upstream_errors}个上游服务认证错误(401/403),触发自动刷新token池")

            # Reset first so the check cannot re-fire while refreshing.
            self.consecutive_upstream_errors = 0

            if self.force_refresh_callback:
                self._trigger_force_refresh("上游服务连续认证失败 (401/403)")
            else:
                safe_log_warning(logger, "⚠️ 未设置强制刷新回调函数,无法自动刷新token池")

    def _check_consecutive_failures(self):
        """Trigger a forced refresh once the failure streak hits the threshold."""
        # Small pools (<=2 tokens) are exempt to avoid spurious refreshes.
        if len(self.tokens) <= 2:
            logger.debug(f"Token池数量({len(self.tokens)})不足,跳过连续失效检查")
            return

        if self.consecutive_failures >= self.consecutive_failure_threshold:
            safe_log_warning(logger, f"检测到连续{self.consecutive_failures}个token失效,触发强制刷新机制")

            if self.force_refresh_callback:
                self._trigger_force_refresh("连续token失效")
            else:
                safe_log_warning(logger, "未设置强制刷新回调函数,无法自动刷新token池")

    def _trigger_force_refresh(self, reason: str):
        """
        Run the force-refresh callback on a daemon thread.

        Args:
            reason: human-readable trigger reason, used for logging.

        The callback is synchronous; running it on its own thread keeps the
        request path (which may hold self.lock) from blocking. The dead
        asyncio event-loop setup the original carried was removed.
        """
        try:
            def run_callback():
                try:
                    self.force_refresh_callback()
                    safe_log_info(logger, f"🔄 强制刷新tokens.txt已触发 - 原因: {reason}")
                except Exception as e:
                    safe_log_error(logger, "执行强制刷新回调失败", e)

            refresh_thread = threading.Thread(target=run_callback, daemon=True)
            refresh_thread.start()

        except Exception as e:
            safe_log_error(logger, "启动强制刷新线程失败", e)

    def get_consecutive_failures(self) -> int:
        """Return the current consecutive-failure count."""
        with self.lock:
            return self.consecutive_failures

    def get_consecutive_upstream_errors(self) -> int:
        """Return the current consecutive upstream-error count."""
        with self.lock:
            return self.consecutive_upstream_errors

    def reset_consecutive_failures(self):
        """Reset both consecutive counters (manual/administrative reset)."""
        with self.lock:
            old_count = self.consecutive_failures
            old_upstream_count = self.consecutive_upstream_errors

            self.consecutive_failures = 0
            self.consecutive_upstream_errors = 0

            if old_count > 0:
                safe_log_info(logger, f"手动重置连续失效计数: {old_count} -> 0")
            if old_upstream_count > 0:
                safe_log_info(logger, f"手动重置上游服务连续错误计数: {old_upstream_count} -> 0")
455 |
456 |
457 |
--------------------------------------------------------------------------------
/src/response_processor.py:
--------------------------------------------------------------------------------
1 | """
2 | 响应处理模块
3 | 处理流式和非流式响应的所有逻辑
4 | """
5 | import json
6 | import time
7 | import asyncio
8 | import logging
9 | import uuid
10 | from datetime import datetime
11 | from typing import Dict, AsyncGenerator, Tuple, Optional
12 | import pytz
13 | import httpx
14 |
15 | from src.constants import (
16 | APIConstants, ResponseConstants, ContentConstants,
17 | NumericConstants, TimeConstants, HeaderConstants
18 | )
19 | from src.exceptions import UpstreamError, TimeoutError as ProxyTimeoutError
20 | from src.utils import safe_log_error, safe_log_info, safe_log_warning
21 | from src.toolify_config import get_toolify
22 | from src.toolify.detector import StreamingFunctionCallDetector
23 |
24 | logger = logging.getLogger(__name__)
25 |
26 | class ResponseProcessor:
27 | """响应处理器"""
28 |
    def __init__(self, config):
        """Store the proxy configuration object.

        Args:
            config: configuration object; methods of this class read stream
                tuning values from it (STREAM_CHUNK_SIZE, STREAM_DELAY,
                MAX_STREAM_TIME — see calculate_dynamic_chunk_size).
        """
        self.config = config
31 |
32 | def extract_answer_content(self, full_content: str, output_thinking: bool = True) -> str:
33 | """删除第一个标签和最后一个标签,保留内容"""
34 | if not full_content:
35 | return full_content
36 |
37 | # 完全通过模型名控制思考内容输出,默认显示思考内容
38 | should_output_thinking = output_thinking
39 |
40 | if should_output_thinking:
41 | # 删除第一个
42 | answer_start = full_content.find(ContentConstants.ANSWER_START_TAG)
43 | if answer_start != -1:
44 | full_content = full_content[:answer_start] + full_content[answer_start + len(ContentConstants.ANSWER_START_TAG):]
45 |
46 | # 删除最后一个
47 | answer_end = full_content.rfind(ContentConstants.ANSWER_END_TAG)
48 | if answer_end != -1:
49 | full_content = full_content[:answer_end] + full_content[answer_end + len(ContentConstants.ANSWER_END_TAG):]
50 |
51 | return full_content.strip()
52 | else:
53 | # 删除部分(包括标签)
54 | think_start = full_content.find(ContentConstants.THINK_START_TAG)
55 | think_end = full_content.find(ContentConstants.THINK_END_TAG)
56 | if think_start != -1 and think_end != -1:
57 | full_content = full_content[:think_start] + full_content[think_end + len(ContentConstants.THINK_END_TAG):]
58 |
59 | # 删除标签及其内容之外的部分
60 | answer_start = full_content.find(ContentConstants.ANSWER_START_TAG)
61 | answer_end = full_content.rfind(ContentConstants.ANSWER_END_TAG)
62 | if answer_start != -1 and answer_end != -1:
63 | content = full_content[answer_start + len(ContentConstants.ANSWER_START_TAG):answer_end]
64 | return content.strip()
65 |
66 | return full_content.strip()
67 |
68 | def calculate_dynamic_chunk_size(self, content_length: int) -> int:
69 | """
70 | 动态计算流式输出的chunk大小
71 | 确保总输出时间不超过MAX_STREAM_TIME秒
72 |
73 | Args:
74 | content_length: 待输出内容的总长度
75 |
76 | Returns:
77 | int: 动态计算的chunk大小,最小为50
78 | """
79 | if content_length <= 0:
80 | return self.config.STREAM_CHUNK_SIZE
81 |
82 | # 计算需要的总chunk数量以满足时间限制
83 | # 总时间 = chunk数量 * STREAM_DELAY
84 | # chunk数量 = content_length / chunk_size
85 | # 所以:总时间 = (content_length / chunk_size) * STREAM_DELAY
86 | # 解出:chunk_size = (content_length * STREAM_DELAY) / MAX_STREAM_TIME
87 |
88 | calculated_chunk_size = int((content_length * self.config.STREAM_DELAY) / self.config.MAX_STREAM_TIME)
89 |
90 | # 确保chunk_size不小于最小值
91 | dynamic_chunk_size = max(calculated_chunk_size, NumericConstants.MIN_CHUNK_SIZE)
92 |
93 | # 如果计算出的chunk_size太大(比如内容很短),使用默认值
94 | if dynamic_chunk_size > content_length:
95 | dynamic_chunk_size = min(self.config.STREAM_CHUNK_SIZE, content_length)
96 |
97 | logger.debug(f"动态chunk_size计算: 内容长度={content_length}, 计算值={calculated_chunk_size}, 最终值={dynamic_chunk_size}")
98 |
99 | return dynamic_chunk_size
100 |
101 | def content_to_multimodal(self, content) -> str | list[dict]:
102 | """将内容转换为多模态格式用于K2Think API"""
103 | if content is None:
104 | return ""
105 | if isinstance(content, str):
106 | return content
107 | if isinstance(content, list):
108 | # 检查是否包含图像内容
109 | has_image = False
110 | result_parts = []
111 |
112 | for p in content:
113 | if hasattr(p, 'type'): # ContentPart object
114 | if getattr(p, 'type') == ContentConstants.TEXT_TYPE and getattr(p, 'text', None):
115 | result_parts.append({
116 | "type": ContentConstants.TEXT_TYPE,
117 | "text": getattr(p, 'text')
118 | })
119 | elif getattr(p, 'type') == ContentConstants.IMAGE_URL_TYPE and getattr(p, 'image_url', None):
120 | has_image = True
121 | image_url_obj = getattr(p, 'image_url')
122 | if hasattr(image_url_obj, 'url'):
123 | url = getattr(image_url_obj, 'url')
124 | else:
125 | url = image_url_obj.get('url') if isinstance(image_url_obj, dict) else str(image_url_obj)
126 |
127 | result_parts.append({
128 | "type": ContentConstants.IMAGE_URL_TYPE,
129 | "image_url": {
130 | "url": url
131 | }
132 | })
133 | elif isinstance(p, dict):
134 | if p.get("type") == ContentConstants.TEXT_TYPE and p.get("text"):
135 | result_parts.append({
136 | "type": ContentConstants.TEXT_TYPE,
137 | "text": p.get("text")
138 | })
139 | elif p.get("type") == ContentConstants.IMAGE_URL_TYPE and p.get("image_url"):
140 | has_image = True
141 | result_parts.append({
142 | "type": ContentConstants.IMAGE_URL_TYPE,
143 | "image_url": p.get("image_url")
144 | })
145 | elif isinstance(p, str):
146 | result_parts.append({
147 | "type": ContentConstants.TEXT_TYPE,
148 | "text": p
149 | })
150 |
151 | # 如果包含图像,返回多模态格式;否则返回纯文本
152 | if has_image and result_parts:
153 | return result_parts
154 | else:
155 | # 提取所有文本内容
156 | text_parts = []
157 | for part in result_parts:
158 | if part.get("type") == ContentConstants.TEXT_TYPE:
159 | text_parts.append(part.get("text", ""))
160 | return " ".join(text_parts)
161 |
162 | # 处理其他类型
163 | try:
164 | return str(content)
165 | except:
166 | return ""
167 |
168 | def get_current_datetime_info(self) -> Dict[str, str]:
169 | """获取当前时间信息"""
170 | # 设置时区为上海
171 | tz = pytz.timezone(ContentConstants.DEFAULT_TIMEZONE)
172 | now = datetime.now(tz)
173 |
174 | return {
175 | "{{USER_NAME}}": ContentConstants.DEFAULT_USER_NAME,
176 | "{{USER_LOCATION}}": ContentConstants.DEFAULT_USER_LOCATION,
177 | "{{CURRENT_DATETIME}}": now.strftime(TimeConstants.DATETIME_FORMAT),
178 | "{{CURRENT_DATE}}": now.strftime(TimeConstants.DATE_FORMAT),
179 | "{{CURRENT_TIME}}": now.strftime(TimeConstants.TIME_FORMAT),
180 | "{{CURRENT_WEEKDAY}}": now.strftime(TimeConstants.WEEKDAY_FORMAT),
181 | "{{CURRENT_TIMEZONE}}": ContentConstants.DEFAULT_TIMEZONE,
182 | "{{USER_LANGUAGE}}": ContentConstants.DEFAULT_USER_LANGUAGE
183 | }
184 |
185 | def generate_session_id(self) -> str:
186 | """生成会话ID"""
187 | return str(uuid.uuid4())
188 |
189 | def generate_chat_id(self) -> str:
190 | """生成聊天ID"""
191 | return str(uuid.uuid4())
192 |
193 | async def create_http_client(self) -> httpx.AsyncClient:
194 | """创建HTTP客户端"""
195 | base_kwargs = {
196 | "timeout": httpx.Timeout(timeout=None, connect=10.0),
197 | "limits": httpx.Limits(
198 | max_keepalive_connections=self.config.MAX_KEEPALIVE_CONNECTIONS,
199 | max_connections=self.config.MAX_CONNECTIONS
200 | ),
201 | "follow_redirects": True
202 | }
203 |
204 | try:
205 | return httpx.AsyncClient(**base_kwargs)
206 | except Exception as e:
207 | safe_log_error(logger, "创建客户端失败", e)
208 | raise e
209 |
210 | async def make_request(
211 | self,
212 | method: str,
213 | url: str,
214 | headers: dict,
215 | json_data: dict = None,
216 | stream: bool = False
217 | ) -> httpx.Response:
218 | """发送HTTP请求"""
219 | client = None
220 |
221 | try:
222 | client = await self.create_http_client()
223 |
224 | if stream:
225 | # 流式请求返回context manager
226 | return client.stream(method, url, headers=headers, json=json_data, timeout=None)
227 | else:
228 | response = await client.request(
229 | method, url, headers=headers, json=json_data,
230 | timeout=self.config.REQUEST_TIMEOUT
231 | )
232 |
233 | # 详细记录非200响应
234 | if response.status_code != APIConstants.HTTP_OK:
235 | safe_log_error(logger, f"上游API返回错误状态码: {response.status_code}")
236 | safe_log_error(logger, f"响应头: {dict(response.headers)}")
237 | try:
238 | error_body = response.text
239 | safe_log_error(logger, f"错误响应体: {error_body}")
240 | except:
241 | safe_log_error(logger, "无法读取错误响应体")
242 |
243 | response.raise_for_status()
244 | return response
245 |
246 | except httpx.HTTPStatusError as e:
247 | safe_log_error(logger, f"HTTP状态错误: {e.response.status_code} - {e.response.text}")
248 | if client and not stream:
249 | await client.aclose()
250 | raise UpstreamError(f"上游服务错误: {e.response.status_code}", e.response.status_code)
251 | except httpx.TimeoutException as e:
252 | safe_log_error(logger, "请求超时", e)
253 | if client and not stream:
254 | await client.aclose()
255 | raise ProxyTimeoutError("请求超时")
256 | except Exception as e:
257 | safe_log_error(logger, "请求异常", e)
258 | if client and not stream:
259 | await client.aclose()
260 | raise e
261 |
262 | async def process_non_stream_response(self, k2think_payload: dict, headers: dict, output_thinking: bool = None) -> Tuple[str, dict]:
263 | """处理非流式响应"""
264 | try:
265 | response = await self.make_request(
266 | "POST",
267 | self.config.K2THINK_API_URL,
268 | headers,
269 | k2think_payload,
270 | stream=False
271 | )
272 |
273 | # K2Think 非流式请求返回标准JSON格式
274 | result = response.json()
275 |
276 | # 提取内容
277 | full_content = ""
278 | if result.get('choices') and len(result['choices']) > 0:
279 | choice = result['choices'][0]
280 | if choice.get('message') and choice['message'].get('content'):
281 | raw_content = choice['message']['content']
282 | # 提取标签中的内容,去除标签
283 | full_content = self.extract_answer_content(raw_content, output_thinking)
284 |
285 | # 提取token信息
286 | token_info = result.get('usage', {
287 | "prompt_tokens": NumericConstants.DEFAULT_PROMPT_TOKENS,
288 | "completion_tokens": NumericConstants.DEFAULT_COMPLETION_TOKENS,
289 | "total_tokens": NumericConstants.DEFAULT_TOTAL_TOKENS
290 | })
291 |
292 | await response.aclose()
293 | return full_content, token_info
294 |
295 | except Exception as e:
296 | safe_log_error(logger, "处理非流式响应错误", e)
297 | raise
298 |
299 | async def process_stream_response(
300 | self,
301 | k2think_payload: dict,
302 | headers: dict,
303 | output_thinking: bool = None,
304 | original_model: str = None,
305 | enable_toolify: bool = False
306 | ) -> AsyncGenerator[str, None]:
307 | """处理流式响应"""
308 | try:
309 | # 发送开始chunk
310 | start_chunk = self._create_chunk_data(
311 | delta={"role": "assistant", "content": ""},
312 | finish_reason=None,
313 | model=original_model
314 | )
315 | yield f"{ResponseConstants.STREAM_DATA_PREFIX}{json.dumps(start_chunk)}\n\n"
316 |
317 | # 优化的模拟流式输出 - 立即开始获取响应并流式发送
318 | k2think_payload_copy = k2think_payload.copy()
319 | k2think_payload_copy["stream"] = False
320 |
321 | headers_copy = headers.copy()
322 | headers_copy[HeaderConstants.ACCEPT] = HeaderConstants.APPLICATION_JSON
323 |
324 | # 获取完整响应
325 | full_content, token_info = await self.process_non_stream_response(k2think_payload_copy, headers_copy, output_thinking)
326 |
327 | if not full_content:
328 | yield ResponseConstants.STREAM_DONE_MARKER
329 | return
330 |
331 | # 检测工具调用(如果启用)
332 | toolify_detector = None
333 | if enable_toolify:
334 | toolify = get_toolify()
335 | if toolify:
336 | toolify_detector = StreamingFunctionCallDetector(toolify.trigger_signal)
337 | safe_log_info(logger, "[TOOLIFY] 流式工具调用检测器已初始化")
338 |
339 | # 发送内容(支持工具调用检测)
340 | if toolify_detector:
341 | # 使用工具调用检测器处理内容
342 | async for chunk in self._stream_content_with_tool_detection(
343 | full_content, original_model, toolify_detector, k2think_payload.get("chat_id", "")
344 | ):
345 | yield chunk
346 | else:
347 | # 正常流式发送
348 | async for chunk in self._stream_content(full_content, original_model):
349 | yield chunk
350 |
351 | # 发送结束chunk
352 | end_chunk = self._create_chunk_data(
353 | delta={},
354 | finish_reason=ResponseConstants.FINISH_REASON_STOP,
355 | model=original_model
356 | )
357 | yield f"{ResponseConstants.STREAM_DATA_PREFIX}{json.dumps(end_chunk)}\n\n"
358 | yield ResponseConstants.STREAM_DONE_MARKER
359 |
360 | except Exception as e:
361 | safe_log_error(logger, "流式响应处理错误", e)
362 |
363 | # 发送错误信息作为流式响应的一部分,而不是抛出异常
364 | if "401" in str(e) or "unauthorized" in str(e).lower():
365 | # 401错误:显示tokens强制刷新消息
366 | error_message = "🔄 tokens强制刷新已启动,请稍后再试"
367 | safe_log_info(logger, "检测到401错误,向客户端发送强制刷新提示")
368 | else:
369 | # 其他错误:显示一般错误信息
370 | error_message = f"请求处理失败: {str(e)}"
371 |
372 | # 发送错误内容作为正常的流式响应
373 | error_chunk = self._create_chunk_data(
374 | delta={"content": f"\n\n{error_message}"},
375 | finish_reason=None,
376 | model=original_model
377 | )
378 | yield f"{ResponseConstants.STREAM_DATA_PREFIX}{json.dumps(error_chunk)}\n\n"
379 |
380 | # 发送结束chunk
381 | end_chunk = self._create_chunk_data(
382 | delta={},
383 | finish_reason=ResponseConstants.FINISH_REASON_ERROR,
384 | model=original_model
385 | )
386 | yield f"{ResponseConstants.STREAM_DATA_PREFIX}{json.dumps(end_chunk)}\n\n"
387 | yield ResponseConstants.STREAM_DONE_MARKER
388 |
389 | # 重新抛出异常以便上层处理token失败(在发送友好消息之后)
390 | # 上层会捕获这个异常并调用token_manager.mark_token_failure
391 | raise e
392 |
393 | async def _stream_content(self, content: str, model: str = None) -> AsyncGenerator[str, None]:
394 | """流式发送内容"""
395 | chunk_size = self.calculate_dynamic_chunk_size(len(content))
396 |
397 | for i in range(0, len(content), chunk_size):
398 | chunk_content = content[i:i + chunk_size]
399 |
400 | chunk = self._create_chunk_data(
401 | delta={"content": chunk_content},
402 | finish_reason=None,
403 | model=model
404 | )
405 |
406 | yield f"{ResponseConstants.STREAM_DATA_PREFIX}{json.dumps(chunk)}\n\n"
407 | # 添加延迟模拟真实流式效果
408 | await asyncio.sleep(self.config.STREAM_DELAY)
409 |
410 | async def _stream_content_with_tool_detection(
411 | self,
412 | content: str,
413 | model: str,
414 | detector: StreamingFunctionCallDetector,
415 | chat_id: str
416 | ) -> AsyncGenerator[str, None]:
417 | """流式发送内容并检测工具调用"""
418 | chunk_size = self.calculate_dynamic_chunk_size(len(content))
419 |
420 | for i in range(0, len(content), chunk_size):
421 | chunk_content = content[i:i + chunk_size]
422 |
423 | # 使用检测器处理chunk
424 | is_tool_detected, content_to_yield = detector.process_chunk(chunk_content)
425 |
426 | if is_tool_detected:
427 | safe_log_info(logger, "[TOOLIFY] 检测到工具调用触发信号")
428 |
429 | # 输出处理后的内容
430 | if content_to_yield:
431 | chunk = self._create_chunk_data(
432 | delta={"content": content_to_yield},
433 | finish_reason=None,
434 | model=model
435 | )
436 | yield f"{ResponseConstants.STREAM_DATA_PREFIX}{json.dumps(chunk)}\n\n"
437 |
438 | await asyncio.sleep(self.config.STREAM_DELAY)
439 |
440 | # 流结束时的最终处理
441 | parsed_tools, remaining_content = detector.finalize()
442 |
443 | # 输出剩余内容
444 | if remaining_content:
445 | safe_log_info(logger, f"[TOOLIFY] 输出缓冲区剩余内容: {len(remaining_content)}字符")
446 | chunk = self._create_chunk_data(
447 | delta={"content": remaining_content},
448 | finish_reason=None,
449 | model=model
450 | )
451 | yield f"{ResponseConstants.STREAM_DATA_PREFIX}{json.dumps(chunk)}\n\n"
452 |
453 | # 如果检测到工具调用,输出工具调用结果
454 | if parsed_tools:
455 | safe_log_info(logger, f"[TOOLIFY] 检测到 {len(parsed_tools)} 个工具调用")
456 | from src.toolify_handler import format_toolify_response_for_stream
457 | tool_chunks = format_toolify_response_for_stream(parsed_tools, model, chat_id)
458 | for chunk in tool_chunks:
459 | yield chunk
460 | else:
461 | # 没有工具调用,正常结束
462 | end_chunk = self._create_chunk_data(
463 | delta={},
464 | finish_reason=ResponseConstants.FINISH_REASON_STOP,
465 | model=model
466 | )
467 | yield f"{ResponseConstants.STREAM_DATA_PREFIX}{json.dumps(end_chunk)}\n\n"
468 | yield ResponseConstants.STREAM_DONE_MARKER
469 |
470 | def _create_chunk_data(self, delta: dict, finish_reason: Optional[str], model: str = None) -> dict:
471 | """创建流式响应chunk数据"""
472 | return {
473 | "id": f"chatcmpl-{int(time.time() * 1000)}",
474 | "object": ResponseConstants.CHAT_COMPLETION_CHUNK_OBJECT,
475 | "created": int(time.time()),
476 | "model": model or APIConstants.MODEL_ID,
477 | "choices": [{
478 | "index": 0,
479 | "delta": delta,
480 | "finish_reason": finish_reason
481 | }]
482 | }
483 |
484 | def create_completion_response(
485 | self,
486 | content: Optional[str],
487 | token_info: Optional[dict] = None,
488 | model: str = None
489 | ) -> dict:
490 | """创建完整的聊天补全响应"""
491 | message = {
492 | "role": "assistant",
493 | "content": content,
494 | }
495 |
496 | return {
497 | "id": f"chatcmpl-{int(time.time())}",
498 | "object": ResponseConstants.CHAT_COMPLETION_OBJECT,
499 | "created": int(time.time()),
500 | "model": model or APIConstants.MODEL_ID,
501 | "choices": [{
502 | "index": 0,
503 | "message": message,
504 | "finish_reason": ResponseConstants.FINISH_REASON_STOP
505 | }],
506 | "usage": token_info or {
507 | "prompt_tokens": NumericConstants.DEFAULT_PROMPT_TOKENS,
508 | "completion_tokens": NumericConstants.DEFAULT_COMPLETION_TOKENS,
509 | "total_tokens": NumericConstants.DEFAULT_TOTAL_TOKENS
510 | }
511 | }
--------------------------------------------------------------------------------
/src/api_handler.py:
--------------------------------------------------------------------------------
1 | """
2 | API处理模块
3 | 处理主要的API路由逻辑
4 | """
5 | import json
6 | import time
7 | import asyncio
8 | import logging
9 | from typing import Dict, List
10 | from fastapi import HTTPException, Request
11 | from fastapi.responses import StreamingResponse, JSONResponse
12 |
13 | from src.config import Config
14 | from src.constants import (
15 | APIConstants, ResponseConstants, LogMessages,
16 | ErrorMessages, HeaderConstants
17 | )
18 | from src.exceptions import (
19 | AuthenticationError, SerializationError,
20 | K2ThinkProxyError, UpstreamError
21 | )
22 | from src.models import ChatCompletionRequest, ModelsResponse, ModelInfo
23 | from src.response_processor import ResponseProcessor
24 | from src.token_manager import TokenManager
25 | from src.utils import safe_log_error, safe_log_info, safe_log_warning
26 | from src.toolify_handler import should_enable_toolify, prepare_toolify_request
27 |
28 | logger = logging.getLogger(__name__)
29 |
class APIHandler:
    """Handles the main API routes.

    Validates client API keys, translates OpenAI-style chat requests into
    K2Think payloads, and dispatches streaming / non-streaming responses
    with token rotation and retry.
    """

    def __init__(self, config: Config):
        self.config = config
        self.response_processor = ResponseProcessor(config)
        self.token_manager = config.get_token_manager()

    def validate_api_key(self, authorization: str) -> bool:
        """Check the Authorization header value against the configured API key."""
        if not authorization or not authorization.startswith(APIConstants.BEARER_PREFIX):
            return False
        api_key = authorization[APIConstants.BEARER_PREFIX_LENGTH:]  # strip the "Bearer " prefix
        return api_key == self.config.VALID_API_KEY

    def should_output_thinking(self, model_name: str) -> bool:
        """Decide from the model name whether thinking content should be emitted."""
        return model_name != APIConstants.MODEL_ID_NOTHINK

    def get_actual_model_id(self, model_name: str) -> str:
        """Map the "nothink" model alias back to the real upstream model ID."""
        if model_name == APIConstants.MODEL_ID_NOTHINK:
            return APIConstants.MODEL_ID
        return model_name

    async def get_models(self) -> ModelsResponse:
        """Return the model list (standard and nothink variants)."""
        model_info_standard = ModelInfo(
            id=APIConstants.MODEL_ID,
            created=int(time.time()),
            owned_by=APIConstants.MODEL_OWNER,
            root=APIConstants.MODEL_ROOT
        )
        model_info_nothink = ModelInfo(
            id=APIConstants.MODEL_ID_NOTHINK,
            created=int(time.time()),
            owned_by=APIConstants.MODEL_OWNER,
            root=APIConstants.MODEL_ROOT
        )
        return ModelsResponse(data=[model_info_standard, model_info_nothink])

    async def chat_completions(self, request: ChatCompletionRequest, auth_request: Request):
        """Handle a chat completion request end to end."""
        # Validate the client API key.
        authorization = auth_request.headers.get(HeaderConstants.AUTHORIZATION, "")
        if not self.validate_api_key(authorization):
            raise AuthenticationError()

        # Decide whether to emit thinking content, and resolve the real model.
        output_thinking = self.should_output_thinking(request.model)
        actual_model_id = self.get_actual_model_id(request.model)

        try:
            # Normalize incoming messages.
            raw_messages = self._process_raw_messages(request.messages)

            # Check whether tool calling should be enabled.
            request_dict = request.model_dump()
            enable_toolify = should_enable_toolify(request_dict)

            # When tool calling is enabled, preprocess messages and inject the prompt.
            if enable_toolify:
                safe_log_info(logger, "[TOOLIFY] 工具调用功能已启用")
                raw_messages, _ = prepare_toolify_request(request_dict, raw_messages)

            self._log_request_info(raw_messages)

            # Build the K2Think request payload.
            k2think_payload = self._build_k2think_payload(
                request, raw_messages, actual_model_id
            )

            # Validate JSON serialization and use the (possibly repaired)
            # payload. BUG FIX: the repaired payload was previously discarded
            # because _validate_json_serialization only rebound a local.
            k2think_payload = self._validate_json_serialization(k2think_payload)

            # Dispatch the response (with retry).
            if request.stream:
                return await self._handle_stream_response_with_retry(
                    request, k2think_payload, output_thinking, enable_toolify
                )
            else:
                return await self._handle_non_stream_response_with_retry(
                    request, k2think_payload, output_thinking, enable_toolify
                )

        except K2ThinkProxyError:
            # Re-raise project-specific exceptions untouched.
            raise
        except Exception as e:
            safe_log_error(logger, "API转发错误", e)
            raise HTTPException(
                status_code=APIConstants.HTTP_INTERNAL_ERROR,
                detail={
                    "error": {
                        "message": str(e),
                        "type": ErrorMessages.API_ERROR
                    }
                }
            )

    def _process_raw_messages(self, messages: List) -> List[Dict]:
        """Convert request message objects into plain role/content dicts."""
        raw_messages = []
        for msg in messages:
            try:
                raw_messages.append({
                    "role": msg.role,
                    "content": msg.content  # keep the original format; converted later
                })
            except Exception as e:
                safe_log_error(logger, f"处理消息时出错, 消息: {msg}", e)
                # Fall back to a safe stringified value.
                raw_messages.append({
                    "role": msg.role,
                    "content": str(msg.content) if msg.content else ""
                })
        return raw_messages

    def _log_request_info(self, raw_messages: List[Dict]):
        """Log basic request statistics (count and role distribution)."""
        safe_log_info(logger, LogMessages.MESSAGE_RECEIVED.format(len(raw_messages)))

        # Log the role distribution of the original messages.
        role_count = {}
        for msg in raw_messages:
            role = msg.get("role", "unknown")
            role_count[role] = role_count.get(role, 0) + 1
        safe_log_info(logger, LogMessages.ROLE_DISTRIBUTION.format("原始", role_count))

    def _build_k2think_payload(
        self,
        request: ChatCompletionRequest,
        processed_messages: List[Dict],
        actual_model_id: str = None
    ) -> Dict:
        """Build the K2Think request payload (multimodal content supported)."""
        k2think_messages = []
        for msg in processed_messages:
            try:
                # Convert content via the multimodal helper.
                content = self.response_processor.content_to_multimodal(msg.get("content", ""))
                k2think_messages.append({
                    "role": msg["role"],
                    "content": content
                })
            except Exception as e:
                safe_log_error(logger, f"构建K2Think消息时出错, 消息: {msg}", e)
                # Fall back to safe defaults.
                fallback_content = str(msg.get("content", ""))
                k2think_messages.append({
                    "role": msg.get("role", "user"),
                    "content": fallback_content
                })

        # Use the resolved model ID.
        model_id = actual_model_id or APIConstants.MODEL_ID

        return {
            "stream": request.stream,
            "model": model_id,
            "messages": k2think_messages,
            "params": {},
            "tool_servers": [],
            "features": {
                "image_generation": False,
                "code_interpreter": False,
                "web_search": False
            },
            "variables": self.response_processor.get_current_datetime_info(),
            "model_item": {
                "id": model_id,
                "object": ResponseConstants.MODEL_OBJECT,
                "owned_by": APIConstants.MODEL_OWNER,
                "root": APIConstants.MODEL_ROOT,
                "parent": None,
                "status": "active",
                "connection_type": "external",
                "name": model_id
            },
            "background_tasks": {
                "title_generation": True,
                "tags_generation": True
            },
            "chat_id": self.response_processor.generate_chat_id(),
            "id": self.response_processor.generate_session_id(),
            "session_id": self.response_processor.generate_session_id()
        }

    def _validate_json_serialization(self, k2think_payload: Dict) -> Dict:
        """Ensure the payload is JSON-serializable.

        Returns the payload unchanged when it serializes cleanly; otherwise
        attempts a lossy repair (stringifying offending values via
        ``default=str``) and returns the repaired copy. BUG FIX: the repaired
        payload used to be bound to a local and discarded, so callers kept
        sending the broken payload.

        Raises:
            SerializationError: when even the repair attempt fails.
        """
        try:
            # Probe serialization.
            json.dumps(k2think_payload, ensure_ascii=False)
            safe_log_info(logger, LogMessages.JSON_VALIDATION_SUCCESS)
            return k2think_payload
        except Exception as e:
            safe_log_error(logger, LogMessages.JSON_VALIDATION_FAILED.format(e))
            # Round-trip through json with default=str to coerce bad values.
            try:
                repaired_payload = json.loads(json.dumps(k2think_payload, default=str, ensure_ascii=False))
                safe_log_info(logger, LogMessages.JSON_FIXED)
                return repaired_payload
            except Exception as fix_error:
                safe_log_error(logger, "无法修复序列化问题", fix_error)
                raise SerializationError()

    def _build_request_headers(self, request: ChatCompletionRequest, k2think_payload: Dict, token: str) -> Dict[str, str]:
        """Build the upstream request headers for the given token."""
        return {
            HeaderConstants.ACCEPT: (
                HeaderConstants.EVENT_STREAM_JSON if request.stream
                else HeaderConstants.APPLICATION_JSON
            ),
            HeaderConstants.CONTENT_TYPE: HeaderConstants.APPLICATION_JSON,
            HeaderConstants.AUTHORIZATION: f"{APIConstants.BEARER_PREFIX}{token}",
            HeaderConstants.ORIGIN: "https://www.k2think.ai",
            HeaderConstants.REFERER: "https://www.k2think.ai/c/" + k2think_payload["chat_id"],
            HeaderConstants.USER_AGENT: HeaderConstants.DEFAULT_USER_AGENT
        }

    async def _handle_stream_response(
        self,
        k2think_payload: Dict,
        headers: Dict[str, str],
        output_thinking: bool = True,
        original_model: str = None
    ) -> StreamingResponse:
        """Wrap the processor's stream generator in a StreamingResponse (no retry)."""
        return StreamingResponse(
            self.response_processor.process_stream_response(
                k2think_payload, headers, output_thinking, original_model
            ),
            media_type=HeaderConstants.TEXT_EVENT_STREAM,
            headers={
                HeaderConstants.CACHE_CONTROL: HeaderConstants.NO_CACHE,
                HeaderConstants.CONNECTION: HeaderConstants.KEEP_ALIVE,
                HeaderConstants.X_ACCEL_BUFFERING: HeaderConstants.NO_BUFFERING
            }
        )

    async def _handle_non_stream_response(
        self,
        k2think_payload: Dict,
        headers: Dict[str, str],
        output_thinking: bool = True,
        original_model: str = None
    ) -> JSONResponse:
        """Perform a non-streaming upstream call and wrap it as JSON (no retry)."""
        full_content, token_info = await self.response_processor.process_non_stream_response(
            k2think_payload, headers, output_thinking
        )

        openai_response = self.response_processor.create_completion_response(
            full_content, token_info, original_model
        )

        return JSONResponse(content=openai_response)

    async def _handle_stream_response_with_retry(
        self,
        request: ChatCompletionRequest,
        k2think_payload: Dict,
        output_thinking: bool = True,
        enable_toolify: bool = False,
        max_retries: int = 3
    ) -> StreamingResponse:
        """Handle a streaming response with token rotation and retry."""
        last_exception = None

        for attempt in range(max_retries):
            # Fetch the next usable token.
            token = self.token_manager.get_next_token()
            if not token:
                # Tailor the error message to whether auto-update is enabled.
                if Config.ENABLE_TOKEN_AUTO_UPDATE:
                    error_message = "Token池暂时为空,可能正在自动更新中。请稍后重试或检查自动更新服务状态。"
                    safe_log_warning(logger, "没有可用的token,可能正在自动更新中")
                else:
                    error_message = "所有token都已失效,请检查token配置或重新加载token文件。"
                    safe_log_error(logger, "没有可用的token")

                raise HTTPException(
                    status_code=APIConstants.HTTP_SERVICE_UNAVAILABLE,
                    detail={
                        "error": {
                            "message": error_message,
                            "type": ErrorMessages.API_ERROR
                        }
                    }
                )

            # Build the upstream headers for this token.
            headers = self._build_request_headers(request, k2think_payload, token)

            try:
                safe_log_info(logger, f"尝试流式请求 (第{attempt + 1}次)")

                # The generator marks token success/failure internally, since
                # errors after the response has started cannot be raised.
                async def stream_generator():
                    try:
                        async for chunk in self.response_processor.process_stream_response(
                            k2think_payload, headers, output_thinking, request.model, enable_toolify
                        ):
                            yield chunk
                        # Stream completed: mark the token as good.
                        self.token_manager.mark_token_success(token)
                    except Exception as e:
                        # Stream failed mid-flight: mark the token as bad.
                        safe_log_warning(logger, f"🔍 流式响应异常被捕获,准备标记token失败: {str(e)}")

                        # Marking failure also triggers the auto-refresh logic.
                        token_failed = self.token_manager.mark_token_failure(token, str(e))

                        # Special-case 401 authentication errors.
                        if "401" in str(e) or "unauthorized" in str(e).lower():
                            safe_log_warning(logger, f"🔒 流式响应中检测到401认证错误,token标记失败: {token_failed}")
                            safe_log_info(logger, f"🚨 已调用mark_token_failure,应该触发自动刷新")
                        else:
                            safe_log_warning(logger, f"流式响应中检测到其他错误: {str(e)}")

                        # Deliberately NOT re-raised: avoids a "response
                        # already started" error; the client already received
                        # the error text from the response processor.

                return StreamingResponse(
                    stream_generator(),
                    media_type=HeaderConstants.TEXT_EVENT_STREAM,
                    headers={
                        HeaderConstants.CACHE_CONTROL: HeaderConstants.NO_CACHE,
                        HeaderConstants.CONNECTION: HeaderConstants.KEEP_ALIVE,
                        HeaderConstants.X_ACCEL_BUFFERING: HeaderConstants.NO_BUFFERING
                    }
                )
            except Exception as e:
                # Only pre-start failures (mostly connection errors) land here;
                # 401-style upstream errors are handled inside the generator.
                # (The old `except (UpstreamError, Exception)` tuple was
                # redundant — Exception already covers UpstreamError.)
                last_exception = e
                safe_log_warning(logger, f"流式请求启动失败 (第{attempt + 1}次): {e}")

                # Mark the token as failed.
                token_failed = self.token_manager.mark_token_failure(token, str(e))
                if token_failed:
                    safe_log_error(logger, f"Token已被标记为失效")

                # On the last attempt, stop retrying.
                if attempt == max_retries - 1:
                    break

                # Brief pause before retrying.
                await asyncio.sleep(0.5)

        # All retries failed.
        safe_log_error(logger, "所有流式请求重试都失败了,最后错误", last_exception)
        raise HTTPException(
            status_code=APIConstants.HTTP_INTERNAL_ERROR,
            detail={
                "error": {
                    "message": f"流式请求失败: {str(last_exception)}",
                    "type": ErrorMessages.API_ERROR
                }
            }
        )

    async def _handle_non_stream_response_with_retry(
        self,
        request: ChatCompletionRequest,
        k2think_payload: Dict,
        output_thinking: bool = True,
        enable_toolify: bool = False,
        max_retries: int = 3
    ) -> JSONResponse:
        """Handle a non-streaming response with token rotation and retry."""
        last_exception = None

        for attempt in range(max_retries):
            # Fetch the next usable token.
            token = self.token_manager.get_next_token()
            if not token:
                # Tailor the error message to whether auto-update is enabled.
                if Config.ENABLE_TOKEN_AUTO_UPDATE:
                    error_message = "Token池暂时为空,可能正在自动更新中。请稍后重试或检查自动更新服务状态。"
                    safe_log_warning(logger, "没有可用的token,可能正在自动更新中")
                else:
                    error_message = "所有token都已失效,请检查token配置或重新加载token文件。"
                    safe_log_error(logger, "没有可用的token")

                raise HTTPException(
                    status_code=APIConstants.HTTP_SERVICE_UNAVAILABLE,
                    detail={
                        "error": {
                            "message": error_message,
                            "type": ErrorMessages.API_ERROR
                        }
                    }
                )

            # Build the upstream headers for this token.
            headers = self._build_request_headers(request, k2think_payload, token)

            try:
                safe_log_info(logger, f"尝试非流式请求 (第{attempt + 1}次)")

                # Perform the upstream call.
                full_content, token_info = await self.response_processor.process_non_stream_response(
                    k2think_payload, headers, output_thinking
                )

                # Mark the token as good.
                self.token_manager.mark_token_success(token)

                # Check for tool calls in the response.
                tool_response = None
                if enable_toolify:
                    from src.toolify_handler import parse_toolify_response
                    tool_response = parse_toolify_response(full_content, request.model)

                if tool_response:
                    # Return a response carrying tool_calls.
                    openai_response = {
                        "id": f"chatcmpl-{int(time.time())}",
                        "object": ResponseConstants.CHAT_COMPLETION_OBJECT,
                        "created": int(time.time()),
                        "model": request.model,
                        "choices": [{
                            "index": 0,
                            "message": tool_response,
                            "finish_reason": "tool_calls"
                        }],
                        "usage": token_info or {
                            "prompt_tokens": 0,
                            "completion_tokens": 0,
                            "total_tokens": 0
                        }
                    }
                else:
                    openai_response = self.response_processor.create_completion_response(
                        full_content, token_info, request.model
                    )

                return JSONResponse(content=openai_response)

            except Exception as e:
                # (The old `except (UpstreamError, Exception)` tuple was
                # redundant — Exception already covers UpstreamError.)
                last_exception = e

                # Special-case 401 authentication errors.
                if "401" in str(e) or "unauthorized" in str(e).lower():
                    safe_log_warning(logger, f"🔒 非流式请求遇到401认证错误 (第{attempt + 1}次): {e}")

                    # On the first 401, answer with a friendly refresh notice
                    # instead of retrying.
                    if attempt == 0:
                        # Marking failure triggers the auto-refresh logic.
                        self.token_manager.mark_token_failure(token, str(e))

                        # Return the friendly refresh message.
                        openai_response = self.response_processor.create_completion_response(
                            content="🔄 tokens强制刷新已启动,请稍后再试",
                            token_info={
                                "prompt_tokens": 0,
                                "completion_tokens": 10,
                                "total_tokens": 10
                            },
                            model=request.model
                        )
                        return JSONResponse(content=openai_response)
                else:
                    safe_log_warning(logger, f"非流式请求失败 (第{attempt + 1}次): {e}")

                # Mark the token as failed.
                token_failed = self.token_manager.mark_token_failure(token, str(e))
                if token_failed:
                    safe_log_error(logger, f"Token已被标记为失效")

                # On the last attempt, stop retrying.
                if attempt == max_retries - 1:
                    break

                # Brief pause before retrying.
                await asyncio.sleep(0.5)

        # All retries failed.
        safe_log_error(logger, "所有非流式请求重试都失败了,最后错误", last_exception)
        raise HTTPException(
            status_code=APIConstants.HTTP_INTERNAL_ERROR,
            detail={
                "error": {
                    "message": f"非流式请求失败: {str(last_exception)}",
                    "type": ErrorMessages.API_ERROR
                }
            }
        )
--------------------------------------------------------------------------------