├── src
    ├── __init__.py
    ├── toolify
    │   ├── __init__.py
    │   ├── parser.py
    │   ├── detector.py
    │   ├── prompt.py
    │   └── core.py
    ├── toolify_config.py
    ├── exceptions.py
    ├── models.py
    ├── constants.py
    ├── utils.py
    ├── toolify_handler.py
    ├── config.py
    ├── token_updater.py
    ├── token_manager.py
    ├── response_processor.py
    └── api_handler.py
├── data
    ├── accounts.example.txt
    └── tokens.example.txt
├── requirements.txt
├── .gitignore
├── .dockerignore
├── docker-compose.yml
├── Dockerfile
├── .github
    └── workflows
    │   ├── docker-test.yml
    │   └── docker-build-push.yml
├── .env.example
├── get_tokens.py
├── tests
    └── test_tool_calling.py
├── k2think_proxy.py
└── README.md


/src/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | K2Think API Proxy 源代码包
3 | """


--------------------------------------------------------------------------------
/data/accounts.example.txt:
--------------------------------------------------------------------------------
1 | # {"email": "user1@example.com", "k2_password": "password1"}


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | fastapi
2 | uvicorn[standard]
3 | httpx
4 | pydantic
5 | python-dotenv
6 | pytz
7 | requests


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | __pycache__/
 2 | 工具调用参考/
 3 | test/
 4 | utils/
 5 | 
 6 | .vscode/
 7 | 
 8 | .env
 9 | tokens.txt
10 | accounts.txt
11 | tokens.txt.backup
12 | *.pyc
13 | *.log


--------------------------------------------------------------------------------
/src/toolify/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Toolify 插件 - 为 LLM 提供工具调用能力
 3 | 从 Toolify 项目提取的核心功能模块
 4 | """
 5 | 
 6 | from .core import ToolifyCore
 7 | from .parser import parse_function_calls_xml, remove_think_blocks
 8 | from .detector import StreamingFunctionCallDetector
 9 | from .prompt import generate_function_prompt
10 | 
11 | __all__ = [
12 |     'ToolifyCore',
13 |     'parse_function_calls_xml',
14 |     'remove_think_blocks',
15 |     'StreamingFunctionCallDetector',
16 |     'generate_function_prompt',
17 | ]
18 | 
19 | 


--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
 1 | # Git相关
 2 | .git
 3 | .gitignore
 4 | 
 5 | # Python相关
 6 | __pycache__/
 7 | *.py[cod]
 8 | *$py.class
 9 | *.so
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | 
27 | # 虚拟环境
28 | env/
29 | venv/
30 | ENV/
31 | env.bak/
32 | venv.bak/
33 | 
34 | # IDE相关
35 | .vscode/
36 | .idea/
37 | *.swp
38 | *.swo
39 | *~
40 | 
41 | # 配置和数据文件（这些通过volume挂载）
42 | .env
43 | .env.local
44 | .env.example
45 | tokens.txt
46 | tokens.example.txt
47 | tokens.txt.backup
48 | 
49 | # 日志文件
50 | *.log
51 | logs/
52 | 
53 | # 测试相关
54 | .pytest_cache/
55 | .coverage
56 | htmlcov/
57 | test/
58 | 
59 | # Docker相关
60 | Dockerfile*
61 | docker-compose*.yml
62 | .dockerignore
63 | 
64 | # 文档
65 | README.md
66 | *.md
67 | 
68 | # 其他
69 | .DS_Store
70 | Thumbs.db


--------------------------------------------------------------------------------
/data/tokens.example.txt:
--------------------------------------------------------------------------------
 1 | # K2Think Token文件示例
 2 | # 每行一个token，以下为示例格式（请替换为实际的token）
 3 | 
 4 | # 注意事项：
 5 | # 1. 每行只能有一个token
 6 | # 2. 空行和以#开头的注释行会被忽略
 7 | # 3. Token失效时会自动标记，可通过API管理界面重置
 8 | # 4. 建议至少配置2-3个token以确保高可用性
 9 | 
10 | # 以下放置几个可用token，随时可能失效，请自行替换自己的token
11 | eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6IjlhMGE1ZDY5LWQ0ZDgtNGFiMC1hYjhjLTQ5ODNmY2NhZDM4NyIsImV4cCI6MTc1ODIwMjg4NX0.mTDsIrtO0iVTE5hhLcX1bTgmJHMydsHQqGKUsucEg_0
12 | eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6ImY2NmU3MWUwLWUyYTUtNGIwMi04MGY1LWE0Y2RiYjJjZTM1OSIsImV4cCI6MTc1ODIwMjg4Nn0.zcNU3ylq5YXFSFidgzQOXwoicqAefUnf9x1HtKFpY2I
13 | eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6IjMxZWZmMWU3LTQwNTUtNDg3ZS04MzA1LWFiMDU5MTE1OTc0OSIsImV4cCI6MTc1ODIwMjkxM30.3lgvrmPo6esDsfAbVlkl37vRsN3EKYs6BXq45bvu9-E
14 | eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6ImIwZDY2YTZmLTAwN2MtNDNkNS1hZWExLWRkMzM2NjM1ZmUyNCIsImV4cCI6MTc1ODIwMjk0NX0.urcVmh_lBivvE6tNnCmVeDW5vW52GXoYFoqFh196T1I


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: '3.8'
 2 | 
 3 | services:
 4 |   k2think-api:
 5 |     image: julienol/k2think2api:latest
 6 |     container_name: k2think-api
 7 |     ports:
 8 |       - "${HOST_PORT:-8001}:8001"
 9 |     volumes:
10 |       # 使用目录挂载而非文件挂载，避免文件锁定问题
11 |       - ./data:/app/data
12 |       # 或者使用命名卷（推荐用于生产环境）
13 |       # - k2think_data:/app/data
14 |     # 直接以root用户运行，简化权限管理
15 |     env_file:
16 |       - .env
17 |     environment:
18 |       - PYTHONUNBUFFERED=1
19 |       - PYTHONIOENCODING=utf-8
20 |       - PYTHONLEGACYWINDOWSSTDIO=0
21 |       - LC_ALL=C.UTF-8
22 |       - LANG=C.UTF-8
23 |       # 更新配置文件路径指向data目录
24 |       - TOKENS_FILE=/app/data/tokens.txt
25 |       - ACCOUNTS_FILE=/app/data/accounts.txt
26 |     restart: unless-stopped
27 |     # 健康检查
28 |     healthcheck:
29 |       test: ["CMD", "curl", "-f", "http://localhost:8001/health"]
30 |       interval: 30s
31 |       timeout: 10s
32 |       retries: 3
33 |       start_period: 10s
34 | 
35 | # 可选：使用命名卷（推荐用于生产环境）
36 | # volumes:
37 | #   k2think_data:
38 | #     driver: local


--------------------------------------------------------------------------------
/src/toolify_config.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Toolify 配置模块
 3 | 管理工具调用功能的配置和实例
 4 | """
 5 | 
 6 | import logging
 7 | from typing import Optional
 8 | from src.toolify import ToolifyCore
 9 | 
10 | logger = logging.getLogger(__name__)
11 | 
12 | # 全局 Toolify 实例
13 | _toolify_instance: Optional[ToolifyCore] = None
14 | 
15 | 
16 | def get_toolify() -> Optional[ToolifyCore]:
17 |     """
18 |     获取 Toolify 实例（单例模式）
19 |     
20 |     Returns:
21 |         ToolifyCore实例，如果功能未启用则返回None
22 |     """
23 |     global _toolify_instance
24 |     
25 |     # 延迟导入配置以避免循环依赖
26 |     from src.config import Config
27 |     
28 |     if not Config.ENABLE_TOOLIFY:
29 |         logger.debug("[TOOLIFY] 工具调用功能已禁用")
30 |         return None
31 |     
32 |     if _toolify_instance is None:
33 |         _toolify_instance = ToolifyCore(enable_function_calling=True)
34 |         logger.info("[TOOLIFY] 工具调用功能已启用并初始化")
35 |     
36 |     return _toolify_instance
37 | 
38 | 
39 | def is_toolify_enabled() -> bool:
40 |     """检查 Toolify 功能是否启用"""
41 |     from src.config import Config
42 |     return Config.ENABLE_TOOLIFY
43 | 
44 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.12-slim
 2 | 
 3 | # 安装curl用于健康检查
 4 | RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
 5 | 
 6 | # 设置环境变量 - 强化编码支持
 7 | ENV PYTHONUNBUFFERED=1
 8 | ENV PYTHONIOENCODING=utf-8
 9 | ENV PYTHONLEGACYWINDOWSSTDIO=0
10 | ENV LC_ALL=C.UTF-8
11 | ENV LANG=C.UTF-8
12 | 
13 | # 设置工作目录
14 | WORKDIR /app
15 | 
16 | # 复制依赖文件并安装
17 | COPY requirements.txt .
18 | RUN pip install --no-cache-dir -r requirements.txt
19 | 
20 | # 复制应用代码
21 | COPY k2think_proxy.py .
22 | COPY get_tokens.py .
23 | COPY src/ ./src/
24 | 
25 | # 创建数据目录和默认文件
26 | RUN mkdir -p /app/data && \
27 |     touch /app/data/tokens.txt && \
28 |     echo "# Token文件将由自动更新服务生成" > /app/data/tokens.txt && \
29 |     touch /app/data/accounts.txt && \
30 |     echo "# 请通过volume挂载实际的accounts.txt文件" > /app/data/accounts.txt
31 | 
32 | # 创建简单的启动脚本
33 | RUN echo '#!/bin/bash\n\
34 | # 确保数据目录存在\n\
35 | mkdir -p /app/data\n\
36 | # 直接运行应用\n\
37 | exec "$@"' > /app/entrypoint.sh && \
38 |     chmod +x /app/entrypoint.sh
39 | 
40 | # 暴露端口
41 | EXPOSE 8001
42 | 
43 | # 健康检查
44 | HEALTHCHECK --interval=30s --timeout=10s --start-period=10s --retries=3 \
45 |   CMD curl -f http://localhost:8001/health || exit 1
46 | 
47 | # 设置entrypoint和默认命令
48 | ENTRYPOINT ["/app/entrypoint.sh"]
49 | CMD ["python", "k2think_proxy.py"]


--------------------------------------------------------------------------------
/.github/workflows/docker-test.yml:
--------------------------------------------------------------------------------
 1 | name: Test Docker Build
 2 | 
 3 | on:
 4 |   # 对PR进行测试构建，但不推送
 5 |   pull_request:
 6 |     branches:
 7 |       - main
 8 |       - master
 9 |     paths-ignore:
10 |       - 'README.md'
11 |       - '*.md'
12 |       - '.gitignore'
13 |       - 'LICENSE'
14 | 
15 | jobs:
16 |   test-build:
17 |     runs-on: ubuntu-latest
18 |     
19 |     steps:
20 |     - name: Checkout repository
21 |       uses: actions/checkout@v4
22 |     
23 |     - name: Set up Docker Buildx
24 |       uses: docker/setup-buildx-action@v3
25 |     
26 |     - name: Test Docker build
27 |       uses: docker/build-push-action@v5
28 |       with:
29 |         context: .
30 |         file: ./Dockerfile
31 |         push: false
32 |         tags: test:latest
33 |         cache-from: type=gha
34 |         cache-to: type=gha,mode=max
35 |     
36 |     - name: Test container startup
37 |       run: |
38 |         # 创建测试环境变量文件
39 |         cat > .env.test << EOF
40 |         VALID_API_KEY=test-key
41 |         K2THINK_TOKEN=test-token
42 |         TOOL_SUPPORT=true
43 |         DEBUG_LOGGING=true
44 |         HOST=0.0.0.0
45 |         PORT=8001
46 |         EOF
47 |         
48 |         # 启动容器
49 |         docker run -d --name test-container -p 8001:8001 --env-file .env.test test:latest
50 |         
51 |         # 等待容器启动
52 |         sleep 10
53 |         
54 |         # 测试健康检查
55 |         curl -f http://localhost:8001/health || exit 1
56 |         
57 |         # 测试模型接口
58 |         curl -f http://localhost:8001/v1/models || exit 1
59 |         
60 |         # 停止容器
61 |         docker stop test-container
62 |         docker rm test-container
63 |         
64 |         echo "✅ Docker container test passed!"
65 | 


--------------------------------------------------------------------------------
/src/exceptions.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 自定义异常类模块
 3 | 统一管理所有自定义异常
 4 | """
 5 | 
 6 | class K2ThinkProxyError(Exception):
 7 |     """K2Think代理服务基础异常类"""
 8 |     def __init__(self, message: str, error_type: str = "api_error", status_code: int = 500):
 9 |         self.message = message
10 |         self.error_type = error_type
11 |         self.status_code = status_code
12 |         super().__init__(self.message)
13 | 
14 | class ConfigurationError(K2ThinkProxyError):
15 |     """配置错误异常"""
16 |     def __init__(self, message: str):
17 |         super().__init__(message, "configuration_error", 500)
18 | 
19 | class AuthenticationError(K2ThinkProxyError):
20 |     """认证错误异常"""
21 |     def __init__(self, message: str = "Invalid API key provided"):
22 |         super().__init__(message, "authentication_error", 401)
23 | 
24 | class UpstreamError(K2ThinkProxyError):
25 |     """上游服务错误异常"""
26 |     def __init__(self, message: str, status_code: int = 502):
27 |         super().__init__(message, "upstream_error", status_code)
28 | 
29 | class TimeoutError(K2ThinkProxyError):
30 |     """超时错误异常"""
31 |     def __init__(self, message: str = "请求超时"):
32 |         super().__init__(message, "timeout_error", 504)
33 | 
34 | class SerializationError(K2ThinkProxyError):
35 |     """序列化错误异常"""
36 |     def __init__(self, message: str = "请求数据序列化失败"):
37 |         super().__init__(message, "serialization_error", 400)
38 | 
39 | class ToolProcessingError(K2ThinkProxyError):
40 |     """工具处理错误异常"""
41 |     def __init__(self, message: str):
42 |         super().__init__(message, "tool_processing_error", 400)
43 | 
44 | class ContentProcessingError(K2ThinkProxyError):
45 |     """内容处理错误异常"""
46 |     def __init__(self, message: str):
47 |         super().__init__(message, "content_processing_error", 400)


--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
 1 | # K2Think API 代理服务环境变量配置文件
 2 | # 复制此文件为 .env 并根据实际情况修改配置值
 3 | # 思考内容输出控制: MBZUAI-IFM/K2-Think(显示思考) / MBZUAI-IFM/K2-Think-nothink(不显示思考)
 4 | 
 5 | # API认证配置
 6 | VALID_API_KEY=sk-k2think # 客户端访问API时使用的密钥
 7 | # K2THINK_TOKEN=your_k2think_jwt_token_here # 从K2Think官网获取的JWT Token
 8 | 
 9 | # 服务器配置
10 | HOST=0.0.0.0 # 监听地址，0.0.0.0为所有接口，127.0.0.1为仅本地
11 | PORT=8001 # 服务监听端口
12 | 
13 | # 上游API配置
14 | K2THINK_API_URL=https://www.k2think.ai/api/chat/completions # K2Think API完整URL
15 | 
16 | # 工具调用配置
17 | ENABLE_TOOLIFY=true
18 | 
19 | # Token管理配置
20 | # Token文件路径（每行一个token）
21 | TOKENS_FILE=data/tokens.txt
22 | 
23 | # Token最大失败次数（超过后将被标记为失效）
24 | MAX_TOKEN_FAILURES=3
25 | 
26 | # Token自动更新配置
27 | ENABLE_TOKEN_AUTO_UPDATE=false # 是否启用token自动更新
28 | TOKEN_UPDATE_INTERVAL=3600 # token更新间隔（秒），默认1小时
29 | ACCOUNTS_FILE=data/accounts.txt # 账户文件路径
30 | GET_TOKENS_SCRIPT=get_tokens.py # token获取脚本路径
31 | 
32 | # 代理配置（用于get_tokens.py）
33 | # PROXY_URL=http://username:password@proxy_host:proxy_port # HTTP/HTTPS代理地址，留空则不使用代理
34 | PROXY_URL="" # 示例: http://admin:sk-123456@192.168.10.100:8282
35 | 
36 | # 调试配置
37 | LOG_LEVEL=INFO # 调试日志级别: DEBUG/INFO/WARNING/ERROR
38 | DEBUG_LOGGING=false # 是否启用详细请求日志
39 | 
40 | # 高级配置
41 | REQUEST_TIMEOUT=60 # HTTP请求超时时间(秒)
42 | MAX_KEEPALIVE_CONNECTIONS=20 # 最大保持连接数
43 | MAX_CONNECTIONS=100 # 最大连接数
44 | 
45 | # 性能配置
46 | STREAM_DELAY=0.05 # 流式响应模拟延迟(秒)
47 | STREAM_CHUNK_SIZE=50 # 流式响应块大小(字符数)
48 | MAX_STREAM_TIME=6 # 流式响应块最大用时(秒)
49 | 
50 | # 部署配置
51 | APP_ENV=development # 应用环境: development/production/testing
52 | ENABLE_ACCESS_LOG=true # 是否启用访问日志
53 | CORS_ORIGINS=* # CORS允许的源
54 | 
55 | # 使用说明:
56 | # 1. 必须配置: VALID_API_KEY, TOKENS_FILE (tokens.txt文件，每行一个token)
57 | # 2. 推荐配置: HOST, PORT
58 | # 3. 可选配置: TOOL_SUPPORT, DEBUG_LOGGING
59 | # 4. Token自动更新: 设置ENABLE_TOKEN_AUTO_UPDATE=true并提供accounts.txt文件
60 | 


--------------------------------------------------------------------------------
/.github/workflows/docker-build-push.yml:
--------------------------------------------------------------------------------
 1 | name: Build and Push Docker Image
 2 | 
 3 | on:
 4 |   # 自动触发：当推送到main分支时
 5 |   push:
 6 |     branches:
 7 |       - main
 8 |       - master
 9 |     # 忽略README等文档文件的更改
10 |     paths-ignore:
11 |       - 'README.md'
12 |       - '*.md'
13 |       - '.gitignore'
14 |       - 'LICENSE'
15 |   
16 |   # 手动触发
17 |   workflow_dispatch:
18 |     inputs:
19 |       tag:
20 |         description: 'Docker image tag (default: latest)'
21 |         required: false
22 |         default: 'latest'
23 | 
24 | env:
25 |   REGISTRY: docker.io
26 |   IMAGE_NAME: julienol/k2think2api
27 | 
28 | jobs:
29 |   build-and-push:
30 |     runs-on: ubuntu-latest
31 |     
32 |     steps:
33 |     - name: Checkout repository
34 |       uses: actions/checkout@v4
35 |     
36 |     - name: Set up Docker Buildx
37 |       uses: docker/setup-buildx-action@v3
38 |     
39 |     - name: Log in to Docker Hub
40 |       uses: docker/login-action@v3
41 |       with:
42 |         username: ${{ secrets.DOCKER_USERNAME }}
43 |         password: ${{ secrets.DOCKER_PASSWORD }}
44 |     
45 |     - name: Extract metadata
46 |       id: meta
47 |       uses: docker/metadata-action@v5
48 |       with:
49 |         images: ${{ env.IMAGE_NAME }}
50 |         tags: |
51 |           type=ref,event=branch
52 |           type=ref,event=pr
53 |           type=sha,prefix={{branch}}-
54 |           type=raw,value=latest,enable={{is_default_branch}}
55 |           type=raw,value=${{ github.event.inputs.tag }},enable=${{ github.event_name == 'workflow_dispatch' }}
56 |     
57 |     - name: Build and push Docker image
58 |       uses: docker/build-push-action@v5
59 |       with:
60 |         context: .
61 |         file: ./Dockerfile
62 |         push: true
63 |         tags: ${{ steps.meta.outputs.tags }}
64 |         labels: ${{ steps.meta.outputs.labels }}
65 |         platforms: linux/amd64,linux/arm64
66 |         cache-from: type=gha
67 |         cache-to: type=gha,mode=max
68 |     
69 |     - name: Update Docker Hub description
70 |       uses: peter-evans/dockerhub-description@v4
71 |       with:
72 |         username: ${{ secrets.DOCKER_USERNAME }}
73 |         password: ${{ secrets.DOCKER_PASSWORD }}
74 |         repository: ${{ env.IMAGE_NAME }}
75 |         readme-filepath: ./README.md


--------------------------------------------------------------------------------
/src/models.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 数据模型定义
 3 | 定义所有API请求和响应的数据模型
 4 | """
 5 | from pydantic import BaseModel
 6 | from typing import List, Dict, Optional, Union, Any
 7 | 
 8 | class ImageUrl(BaseModel):
 9 |     """Image URL model for vision content"""
10 |     url: str
11 |     detail: Optional[str] = "auto"
12 | 
13 | class ContentPart(BaseModel):
14 |     """Content part model for OpenAI's new content format"""
15 |     type: str
16 |     text: Optional[str] = None
17 |     image_url: Optional[ImageUrl] = None
18 | 
19 | class Message(BaseModel):
20 |     role: str
21 |     content: Optional[Union[str, List[ContentPart]]] = None
22 |     tool_call_id: Optional[str] = None  # 用于tool消息
23 |     tool_calls: Optional[List[Dict[str, Any]]] = None  # 用于assistant消息
24 | 
25 | class FunctionParameters(BaseModel):
26 |     """Function parameters schema"""
27 |     type: str = "object"
28 |     properties: Dict[str, Any] = {}
29 |     required: Optional[List[str]] = None
30 |     
31 | class FunctionDefinition(BaseModel):
32 |     """Function definition"""
33 |     name: str
34 |     description: Optional[str] = None
35 |     parameters: Optional[FunctionParameters] = None
36 | 
37 | class ToolDefinition(BaseModel):
38 |     """Tool definition"""
39 |     type: str = "function"
40 |     function: FunctionDefinition
41 | 
42 | class ToolChoice(BaseModel):
43 |     """Tool choice specification"""
44 |     type: str = "function"
45 |     function: Dict[str, str]  # {"name": "tool_name"}
46 | 
47 | class ChatCompletionRequest(BaseModel):
48 |     model: str = "MBZUAI-IFM/K2-Think"
49 |     messages: List[Message]
50 |     stream: bool = False
51 |     temperature: float = 0.7
52 |     max_tokens: Optional[int] = None
53 |     top_p: Optional[float] = None
54 |     frequency_penalty: Optional[float] = None
55 |     presence_penalty: Optional[float] = None
56 |     stop: Optional[Union[str, List[str]]] = None
57 |     # 工具调用相关字段
58 |     tools: Optional[List[ToolDefinition]] = None
59 |     tool_choice: Optional[Union[str, ToolChoice]] = None  # "auto", "none", 或指定工具
60 | 
61 | class ModelInfo(BaseModel):
62 |     id: str
63 |     object: str = "model"
64 |     created: int
65 |     owned_by: str
66 |     permission: List[Dict] = []
67 |     root: str
68 |     parent: Optional[str] = None
69 | 
70 | class ModelsResponse(BaseModel):
71 |     object: str = "list"
72 |     data: List[ModelInfo]


--------------------------------------------------------------------------------
/src/constants.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 常量定义模块
  3 | 统一管理所有魔法数字和硬编码字符串
  4 | """
  5 | 
  6 | # API相关常量
  7 | class APIConstants:
  8 |     MODEL_ID = "MBZUAI-IFM/K2-Think"
  9 |     MODEL_ID_NOTHINK = "MBZUAI-IFM/K2-Think-nothink"
 10 |     MODEL_OWNER = "MBZUAI"
 11 |     MODEL_ROOT = "mbzuai-k2-think-2508"
 12 |     
 13 |     # HTTP状态码
 14 |     HTTP_OK = 200
 15 |     HTTP_UNAUTHORIZED = 401
 16 |     HTTP_NOT_FOUND = 404
 17 |     HTTP_INTERNAL_ERROR = 500
 18 |     HTTP_GATEWAY_TIMEOUT = 504
 19 |     
 20 |     # 认证相关
 21 |     BEARER_PREFIX = "Bearer "
 22 |     BEARER_PREFIX_LENGTH = 7
 23 | 
 24 | # 响应相关常量
 25 | class ResponseConstants:
 26 |     CHAT_COMPLETION_OBJECT = "chat.completion"
 27 |     CHAT_COMPLETION_CHUNK_OBJECT = "chat.completion.chunk"
 28 |     MODEL_OBJECT = "model"
 29 |     LIST_OBJECT = "list"
 30 |     
 31 |     # 完成原因
 32 |     FINISH_REASON_STOP = "stop"
 33 |     FINISH_REASON_ERROR = "error"
 34 |     
 35 |     # 流式响应标记
 36 |     STREAM_DONE_MARKER = "data: [DONE]\n\n"
 37 |     STREAM_DATA_PREFIX = "data: "
 38 | 
 39 | # 内容处理相关常量
 40 | class ContentConstants:
 41 |     # XML标签
 42 |     THINK_START_TAG = "<think>"
 43 |     THINK_END_TAG = "</think>"
 44 |     ANSWER_START_TAG = "<answer>"
 45 |     ANSWER_END_TAG = "</answer>"
 46 |     
 47 |     # 内容类型
 48 |     TEXT_TYPE = "text"
 49 |     IMAGE_URL_TYPE = "image_url"
 50 |     
 51 |     # 图像占位符
 52 |     IMAGE_PLACEHOLDER = "[图像内容]"
 53 |     
 54 |     # 默认值
 55 |     DEFAULT_USER_NAME = "User"
 56 |     DEFAULT_USER_LOCATION = "Unknown"
 57 |     DEFAULT_USER_LANGUAGE = "en-US"
 58 |     DEFAULT_TIMEZONE = "Asia/Shanghai"
 59 | 
 60 | # 错误消息常量
 61 | class ErrorMessages:
 62 |     INVALID_API_KEY = "Invalid API key provided"
 63 |     AUTHENTICATION_ERROR = "authentication_error"
 64 |     UPSTREAM_ERROR = "upstream_error"
 65 |     TIMEOUT_ERROR = "timeout_error"
 66 |     API_ERROR = "api_error"
 67 |     
 68 |     # 中文错误消息
 69 |     REQUEST_TIMEOUT = "请求超时"
 70 |     SERIALIZATION_FAILED = "请求数据序列化失败"
 71 |     UPSTREAM_SERVICE_ERROR = "上游服务错误"
 72 | 
 73 | # 日志消息常量
 74 | class LogMessages:
 75 |     MESSAGE_RECEIVED = "📥 接收到的原始消息数: {}"
 76 |     ROLE_DISTRIBUTION = "📊 {}消息角色分布: {}"
 77 |     JSON_VALIDATION_SUCCESS = "✅ K2Think请求体JSON序列化验证通过"
 78 |     JSON_VALIDATION_FAILED = "❌ K2Think请求体JSON序列化失败: {}"
 79 |     JSON_FIXED = "🔧 使用default=str修复了序列化问题"
 80 |     
 81 |     # 动态chunk计算日志
 82 |     DYNAMIC_CHUNK_CALC = "动态chunk_size计算: 内容长度={}, 计算值={}, 最终值={}"
 83 | 
 84 | # HTTP头常量
 85 | class HeaderConstants:
 86 |     AUTHORIZATION = "Authorization"
 87 |     CONTENT_TYPE = "Content-Type"
 88 |     ACCEPT = "Accept"
 89 |     ORIGIN = "Origin"
 90 |     REFERER = "Referer"
 91 |     USER_AGENT = "User-Agent"
 92 |     CACHE_CONTROL = "Cache-Control"
 93 |     CONNECTION = "Connection"
 94 |     X_ACCEL_BUFFERING = "X-Accel-Buffering"
 95 |     
 96 |     # 值
 97 |     APPLICATION_JSON = "application/json"
 98 |     TEXT_EVENT_STREAM = "text/event-stream"
 99 |     EVENT_STREAM_JSON = "text/event-stream,application/json"
100 |     NO_CACHE = "no-cache"
101 |     KEEP_ALIVE = "keep-alive"
102 |     NO_BUFFERING = "no"
103 |     
104 |     # User-Agent值
105 |     DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36 Edg/140.0.0.0"
106 | 
107 | # 时间相关常量
108 | class TimeConstants:
109 |     # 时间格式
110 |     DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S"
111 |     DATE_FORMAT = "%Y-%m-%d"
112 |     TIME_FORMAT = "%H:%M:%S"
113 |     WEEKDAY_FORMAT = "%A"
114 |     
115 |     # 微秒转换
116 |     MICROSECONDS_MULTIPLIER = 1000000
117 | 
118 | # 数值常量
119 | class NumericConstants:
120 |     # chunk大小限制
121 |     MIN_CHUNK_SIZE = 50
122 |     
123 |     # 内容预览长度
124 |     CONTENT_PREVIEW_LENGTH = 200
125 |     CONTENT_PREVIEW_SUFFIX = "..."
126 |     
127 |     # 默认token使用量
128 |     DEFAULT_PROMPT_TOKENS = 0
129 |     DEFAULT_COMPLETION_TOKENS = 0
130 |     DEFAULT_TOTAL_TOKENS = 0


--------------------------------------------------------------------------------
/src/utils.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 工具函数模块
  3 | 提供通用的工具函数
  4 | """
  5 | import logging
  6 | import sys
  7 | 
  8 | def safe_log_error(logger: logging.Logger, message: str, exception: Exception = None):
  9 |     """
 10 |     安全地记录错误日志，避免编码问题
 11 |     
 12 |     Args:
 13 |         logger: 日志记录器
 14 |         message: 错误消息
 15 |         exception: 异常对象（可选）
 16 |     """
 17 |     try:
 18 |         # 确保消息是字符串类型
 19 |         if not isinstance(message, str):
 20 |             message = str(message)
 21 |         
 22 |         if exception:
 23 |             # 安全地处理异常信息，避免编码问题
 24 |             try:
 25 |                 error_msg = str(exception)
 26 |                 # 处理可能的编码问题
 27 |                 if isinstance(error_msg, bytes):
 28 |                     error_msg = error_msg.decode('utf-8', errors='replace')
 29 |                 else:
 30 |                     error_msg = error_msg.encode('utf-8', errors='replace').decode('utf-8')
 31 |             except Exception:
 32 |                 error_msg = repr(exception)
 33 |             
 34 |             full_message = f"{message}: {error_msg}"
 35 |         else:
 36 |             full_message = message
 37 |         
 38 |         # 确保消息本身也是安全的
 39 |         try:
 40 |             if isinstance(full_message, bytes):
 41 |                 safe_message = full_message.decode('utf-8', errors='replace')
 42 |             else:
 43 |                 safe_message = full_message.encode('utf-8', errors='replace').decode('utf-8')
 44 |         except Exception:
 45 |             safe_message = repr(full_message)
 46 |         
 47 |         logger.error(safe_message)
 48 |         
 49 |     except Exception as e:
 50 |         # 如果连安全日志都失败了，使用最基本的方式记录
 51 |         try:
 52 |             fallback_msg = f"Logging error: {repr(e)}, Original: {repr(message)}"
 53 |             logger.error(fallback_msg)
 54 |         except Exception:
 55 |             # 最后的保险措施 - 直接打印到控制台
 56 |             try:
 57 |                 print(f"CRITICAL LOGGING FAILURE: {repr(message)}", file=sys.stderr)
 58 |             except Exception:
 59 |                 pass  # 如果连print都失败了，就放弃
 60 | 
 61 | def safe_log_info(logger: logging.Logger, message: str):
 62 |     """
 63 |     安全地记录信息日志，避免编码问题
 64 |     
 65 |     Args:
 66 |         logger: 日志记录器
 67 |         message: 信息消息
 68 |     """
 69 |     try:
 70 |         # 确保消息是字符串类型
 71 |         if not isinstance(message, str):
 72 |             message = str(message)
 73 |         
 74 |         # 确保消息是安全的
 75 |         try:
 76 |             if isinstance(message, bytes):
 77 |                 safe_message = message.decode('utf-8', errors='replace')
 78 |             else:
 79 |                 safe_message = message.encode('utf-8', errors='replace').decode('utf-8')
 80 |         except Exception:
 81 |             safe_message = repr(message)
 82 |         
 83 |         logger.info(safe_message)
 84 |         
 85 |     except Exception as e:
 86 |         try:
 87 |             fallback_msg = f"Logging info error: {repr(e)}, Original: {repr(message)}"
 88 |             logger.info(fallback_msg)
 89 |         except Exception:
 90 |             try:
 91 |                 print(f"CRITICAL INFO LOGGING FAILURE: {repr(message)}", file=sys.stderr)
 92 |             except Exception:
 93 |                 pass
 94 | 
 95 | def safe_log_warning(logger: logging.Logger, message: str):
 96 |     """
 97 |     安全地记录警告日志，避免编码问题
 98 |     
 99 |     Args:
100 |         logger: 日志记录器
101 |         message: 警告消息
102 |     """
103 |     try:
104 |         # 确保消息是字符串类型
105 |         if not isinstance(message, str):
106 |             message = str(message)
107 |         
108 |         # 确保消息是安全的
109 |         try:
110 |             if isinstance(message, bytes):
111 |                 safe_message = message.decode('utf-8', errors='replace')
112 |             else:
113 |                 safe_message = message.encode('utf-8', errors='replace').decode('utf-8')
114 |         except Exception:
115 |             safe_message = repr(message)
116 |         
117 |         logger.warning(safe_message)
118 |         
119 |     except Exception as e:
120 |         try:
121 |             fallback_msg = f"Logging warning error: {repr(e)}, Original: {repr(message)}"
122 |             logger.warning(fallback_msg)
123 |         except Exception:
124 |             try:
125 |                 print(f"CRITICAL WARNING LOGGING FAILURE: {repr(message)}", file=sys.stderr)
126 |             except Exception:
127 |                 pass
128 | 
129 | def safe_str(obj) -> str:
130 |     """
131 |     安全地将对象转换为字符串，避免编码问题
132 |     
133 |     Args:
134 |         obj: 要转换的对象
135 |         
136 |     Returns:
137 |         str: 安全的字符串表示
138 |     """
139 |     try:
140 |         if isinstance(obj, bytes):
141 |             return obj.decode('utf-8', errors='replace')
142 |         elif isinstance(obj, str):
143 |             return obj.encode('utf-8', errors='replace').decode('utf-8')
144 |         else:
145 |             return str(obj).encode('utf-8', errors='replace').decode('utf-8')
146 |     except Exception:
147 |         return repr(obj)


--------------------------------------------------------------------------------
/src/toolify/parser.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Toolify XML 解析器
  3 | 解析模型响应中的工具调用XML格式
  4 | """
  5 | 
  6 | import re
  7 | import json
  8 | import logging
  9 | from typing import List, Dict, Any, Optional
 10 | 
 11 | logger = logging.getLogger(__name__)
 12 | 
 13 | 
 14 | def remove_think_blocks(text: str) -> str:
 15 |     """
 16 |     临时移除所有 <think>...</think> 块用于XML解析
 17 |     支持嵌套think标签
 18 |     注意：此函数仅用于临时解析，不影响返回给用户的原始内容
 19 |     """
 20 |     while '<think>' in text and '</think>' in text:
 21 |         start_pos = text.find('<think>')
 22 |         if start_pos == -1:
 23 |             break
 24 |         
 25 |         pos = start_pos + 7
 26 |         depth = 1
 27 |         
 28 |         while pos < len(text) and depth > 0:
 29 |             if text[pos:pos+7] == '<think>':
 30 |                 depth += 1
 31 |                 pos += 7
 32 |             elif text[pos:pos+8] == '</think>':
 33 |                 depth -= 1
 34 |                 pos += 8
 35 |             else:
 36 |                 pos += 1
 37 |         
 38 |         if depth == 0:
 39 |             text = text[:start_pos] + text[pos:]
 40 |         else:
 41 |             break
 42 |     
 43 |     return text
 44 | 
 45 | 
 46 | def parse_function_calls_xml(xml_string: str, trigger_signal: str) -> Optional[List[Dict[str, Any]]]:
 47 |     """
 48 |     增强型XML解析函数，支持动态触发信号
 49 |     
 50 |     1. 保留 <think>...</think> 块（它们应正常返回给用户）
 51 |     2. 解析时临时移除think块，防止干扰XML解析
 52 |     3. 查找触发信号的最后一次出现
 53 |     4. 从最后一个触发信号开始解析function_calls
 54 |     
 55 |     Args:
 56 |         xml_string: 包含XML的响应字符串
 57 |         trigger_signal: 触发信号字符串
 58 |         
 59 |     Returns:
 60 |         解析出的工具调用列表，格式为 [{"name": "tool_name", "args": {...}}, ...]
 61 |         如果没有找到工具调用，返回None
 62 |     """
 63 |     logger.debug(f"[TOOLIFY] 开始解析XML，输入长度: {len(xml_string) if xml_string else 0}")
 64 |     logger.debug(f"[TOOLIFY] 使用触发信号: {trigger_signal[:20]}...")
 65 |     
 66 |     if not xml_string or trigger_signal not in xml_string:
 67 |         logger.debug(f"[TOOLIFY] 输入为空或不包含触发信号")
 68 |         return None
 69 |     
 70 |     # 临时移除think块用于解析
 71 |     cleaned_content = remove_think_blocks(xml_string)
 72 |     logger.debug(f"[TOOLIFY] 移除think块后内容长度: {len(cleaned_content)}")
 73 |     
 74 |     # 查找所有触发信号位置
 75 |     signal_positions = []
 76 |     start_pos = 0
 77 |     while True:
 78 |         pos = cleaned_content.find(trigger_signal, start_pos)
 79 |         if pos == -1:
 80 |             break
 81 |         signal_positions.append(pos)
 82 |         start_pos = pos + 1
 83 |     
 84 |     if not signal_positions:
 85 |         logger.debug(f"[TOOLIFY] 在清理后的内容中未找到触发信号")
 86 |         return None
 87 |     
 88 |     logger.debug(f"[TOOLIFY] 找到 {len(signal_positions)} 个触发信号位置: {signal_positions}")
 89 |     
 90 |     # 使用最后一个触发信号位置
 91 |     last_signal_pos = signal_positions[-1]
 92 |     content_after_signal = cleaned_content[last_signal_pos:]
 93 |     logger.debug(f"[TOOLIFY] 从最后触发信号开始的内容: {repr(content_after_signal[:100])}")
 94 |     
 95 |     # 查找function_calls标签
 96 |     calls_content_match = re.search(r"<function_calls>([\s\S]*?)</function_calls>", content_after_signal)
 97 |     if not calls_content_match:
 98 |         logger.warning(f"[TOOLIFY] 未找到function_calls标签！内容: {repr(content_after_signal[:300])}")
 99 |         # 检查是否有不完整的function_calls开始标签
100 |         if "<function_calls" in content_after_signal:
101 |             logger.warning(f"[TOOLIFY] 发现不完整的function_calls开始标签，但没有结束标签")
102 |         return None
103 |     
104 |     calls_content = calls_content_match.group(1)
105 |     logger.debug(f"[TOOLIFY] function_calls内容: {repr(calls_content)}")
106 |     
107 |     # 解析所有function_call块
108 |     results = []
109 |     call_blocks = re.findall(r"<function_call>([\s\S]*?)</function_call>", calls_content)
110 |     logger.debug(f"[TOOLIFY] 找到 {len(call_blocks)} 个function_call块")
111 |     
112 |     for i, block in enumerate(call_blocks):
113 |         logger.debug(f"[TOOLIFY] 处理function_call #{i+1}: {repr(block)}")
114 |         
115 |         # 提取tool名称
116 |         tool_match = re.search(r"<tool>(.*?)</tool>", block)
117 |         if not tool_match:
118 |             logger.debug(f"[TOOLIFY] 块 #{i+1} 中未找到tool标签")
119 |             continue
120 |         
121 |         name = tool_match.group(1).strip()
122 |         args = {}
123 |         
124 |         # 提取args块
125 |         args_block_match = re.search(r"<args>([\s\S]*?)</args>", block)
126 |         if args_block_match:
127 |             args_content = args_block_match.group(1)
128 |             # 支持包含连字符的参数标签名（如-i, -A）；匹配任何非空格、非'>'、非'/'字符
129 |             arg_matches = re.findall(r"<([^\s>/]+)>([\s\S]*?)</\1>", args_content)
130 | 
131 |             def _coerce_value(v: str):
132 |                 """尝试将字符串值转换为JSON对象"""
133 |                 try:
134 |                     return json.loads(v)
135 |                 except Exception:
136 |                     pass
137 |                 return v
138 | 
139 |             for k, v in arg_matches:
140 |                 args[k] = _coerce_value(v)
141 |         
142 |         result = {"name": name, "args": args}
143 |         results.append(result)
144 |         logger.debug(f"[TOOLIFY] 添加工具调用: {result}")
145 |     
146 |     logger.debug(f"[TOOLIFY] 最终解析结果: {results}")
147 |     return results if results else None
148 | 
149 | 


--------------------------------------------------------------------------------
/src/toolify_handler.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Toolify 请求和响应处理模块
  3 | 处理工具调用相关的请求预处理和响应解析
  4 | """
  5 | 
  6 | import json
  7 | import logging
  8 | import uuid
  9 | from typing import Dict, Any, List, Optional
 10 | 
 11 | from src.toolify_config import get_toolify, is_toolify_enabled
 12 | from src.toolify.prompt import generate_function_prompt, safe_process_tool_choice
 13 | from src.toolify.parser import parse_function_calls_xml
 14 | 
 15 | logger = logging.getLogger(__name__)
 16 | 
 17 | 
 18 | def should_enable_toolify(request_dict: Dict[str, Any]) -> bool:
 19 |     """
 20 |     判断是否应该为当前请求启用工具调用功能
 21 |     
 22 |     Args:
 23 |         request_dict: 请求字典
 24 |         
 25 |     Returns:
 26 |         是否启用工具调用
 27 |     """
 28 |     if not is_toolify_enabled():
 29 |         return False
 30 |     
 31 |     # 检查请求中是否包含tools
 32 |     has_tools = request_dict.get("tools") and len(request_dict.get("tools", [])) > 0
 33 |     
 34 |     return has_tools
 35 | 
 36 | 
 37 | def prepare_toolify_request(request_dict: Dict[str, Any], messages: List[Dict[str, Any]]) -> tuple[List[Dict[str, Any]], bool]:
 38 |     """
 39 |     准备带工具调用的请求
 40 |     
 41 |     Args:
 42 |         request_dict: 原始请求字典
 43 |         messages: 消息列表
 44 |         
 45 |     Returns:
 46 |         (处理后的消息列表, 是否启用了工具调用)
 47 |     """
 48 |     toolify = get_toolify()
 49 |     if not toolify:
 50 |         return messages, False
 51 |     
 52 |     tools = request_dict.get("tools")
 53 |     if not tools or len(tools) == 0:
 54 |         return messages, False
 55 |     
 56 |     logger.info(f"[TOOLIFY] 检测到 {len(tools)} 个工具定义，启用工具调用功能")
 57 |     
 58 |     # 预处理消息（转换tool和tool_calls）
 59 |     processed_messages = toolify.preprocess_messages(messages)
 60 |     logger.debug(f"[TOOLIFY] 消息预处理完成: {len(messages)} -> {len(processed_messages)}")
 61 |     
 62 |     # 生成工具调用提示词
 63 |     from src.config import Config
 64 |     function_prompt, trigger_signal = generate_function_prompt(
 65 |         tools,
 66 |         toolify.trigger_signal,
 67 |         Config.TOOLIFY_CUSTOM_PROMPT
 68 |     )
 69 |     
 70 |     # 处理 tool_choice
 71 |     tool_choice = request_dict.get("tool_choice")
 72 |     tool_choice_prompt = safe_process_tool_choice(tool_choice)
 73 |     if tool_choice_prompt:
 74 |         function_prompt += tool_choice_prompt
 75 |     
 76 |     # 在消息开头注入系统提示词
 77 |     system_message = {"role": "system", "content": function_prompt}
 78 |     processed_messages.insert(0, system_message)
 79 |     
 80 |     logger.debug(f"[TOOLIFY] 已注入工具调用系统提示词，消息数: {len(processed_messages)}")
 81 |     
 82 |     return processed_messages, True
 83 | 
 84 | 
 85 | def parse_toolify_response(content: str, model: str) -> Optional[Dict[str, Any]]:
 86 |     """
 87 |     解析响应中的工具调用
 88 |     
 89 |     Args:
 90 |         content: 响应内容
 91 |         model: 模型名称
 92 |         
 93 |     Returns:
 94 |         如果检测到工具调用，返回包含tool_calls的响应字典；否则返回None
 95 |     """
 96 |     toolify = get_toolify()
 97 |     if not toolify:
 98 |         return None
 99 |     
100 |     logger.debug(f"[TOOLIFY] 开始解析响应中的工具调用，内容长度: {len(content)}")
101 |     
102 |     # 解析 XML 格式的工具调用
103 |     parsed_tools = parse_function_calls_xml(content, toolify.trigger_signal)
104 |     
105 |     if not parsed_tools:
106 |         logger.debug("[TOOLIFY] 未检测到工具调用")
107 |         return None
108 |     
109 |     logger.info(f"[TOOLIFY] 检测到 {len(parsed_tools)} 个工具调用")
110 |     
111 |     # 转换为 OpenAI 格式
112 |     tool_calls = toolify.convert_parsed_tools_to_openai_format(parsed_tools)
113 |     
114 |     return {
115 |         "tool_calls": tool_calls,
116 |         "content": None,
117 |         "role": "assistant"
118 |     }
119 | 
120 | 
121 | def format_toolify_response_for_stream(parsed_tools: List[Dict[str, Any]], model: str, chat_id: str) -> List[str]:
122 |     """
123 |     格式化工具调用为流式响应块
124 |     
125 |     Args:
126 |         parsed_tools: 解析出的工具列表
127 |         model: 模型名称
128 |         chat_id: 会话ID
129 |         
130 |     Returns:
131 |         SSE格式的响应块列表
132 |     """
133 |     toolify = get_toolify()
134 |     if not toolify:
135 |         return []
136 |     
137 |     tool_calls = toolify.convert_parsed_tools_to_openai_format(parsed_tools)
138 |     chunks: List[str] = []
139 |     
140 |     # 初始块 - 发送角色和tool_calls
141 |     initial_chunk = {
142 |         "id": chat_id,
143 |         "object": "chat.completion.chunk",
144 |         "created": int(uuid.uuid4().time_low),
145 |         "model": model,
146 |         "choices": [{
147 |             "index": 0,
148 |             "delta": {
149 |                 "role": "assistant",
150 |                 "content": None,
151 |                 "tool_calls": tool_calls
152 |             },
153 |             "finish_reason": None
154 |         }],
155 |     }
156 |     chunks.append(f"data: {json.dumps(initial_chunk)}\n\n")
157 |     
158 |     # 结束块
159 |     final_chunk = {
160 |         "id": chat_id,
161 |         "object": "chat.completion.chunk",
162 |         "created": int(uuid.uuid4().time_low),
163 |         "model": model,
164 |         "choices": [{
165 |             "index": 0,
166 |             "delta": {},
167 |             "finish_reason": "tool_calls"
168 |         }],
169 |     }
170 |     chunks.append(f"data: {json.dumps(final_chunk)}\n\n")
171 |     chunks.append("data: [DONE]\n\n")
172 |     
173 |     return chunks
174 | 
175 | 


--------------------------------------------------------------------------------
/src/toolify/detector.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Toolify 流式检测器
  3 | 用于在流式响应中检测工具调用
  4 | """
  5 | 
  6 | import logging
  7 | from typing import Optional, List, Dict, Any
  8 | from .parser import parse_function_calls_xml
  9 | 
 10 | logger = logging.getLogger(__name__)
 11 | 
 12 | 
 13 | class StreamingFunctionCallDetector:
 14 |     """
 15 |     增强型流式函数调用检测器，支持动态触发信号，避免在<think>标签内误判
 16 |     
 17 |     核心特性：
 18 |     1. 避免在<think>块内触发工具调用检测
 19 |     2. 正常输出<think>块内容给用户
 20 |     3. 支持嵌套think标签
 21 |     """
 22 |     
 23 |     def __init__(self, trigger_signal: str):
 24 |         self.trigger_signal = trigger_signal
 25 |         self.reset()
 26 |     
 27 |     def reset(self):
 28 |         """重置检测器状态"""
 29 |         self.content_buffer = ""
 30 |         self.state = "detecting"  # detecting, signal_detected, tool_parsing
 31 |         self.in_think_block = False
 32 |         self.think_depth = 0
 33 |         self.signal = self.trigger_signal
 34 |         self.signal_len = len(self.signal)
 35 |         self.signal_position = -1  # 记录触发信号的位置
 36 |     
 37 |     def process_chunk(self, delta_content: str) -> tuple[bool, str]:
 38 |         """
 39 |         处理流式内容块
 40 |         
 41 |         Args:
 42 |             delta_content: 新的内容块
 43 |             
 44 |         Returns:
 45 |             (is_tool_call_detected, content_to_yield): 是否检测到工具调用，以及应该输出的内容
 46 |         """
 47 |         if not delta_content:
 48 |             return False, ""
 49 |         
 50 |         self.content_buffer += delta_content
 51 |         content_to_yield = ""
 52 |         
 53 |         if self.state == "tool_parsing":
 54 |             # 已经在解析工具调用，继续累积内容
 55 |             logger.debug(f"[TOOLIFY-DETECTOR] 状态已是tool_parsing，继续累积，缓冲区长度: {len(self.content_buffer)}")
 56 |             return False, ""
 57 |         
 58 |         if self.state == "signal_detected":
 59 |             # 已检测到触发信号，等待<function_calls>标签
 60 |             logger.debug(f"[TOOLIFY-DETECTOR] 状态是signal_detected，检查是否有<function_calls>，缓冲区长度: {len(self.content_buffer)}")
 61 |             if "<function_calls>" in self.content_buffer:
 62 |                 logger.debug(f"[TOOLIFY-DETECTOR] 确认有<function_calls>标签，进入tool_parsing状态")
 63 |                 self.state = "tool_parsing"
 64 |                 return True, ""
 65 |             elif len(self.content_buffer) > 300:
 66 |                 # 触发信号后300字符内还没有<function_calls>，认为是误判
 67 |                 logger.debug(f"[TOOLIFY-DETECTOR] 触发信号后300字符内未发现<function_calls>，视为误判，恢复正常输出")
 68 |                 self.state = "detecting"
 69 |                 # 输出所有缓冲的内容
 70 |                 output = self.content_buffer
 71 |                 self.content_buffer = ""
 72 |                 self.signal_position = -1
 73 |                 return False, output
 74 |             else:
 75 |                 # 继续等待
 76 |                 return False, ""
 77 |         
 78 |         if delta_content:
 79 |             logger.debug(f"[TOOLIFY-DETECTOR] 处理块: {repr(delta_content[:50])}{'...' if len(delta_content) > 50 else ''}, 缓冲区长度: {len(self.content_buffer)}, think状态: {self.in_think_block}")
 80 |         
 81 |         i = 0
 82 |         while i < len(self.content_buffer):
 83 |             # 更新think状态
 84 |             skip_chars = self._update_think_state(i)
 85 |             if skip_chars > 0:
 86 |                 for j in range(skip_chars):
 87 |                     if i + j < len(self.content_buffer):
 88 |                         content_to_yield += self.content_buffer[i + j]
 89 |                 i += skip_chars
 90 |                 continue
 91 |             
 92 |             # 在非think块中检测触发信号
 93 |             if not self.in_think_block and self._can_detect_signal_at(i):
 94 |                 if self.content_buffer[i:i+self.signal_len] == self.signal:
 95 |                     # 检测到触发信号
 96 |                     logger.debug(f"[TOOLIFY-DETECTOR] 在非think块中检测到触发信号! 信号: {self.signal[:20]}...")
 97 |                     logger.debug(f"[TOOLIFY-DETECTOR] 触发信号位置: {i}, think状态: {self.in_think_block}, think深度: {self.think_depth}")
 98 |                     
 99 |                     # 输出触发信号之前的内容
100 |                     # 保留触发信号及之后的内容在缓冲区，进入signal_detected状态等待验证
101 |                     self.state = "signal_detected"
102 |                     self.signal_position = 0  # 触发信号现在在缓冲区开头
103 |                     self.content_buffer = self.content_buffer[i:]
104 |                     logger.debug(f"[TOOLIFY-DETECTOR] 进入signal_detected状态，等待<function_calls>标签")
105 |                     return False, content_to_yield
106 |             
107 |             # 如果剩余内容不足以判断，保留在缓冲区
108 |             remaining_len = len(self.content_buffer) - i
109 |             if remaining_len < self.signal_len or remaining_len < 8:
110 |                 break
111 |             
112 |             content_to_yield += self.content_buffer[i]
113 |             i += 1
114 |         
115 |         self.content_buffer = self.content_buffer[i:]
116 |         return False, content_to_yield
117 |     
118 |     def _update_think_state(self, pos: int):
119 |         """更新think标签状态，支持嵌套"""
120 |         remaining = self.content_buffer[pos:]
121 |         
122 |         if remaining.startswith('<think>'):
123 |             self.think_depth += 1
124 |             self.in_think_block = True
125 |             logger.debug(f"[TOOLIFY-DETECTOR] 进入think块，深度: {self.think_depth}")
126 |             return 7
127 |         
128 |         elif remaining.startswith('</think>'):
129 |             self.think_depth = max(0, self.think_depth - 1)
130 |             self.in_think_block = self.think_depth > 0
131 |             logger.debug(f"[TOOLIFY-DETECTOR] 退出think块，深度: {self.think_depth}")
132 |             return 8
133 |         
134 |         return 0
135 |     
136 |     def _can_detect_signal_at(self, pos: int) -> bool:
137 |         """检查是否可以在指定位置检测信号"""
138 |         return (pos + self.signal_len <= len(self.content_buffer) and 
139 |                 not self.in_think_block)
140 |     
141 |     def finalize(self) -> tuple[Optional[List[Dict[str, Any]]], str]:
142 |         """
143 |         流结束时的最终处理
144 |         
145 |         Returns:
146 |             (parsed_tools, remaining_content): 解析出的工具调用和剩余未输出的内容
147 |         """
148 |         logger.debug(f"[TOOLIFY-DETECTOR] finalize() - 当前状态: {self.state}, 缓冲区长度: {len(self.content_buffer)}")
149 |         
150 |         if self.state == "tool_parsing":
151 |             logger.debug(f"[TOOLIFY-DETECTOR] finalize() - 缓冲区内容前500字符: {repr(self.content_buffer[:500])}")
152 |             result = parse_function_calls_xml(self.content_buffer, self.trigger_signal)
153 |             logger.debug(f"[TOOLIFY-DETECTOR] finalize() - 解析结果: {result}")
154 |             return result, ""
155 |         
156 |         elif self.state == "signal_detected":
157 |             # 流结束时还在等待<function_calls>标签，说明模型输出了触发信号但没有完整的工具调用
158 |             logger.debug(f"[TOOLIFY-DETECTOR] finalize() - 流结束但状态是signal_detected，可能是不完整的工具调用")
159 |             logger.debug(f"[TOOLIFY-DETECTOR] finalize() - 缓冲区内容: {repr(self.content_buffer[:300])}")
160 |             # 尝试解析，如果失败就把缓冲区内容作为普通文本返回
161 |             result = parse_function_calls_xml(self.content_buffer, self.trigger_signal)
162 |             if result:
163 |                 logger.debug(f"[TOOLIFY-DETECTOR] finalize() - 成功解析出工具调用: {result}")
164 |                 return result, ""
165 |             else:
166 |                 logger.debug(f"[TOOLIFY-DETECTOR] finalize() - 解析失败，返回缓冲区内容作为普通文本")
167 |                 return None, self.content_buffer
168 |         
169 |         # detecting状态：没有检测到工具调用，返回缓冲区中剩余的内容
170 |         if self.content_buffer:
171 |             logger.debug(f"[TOOLIFY-DETECTOR] finalize() - 状态是detecting，返回缓冲区内容: {repr(self.content_buffer[:100])}")
172 |         else:
173 |             logger.debug(f"[TOOLIFY-DETECTOR] finalize() - 状态是detecting，缓冲区为空")
174 |         return None, self.content_buffer
175 | 
176 | 


--------------------------------------------------------------------------------
/src/toolify/prompt.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Toolify 提示词生成器
  3 | 生成工具调用的系统提示词
  4 | """
  5 | 
  6 | import json
  7 | import logging
  8 | from typing import List, Dict, Any
  9 | 
 10 | logger = logging.getLogger(__name__)
 11 | 
 12 | 
 13 | def get_function_call_prompt_template(trigger_signal: str, custom_template: str = None) -> str:
 14 |     """
 15 |     基于动态触发信号生成提示词模板
 16 |     
 17 |     Args:
 18 |         trigger_signal: 触发信号字符串
 19 |         custom_template: 自定义模板（可选）
 20 |         
 21 |     Returns:
 22 |         提示词模板字符串
 23 |     """
 24 |     if custom_template:
 25 |         logger.info("[TOOLIFY] 使用配置中的自定义提示词模板")
 26 |         return custom_template.format(
 27 |             trigger_signal=trigger_signal,
 28 |             tools_list="{tools_list}"
 29 |         )
 30 |     
 31 |     return f"""
 32 | 你可以访问以下可用工具来帮助解决问题：
 33 | 
 34 | {{tools_list}}
 35 | 
 36 | **重要上下文说明：**
 37 | 1. 如果需要，你可以在单次响应中调用多个工具。
 38 | 2. 对话上下文中可能已包含之前函数调用的工具执行结果。请仔细查看对话历史，避免不必要的重复工具调用。
 39 | 3. 当工具执行结果出现在上下文中时，它们将使用 <tool_result>...</tool_result> 这样的XML标签格式化，便于识别。
 40 | 4. 这是你可以使用的唯一工具调用格式，任何偏差都将导致失败。
 41 | 
 42 | 当你需要使用工具时，你**必须**严格遵循以下格式。不要在工具调用语法的第一行和第二行包含任何额外的文本、解释或对话：
 43 | 
 44 | 1. 开始工具调用时，在新行上准确输出：
 45 | {trigger_signal}
 46 | 不要有前导或尾随空格，完全按照上面显示的输出。触发信号必须单独占一行，且只出现一次。
 47 | 
 48 | 2. 从第二行开始，**立即**紧跟完整的 <function_calls> XML块。
 49 | 
 50 | 3. 对于多个工具调用，在同一个 <function_calls> 包装器中包含多个 <function_call> 块。
 51 | 
 52 | 4. 在结束标签 </function_calls> 后不要添加任何文本或解释。
 53 | 
 54 | 严格的参数键规则：
 55 | - 你必须使用**完全相同**的参数键（区分大小写和标点符号）。不要重命名、添加或删除字符。
 56 | - 如果键以连字符开头（例如 -i, -C），你必须在标签名中保留连字符。例如：<-i>true</-i>, <-C>2</-C>。
 57 | - 永远不要将 "-i" 转换为 "i" 或将 "-C" 转换为 "C"。不要复数化、翻译或给参数键起别名。
 58 | - <tool> 标签必须包含列表中某个工具的确切名称。任何其他工具名称都是无效的。
 59 | - <args> 必须包含该工具的所有必需参数。
 60 | 
 61 | 正确示例（多个工具调用，包括带连字符的键）：
 62 | ...响应内容（可选）...
 63 | {trigger_signal}
 64 | <function_calls>
 65 |     <function_call>
 66 |         <tool>search</tool>
 67 |         <args>
 68 |             <keywords>["Python Document", "how to use python"]</keywords>
 69 |         </args>
 70 |     </function_call>
 71 | </function_calls>
 72 | 
 73 | 现在请准备好严格遵循以上规范。
 74 | """
 75 | 
 76 | 
 77 | def generate_function_prompt(tools: List[Dict[str, Any]], trigger_signal: str, custom_template: str = None) -> tuple[str, str]:
 78 |     """
 79 |     基于客户端请求中的工具定义生成注入的系统提示词
 80 |     
 81 |     Args:
 82 |         tools: 工具定义列表（OpenAI格式）
 83 |         trigger_signal: 触发信号
 84 |         custom_template: 自定义模板（可选）
 85 |         
 86 |     Returns:
 87 |         (prompt_content, trigger_signal): 提示词内容和触发信号
 88 |     """
 89 |     tools_list_str = []
 90 |     for i, tool in enumerate(tools):
 91 |         func = tool.get("function", {})
 92 |         name = func.get("name", "")
 93 |         description = func.get("description", "")
 94 | 
 95 |         # 读取 JSON Schema 字段
 96 |         schema: Dict[str, Any] = func.get("parameters", {}) or {}
 97 |         props: Dict[str, Any] = schema.get("properties", {}) or {}
 98 |         required_list: List[str] = schema.get("required", []) or []
 99 | 
100 |         # 简要摘要行：name (type)
101 |         params_summary = ", ".join([
102 |             f"{p_name} ({(p_info or {}).get('type', 'any')})" for p_name, p_info in props.items()
103 |         ]) or "None"
104 | 
105 |         # 构建详细参数规范
106 |         detail_lines: List[str] = []
107 |         for p_name, p_info in props.items():
108 |             p_info = p_info or {}
109 |             p_type = p_info.get("type", "any")
110 |             is_required = "Yes" if p_name in required_list else "No"
111 |             p_desc = p_info.get("description")
112 |             enum_vals = p_info.get("enum")
113 |             default_val = p_info.get("default")
114 |             examples_val = p_info.get("examples") or p_info.get("example")
115 | 
116 |             # 常见约束和提示
117 |             constraints: Dict[str, Any] = {}
118 |             for key in [
119 |                 "minimum", "maximum", "exclusiveMinimum", "exclusiveMaximum",
120 |                 "minLength", "maxLength", "pattern", "format",
121 |                 "minItems", "maxItems", "uniqueItems"
122 |             ]:
123 |                 if key in p_info:
124 |                     constraints[key] = p_info.get(key)
125 | 
126 |             # 数组项类型提示
127 |             if p_type == "array":
128 |                 items = p_info.get("items") or {}
129 |                 if isinstance(items, dict):
130 |                     itype = items.get("type")
131 |                     if itype:
132 |                         constraints["items.type"] = itype
133 | 
134 |             # 组合详细行
135 |             detail_lines.append(f"- {p_name}:")
136 |             detail_lines.append(f"  - type: {p_type}")
137 |             detail_lines.append(f"  - required: {is_required}")
138 |             if p_desc:
139 |                 detail_lines.append(f"  - description: {p_desc}")
140 |             if enum_vals is not None:
141 |                 try:
142 |                     detail_lines.append(f"  - enum: {json.dumps(enum_vals, ensure_ascii=False)}")
143 |                 except Exception:
144 |                     detail_lines.append(f"  - enum: {enum_vals}")
145 |             if default_val is not None:
146 |                 try:
147 |                     detail_lines.append(f"  - default: {json.dumps(default_val, ensure_ascii=False)}")
148 |                 except Exception:
149 |                     detail_lines.append(f"  - default: {default_val}")
150 |             if examples_val is not None:
151 |                 try:
152 |                     detail_lines.append(f"  - examples: {json.dumps(examples_val, ensure_ascii=False)}")
153 |                 except Exception:
154 |                     detail_lines.append(f"  - examples: {examples_val}")
155 |             if constraints:
156 |                 try:
157 |                     detail_lines.append(f"  - constraints: {json.dumps(constraints, ensure_ascii=False)}")
158 |                 except Exception:
159 |                     detail_lines.append(f"  - constraints: {constraints}")
160 | 
161 |         detail_block = "\n".join(detail_lines) if detail_lines else "(无参数详情)"
162 | 
163 |         desc_block = f"```\n{description}\n```" if description else "None"
164 | 
165 |         tools_list_str.append(
166 |             f"{i + 1}. <tool name=\"{name}\">\n"
167 |             f"   描述:\n{desc_block}\n"
168 |             f"   参数摘要: {params_summary}\n"
169 |             f"   必需参数: {', '.join(required_list) if required_list else 'None'}\n"
170 |             f"   参数详情:\n{detail_block}"
171 |         )
172 |     
173 |     prompt_template = get_function_call_prompt_template(trigger_signal, custom_template)
174 |     prompt_content = prompt_template.replace("{tools_list}", "\n\n".join(tools_list_str))
175 |     
176 |     return prompt_content, trigger_signal
177 | 
178 | 
179 | def safe_process_tool_choice(tool_choice) -> str:
180 |     """
181 |     安全处理tool_choice字段，避免类型错误
182 |     
183 |     Args:
184 |         tool_choice: tool_choice参数（可能是字符串或对象）
185 |         
186 |     Returns:
187 |         附加的提示词内容
188 |     """
189 |     try:
190 |         if tool_choice is None:
191 |             return ""
192 |         
193 |         if isinstance(tool_choice, str):
194 |             if tool_choice == "none":
195 |                 return "\n\n**重要提示：** 本轮你被禁止使用任何工具。请像普通聊天助手一样响应，直接回答用户的问题。"
196 |             else:
197 |                 logger.debug(f"[TOOLIFY] 未知的tool_choice字符串值: {tool_choice}")
198 |                 return ""
199 |         
200 |         elif hasattr(tool_choice, 'function') and hasattr(tool_choice.function, 'name'):
201 |             required_tool_name = tool_choice.function.name
202 |             return f"\n\n**重要提示：** 本轮你必须**仅**使用名为 `{required_tool_name}` 的工具。生成必要的参数并按指定的XML格式输出。"
203 |         
204 |         else:
205 |             logger.debug(f"[TOOLIFY] 不支持的tool_choice类型: {type(tool_choice)}")
206 |             return ""
207 |     
208 |     except Exception as e:
209 |         logger.error(f"[TOOLIFY] 处理tool_choice时出错: {e}")
210 |         return ""
211 | 
212 | 


--------------------------------------------------------------------------------
/src/config.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | 配置管理模块
  4 | 统一管理所有环境变量和配置项
  5 | """
  6 | import os
  7 | import logging
  8 | from typing import List
  9 | from dotenv import load_dotenv
 10 | from src.token_manager import TokenManager
 11 | from src.token_updater import TokenUpdater
 12 | 
 13 | # 加载环境变量
 14 | load_dotenv()
 15 | 
 16 | class Config:
 17 |     """应用配置类"""
 18 |     
 19 |     # API认证配置
 20 |     VALID_API_KEY: str = os.getenv("VALID_API_KEY", "")
 21 |     # 移除硬编码的K2THINK_TOKEN，使用token管理器
 22 |     K2THINK_API_URL: str = os.getenv("K2THINK_API_URL", "https://www.k2think.ai/api/chat/completions")
 23 |     
 24 |     # Token管理配置
 25 |     TOKENS_FILE: str = os.getenv("TOKENS_FILE", "tokens.txt")
 26 |     MAX_TOKEN_FAILURES: int = int(os.getenv("MAX_TOKEN_FAILURES", "3"))
 27 |     
 28 |     # Token自动更新配置
 29 |     ENABLE_TOKEN_AUTO_UPDATE: bool = os.getenv("ENABLE_TOKEN_AUTO_UPDATE", "false").lower() == "true"
 30 |     TOKEN_UPDATE_INTERVAL: int = int(os.getenv("TOKEN_UPDATE_INTERVAL", "86400"))  # 默认24小时
 31 |     ACCOUNTS_FILE: str = os.getenv("ACCOUNTS_FILE", "accounts.txt")
 32 |     GET_TOKENS_SCRIPT: str = os.getenv("GET_TOKENS_SCRIPT", "get_tokens.py")
 33 |     
 34 |     # Token管理器实例（延迟初始化）
 35 |     _token_manager: TokenManager = None
 36 |     _token_updater: TokenUpdater = None
 37 |     
 38 |     # 服务器配置
 39 |     HOST: str = os.getenv("HOST", "0.0.0.0")
 40 |     PORT: int = int(os.getenv("PORT", "8001"))
 41 |     
 42 |     # 功能开关
 43 |     DEBUG_LOGGING: bool = os.getenv("DEBUG_LOGGING", "false").lower() == "true"
 44 |     ENABLE_ACCESS_LOG: bool = os.getenv("ENABLE_ACCESS_LOG", "true").lower() == "true"
 45 |     
 46 |     # 性能配置
 47 |     REQUEST_TIMEOUT: float = float(os.getenv("REQUEST_TIMEOUT", "60"))
 48 |     MAX_KEEPALIVE_CONNECTIONS: int = int(os.getenv("MAX_KEEPALIVE_CONNECTIONS", "20"))
 49 |     MAX_CONNECTIONS: int = int(os.getenv("MAX_CONNECTIONS", "100"))
 50 |     STREAM_DELAY: float = float(os.getenv("STREAM_DELAY", "0.05"))
 51 |     STREAM_CHUNK_SIZE: int = int(os.getenv("STREAM_CHUNK_SIZE", "50"))
 52 |     MAX_STREAM_TIME: float = float(os.getenv("MAX_STREAM_TIME", "10.0"))
 53 |     
 54 |     # 日志配置
 55 |     LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO").upper()
 56 |     
 57 |     # CORS配置
 58 |     CORS_ORIGINS: List[str] = (
 59 |         os.getenv("CORS_ORIGINS", "*").split(",") 
 60 |         if os.getenv("CORS_ORIGINS", "*") != "*" 
 61 |         else ["*"]
 62 |     )
 63 |     
 64 |     # 工具调用配置
 65 |     ENABLE_TOOLIFY: bool = os.getenv("ENABLE_TOOLIFY", "true").lower() == "true"
 66 |     TOOLIFY_CUSTOM_PROMPT: str = os.getenv("TOOLIFY_CUSTOM_PROMPT", "")
 67 |     
 68 |     @classmethod
 69 |     def validate(cls) -> None:
 70 |         """验证必需的配置项"""
 71 |         if not cls.VALID_API_KEY:
 72 |             raise ValueError("错误：VALID_API_KEY 环境变量未设置。请在 .env 文件中提供一个安全的API密钥。")
 73 |         
 74 |         # 验证token文件是否存在
 75 |         if not os.path.exists(cls.TOKENS_FILE):
 76 |             if cls.ENABLE_TOKEN_AUTO_UPDATE:
 77 |                 # 如果启用了自动更新，检查必要的文件是否存在
 78 |                 if not os.path.exists(cls.ACCOUNTS_FILE):
 79 |                     raise ValueError(f"错误：启用了token自动更新，但账户文件 {cls.ACCOUNTS_FILE} 不存在。请创建账户文件或禁用自动更新。")
 80 |                 if not os.path.exists(cls.GET_TOKENS_SCRIPT):
 81 |                     raise ValueError(f"错误：启用了token自动更新，但脚本文件 {cls.GET_TOKENS_SCRIPT} 不存在。")
 82 |                 
 83 |                 # 创建一个空的token文件，让token更新服务来处理
 84 |                 print(f"Token文件 {cls.TOKENS_FILE} 不存在，已启用自动更新。创建空token文件，等待更新服务生成...")
 85 |                 try:
 86 |                     with open(cls.TOKENS_FILE, 'w', encoding='utf-8') as f:
 87 |                         f.write("# Token文件将由自动更新服务生成\n")
 88 |                     print("空token文件已创建，服务启动后将自动更新token池。")
 89 |                 except Exception as e:
 90 |                     raise ValueError(f"错误：无法创建token文件 {cls.TOKENS_FILE}: {e}")
 91 |             else:
 92 |                 # 如果没有启用自动更新，则要求手动提供token文件
 93 |                 raise ValueError(f"错误：Token文件 {cls.TOKENS_FILE} 不存在。请手动创建token文件或启用自动更新功能（设置 ENABLE_TOKEN_AUTO_UPDATE=true）。")
 94 |         
 95 |         # 验证数值范围
 96 |         if cls.PORT < 1 or cls.PORT > 65535:
 97 |             raise ValueError(f"错误：PORT 值 {cls.PORT} 不在有效范围内 (1-65535)")
 98 |         
 99 |         if cls.REQUEST_TIMEOUT <= 0:
100 |             raise ValueError(f"错误：REQUEST_TIMEOUT 必须大于0，当前值: {cls.REQUEST_TIMEOUT}")
101 |         
102 |         if cls.STREAM_DELAY < 0:
103 |             raise ValueError(f"错误：STREAM_DELAY 不能为负数，当前值: {cls.STREAM_DELAY}")
104 |     
105 |     @classmethod
106 |     def setup_logging(cls) -> None:
107 |         """设置日志配置"""
108 |         import sys
109 |         
110 |         level_map = {
111 |             "DEBUG": logging.DEBUG,
112 |             "INFO": logging.INFO,
113 |             "WARNING": logging.WARNING,
114 |             "ERROR": logging.ERROR
115 |         }
116 |         
117 |         log_level = level_map.get(cls.LOG_LEVEL, logging.INFO)
118 |         
119 |         # 确保日志输出使用UTF-8编码
120 |         logging.basicConfig(
121 |             level=log_level,
122 |             format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
123 |             handlers=[
124 |                 logging.StreamHandler(sys.stdout)
125 |             ]
126 |         )
127 |         
128 |         # 确保标准输出使用UTF-8编码
129 |         if hasattr(sys.stdout, 'reconfigure'):
130 |             sys.stdout.reconfigure(encoding='utf-8')
131 |         if hasattr(sys.stderr, 'reconfigure'):
132 |             sys.stderr.reconfigure(encoding='utf-8')
133 |     
134 |     @classmethod
135 |     def get_token_manager(cls) -> TokenManager:
136 |         """获取token管理器实例（单例模式）"""
137 |         if cls._token_manager is None:
138 |             cls._token_manager = TokenManager(
139 |                 tokens_file=cls.TOKENS_FILE,
140 |                 max_failures=cls.MAX_TOKEN_FAILURES,
141 |                 allow_empty=cls.ENABLE_TOKEN_AUTO_UPDATE  # 自动更新模式下允许空文件
142 |             )
143 |             # 如果启用了自动更新，设置强制刷新回调
144 |             if cls.ENABLE_TOKEN_AUTO_UPDATE:
145 |                 cls._setup_force_refresh_callback()
146 |         return cls._token_manager
147 |     
148 |     @classmethod
149 |     def get_token_updater(cls) -> TokenUpdater:
150 |         """获取token更新器实例（单例模式）"""
151 |         if cls._token_updater is None:
152 |             cls._token_updater = TokenUpdater(
153 |                 update_interval=cls.TOKEN_UPDATE_INTERVAL,
154 |                 get_tokens_script=cls.GET_TOKENS_SCRIPT,
155 |                 accounts_file=cls.ACCOUNTS_FILE,
156 |                 tokens_file=cls.TOKENS_FILE
157 |             )
158 |             # 如果token_manager已存在且启用了自动更新，建立连接
159 |             if cls._token_manager is not None and cls.ENABLE_TOKEN_AUTO_UPDATE:
160 |                 cls._setup_force_refresh_callback()
161 |         return cls._token_updater
162 |     
163 |     @classmethod
164 |     def reload_tokens(cls) -> None:
165 |         """重新加载token"""
166 |         if cls._token_manager is not None:
167 |             cls._token_manager.reload_tokens()
168 |     
169 |     @classmethod
170 |     def _setup_force_refresh_callback(cls) -> None:
171 |         """设置强制刷新回调函数"""
172 |         if cls._token_manager is not None and cls._token_updater is None:
173 |             # 确保token_updater已被初始化
174 |             cls.get_token_updater()
175 |         
176 |         if cls._token_manager is not None and cls._token_updater is not None:
177 |             # 设置强制刷新回调
178 |             def force_refresh_callback():
179 |                 try:
180 |                     logging.getLogger(__name__).info("🔄 检测到token问题，启动自动刷新")
181 |                     success = cls._token_updater.force_update()
182 |                     if success:
183 |                         # 强制刷新成功后，重新加载token管理器
184 |                         cls._token_manager.reload_tokens()
185 |                         cls._token_manager.reset_consecutive_failures()
186 |                         logging.getLogger(__name__).info("✅ 自动刷新完成，tokens.txt已更新，token池已重新加载")
187 |                     else:
188 |                         logging.getLogger(__name__).error("❌ 自动刷新失败，请检查accounts.txt文件或手动更新token")
189 |                 except Exception as e:
190 |                     logging.getLogger(__name__).error(f"❌ 自动刷新回调执行失败: {e}")
191 |             
192 |             cls._token_manager.set_force_refresh_callback(force_refresh_callback)
193 |             logging.getLogger(__name__).info("已设置连续失效自动强制刷新机制")


--------------------------------------------------------------------------------
/get_tokens.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | import os
  5 | import sys
  6 | import requests
  7 | import json
  8 | import time
  9 | import threading
 10 | from concurrent.futures import ThreadPoolExecutor, as_completed
 11 | from typing import Optional
 12 | import re
 13 | from dotenv import load_dotenv
 14 | 
 15 | # 确保使用UTF-8编码
 16 | os.environ.setdefault('PYTHONIOENCODING', 'utf-8')
 17 | os.environ.setdefault('PYTHONLEGACYWINDOWSSTDIO', '0')
 18 | 
 19 | # 强制设置UTF-8编码
 20 | import locale
 21 | try:
 22 |     locale.setlocale(locale.LC_ALL, 'C.UTF-8')
 23 | except locale.Error:
 24 |     try:
 25 |         locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
 26 |     except locale.Error:
 27 |         pass  # 如果设置失败，继续使用默认设置
 28 | 
 29 | # 重新配置标准输入输出流
 30 | if hasattr(sys.stdout, 'reconfigure'):
 31 |     sys.stdout.reconfigure(encoding='utf-8', errors='replace')
 32 | if hasattr(sys.stderr, 'reconfigure'):
 33 |     sys.stderr.reconfigure(encoding='utf-8', errors='replace')
 34 | if hasattr(sys.stdin, 'reconfigure'):
 35 |     sys.stdin.reconfigure(encoding='utf-8', errors='replace')
 36 | 
 37 | # 加载环境变量
 38 | load_dotenv()
 39 | 
 40 | class K2ThinkTokenExtractor:
 41 |     def __init__(self):
 42 |         self.base_url = "https://www.k2think.ai"
 43 |         self.login_url = f"{self.base_url}/api/v1/auths/signin"
 44 |         
 45 |         # 从环境变量读取代理配置
 46 |         proxy_url = os.getenv("PROXY_URL", "")
 47 |         self.proxies = {}
 48 |         if proxy_url:
 49 |             self.proxies = {
 50 |                 'http': proxy_url,
 51 |                 'https': proxy_url
 52 |             }
 53 |             print(f"使用代理: {proxy_url}")
 54 |         else:
 55 |             print("未配置代理，直接连接")
 56 |         
 57 |         # 基于f12调试信息的请求头
 58 |         self.headers = {
 59 |             'Accept': '*/*',
 60 |             'Accept-Encoding': 'gzip, deflate, br, zstd',
 61 |             'Accept-Language': 'zh-CN,zh;q=0.9',
 62 |             'Content-Type': 'application/json',
 63 |             'Origin': 'https://www.k2think.ai',
 64 |             'Priority': 'u=1, i',
 65 |             'Referer': 'https://www.k2think.ai/auth?mode=signin',
 66 |             'Sec-Ch-Ua': '"Chromium";v="140", "Not=A?Brand";v="24", "Microsoft Edge";v="140"',
 67 |             'Sec-Ch-Ua-Mobile': '?0',
 68 |             'Sec-Ch-Ua-Platform': '"Windows"',
 69 |             'Sec-Fetch-Dest': 'empty',
 70 |             'Sec-Fetch-Mode': 'cors',
 71 |             'Sec-Fetch-Site': 'same-origin',
 72 |             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36 Edg/140.0.0.0'
 73 |         }
 74 |         
 75 |         self.lock = threading.Lock()
 76 | 
 77 |     def extract_token_from_set_cookie(self, response: requests.Response) -> Optional[str]:
 78 |         """从响应的Set-Cookie头中提取token"""
 79 |         set_cookie_headers = response.headers.get_list('Set-Cookie') if hasattr(response.headers, 'get_list') else [response.headers.get('Set-Cookie')]
 80 |         
 81 |         # 处理多个Set-Cookie头
 82 |         if set_cookie_headers:
 83 |             for cookie_header in set_cookie_headers:
 84 |                 if cookie_header and 'token=' in cookie_header:
 85 |                     # 使用正则提取token值
 86 |                     match = re.search(r'token=([^;]+)', cookie_header)
 87 |                     if match:
 88 |                         return match.group(1)
 89 |         
 90 |         return None
 91 | 
 92 |     def login_and_get_token(self, email: str, password: str, retry_count: int = 3) -> Optional[str]:
 93 |         """登录并获取token，带重试机制"""
 94 |         login_data = {
 95 |             "email": email,
 96 |             "password": password
 97 |         }
 98 |         
 99 |         for attempt in range(retry_count):
100 |             try:
101 |                 session = requests.Session()
102 |                 session.headers.update(self.headers)
103 |                 
104 |                 response = session.post(
105 |                     self.login_url,
106 |                     json=login_data,
107 |                     proxies=self.proxies if self.proxies else None,
108 |                     timeout=30
109 |                 )
110 |                 
111 |                 if response.status_code == 200:
112 |                     token = self.extract_token_from_set_cookie(response)
113 |                     if token:
114 |                         return token
115 |                 
116 |             except Exception as e:
117 |                 if attempt == retry_count - 1:
118 |                     return None
119 |                 time.sleep(2)  # 重试间隔2秒
120 |                 continue
121 |                 
122 |         return None
123 | 
124 |     def load_accounts(self, file_path: str = "./accounts.txt"):
125 |         """从文件加载账户信息"""
126 |         accounts = []
127 |         try:
128 |             with open(file_path, 'r', encoding='utf-8') as f:
129 |                 for line in f:
130 |                     line = line.strip()
131 |                     if not line:
132 |                         continue
133 |                     
134 |                     try:
135 |                         account_data = json.loads(line)
136 |                         if 'email' in account_data and 'k2_password' in account_data:
137 |                             accounts.append({
138 |                                 'email': account_data['email'],
139 |                                 'password': account_data['k2_password']
140 |                             })
141 |                     except:
142 |                         continue
143 |             
144 |             return accounts
145 |             
146 |         except FileNotFoundError:
147 |             return []
148 |         except Exception:
149 |             return []
150 | 
151 |     def save_token(self, token: str, file_path: str = "./tokens.txt"):
152 |         """保存token到文件"""
153 |         try:
154 |             with self.lock:
155 |                 with open(file_path, 'a', encoding='utf-8') as f:
156 |                     f.write(token + '\n')
157 |         except Exception:
158 |             pass
159 | 
160 |     def clear_tokens_file(self, file_path: str = "./tokens.txt"):
161 |         """清空tokens文件，准备写入新的tokens"""
162 |         try:
163 |             with open(file_path, 'w', encoding='utf-8') as f:
164 |                 f.write('')  # 清空文件
165 |             print(f"已清空tokens文件: {file_path}")
166 |         except Exception as e:
167 |             print(f"清空tokens文件失败: {e}")
168 | 
169 |     def process_account(self, account, tokens_file: str = "./tokens.txt"):
170 |         """处理单个账户"""
171 |         token = self.login_and_get_token(account['email'], account['password'])
172 |         if token:
173 |             self.save_token(token, tokens_file)
174 |             return True
175 |         return False
176 | 
177 |     def process_all_accounts(self, accounts_file: str = "./accounts.txt", tokens_file: str = "./tokens.txt"):
178 |         """使用并发处理所有账户"""
179 |         accounts = self.load_accounts(accounts_file)
180 |         if not accounts:
181 |             print("没有账户需要处理或accounts.txt文件不存在")
182 |             return False
183 |         
184 |         # 清空现有的tokens文件
185 |         self.clear_tokens_file(tokens_file)
186 |         
187 |         print(f"开始处理 {len(accounts)} 个账户，4线程并发...")
188 |         success_count = 0
189 |         failed_count = 0
190 |         
191 |         # 先测试单个账户
192 |         test_account = accounts[0]
193 |         print(f"测试账户: {test_account['email']}")
194 |         
195 |         try:
196 |             token = self.login_and_get_token(test_account['email'], test_account['password'])
197 |             if token:
198 |                 print(f"测试成功，获取token: {token[:50]}...")
199 |             else:
200 |                 print("测试失败，无法获取token")
201 |         except Exception as e:
202 |             print(f"测试异常: {e}")
203 |         
204 |         with ThreadPoolExecutor(max_workers=4) as executor:
205 |             # 提交所有任务
206 |             future_to_account = {executor.submit(self.process_account, account, tokens_file): account for account in accounts}
207 |             
208 |             # 处理结果
209 |             for future in as_completed(future_to_account):
210 |                 account = future_to_account[future]
211 |                 try:
212 |                     if future.result():
213 |                         success_count += 1
214 |                         print(f"✓ {account['email']}")
215 |                     else:
216 |                         failed_count += 1
217 |                         print(f"✗ {account['email']}")
218 |                 except Exception as e:
219 |                     failed_count += 1
220 |                     print(f"✗ {account['email']} - {e}")
221 |         
222 |         print(f"\n处理完成: 成功 {success_count}, 失败 {failed_count}")
223 |         
224 |         # 返回是否有成功获取的token
225 |         return success_count > 0
226 | 
227 | 
228 | def main():
229 |     import sys
230 |     
231 |     # 支持命令行参数
232 |     accounts_file = sys.argv[1] if len(sys.argv) > 1 else "./accounts.txt"
233 |     tokens_file = sys.argv[2] if len(sys.argv) > 2 else "./tokens.txt"
234 |     
235 |     extractor = K2ThinkTokenExtractor()
236 |     success = extractor.process_all_accounts(accounts_file, tokens_file)
237 |     
238 |     # 设置退出码
239 |     sys.exit(0 if success else 1)
240 | 
241 | 
242 | if __name__ == "__main__":
243 |     main()
244 | 


--------------------------------------------------------------------------------
/tests/test_tool_calling.py:
--------------------------------------------------------------------------------
  1 | """
  2 | K2Think API Proxy 工具调用示例
  3 | 演示如何使用工具调用功能
  4 | """
  5 | import json
  6 | from openai import OpenAI
  7 | 
  8 | # 配置客户端
  9 | client = OpenAI(
 10 |     base_url="http://localhost:8001/v1",
 11 |     api_key="sk-123456"
 12 | )
 13 | 
 14 | # 定义工具
 15 | tools = [
 16 |     {
 17 |         "type": "function",
 18 |         "function": {
 19 |             "name": "get_weather",
 20 |             "description": "获取指定城市的天气信息",
 21 |             "parameters": {
 22 |                 "type": "object",
 23 |                 "properties": {
 24 |                     "city": {
 25 |                         "type": "string",
 26 |                         "description": "城市名称，例如：北京、上海、深圳"
 27 |                     },
 28 |                     "unit": {
 29 |                         "type": "string",
 30 |                         "enum": ["celsius", "fahrenheit"],
 31 |                         "description": "温度单位",
 32 |                         "default": "celsius"
 33 |                     }
 34 |                 },
 35 |                 "required": ["city"]
 36 |             }
 37 |         }
 38 |     },
 39 |     {
 40 |         "type": "function",
 41 |         "function": {
 42 |             "name": "search_web",
 43 |             "description": "在互联网上搜索信息",
 44 |             "parameters": {
 45 |                 "type": "object",
 46 |                 "properties": {
 47 |                     "query": {
 48 |                         "type": "string",
 49 |                         "description": "搜索关键词"
 50 |                     },
 51 |                     "num_results": {
 52 |                         "type": "integer",
 53 |                         "description": "返回结果数量",
 54 |                         "default": 5,
 55 |                         "minimum": 1,
 56 |                         "maximum": 10
 57 |                     }
 58 |                 },
 59 |                 "required": ["query"]
 60 |             }
 61 |         }
 62 |     },
 63 |     {
 64 |         "type": "function",
 65 |         "function": {
 66 |             "name": "calculate",
 67 |             "description": "执行数学计算",
 68 |             "parameters": {
 69 |                 "type": "object",
 70 |                 "properties": {
 71 |                     "expression": {
 72 |                         "type": "string",
 73 |                         "description": "数学表达式，例如：2+2, 10*5, sqrt(16)"
 74 |                     }
 75 |                 },
 76 |                 "required": ["expression"]
 77 |             }
 78 |         }
 79 |     }
 80 | ]
 81 | 
 82 | def example_basic_tool_call():
 83 |     """基础工具调用示例"""
 84 |     print("\n=== 基础工具调用示例 ===\n")
 85 |     
 86 |     response = client.chat.completions.create(
 87 |         model="MBZUAI-IFM/K2-Think",
 88 |         messages=[
 89 |             {"role": "user", "content": "北京今天天气怎么样？"}
 90 |         ],
 91 |         tools=tools,
 92 |         tool_choice="auto"
 93 |     )
 94 |     
 95 |     # 处理响应
 96 |     message = response.choices[0].message
 97 |     
 98 |     if message.tool_calls:
 99 |         print("模型请求调用工具：")
100 |         for tool_call in message.tool_calls:
101 |             print(f"\n工具名称: {tool_call.function.name}")
102 |             print(f"工具参数: {tool_call.function.arguments}")
103 |             
104 |             # 模拟执行工具并返回结果
105 |             function_name = tool_call.function.name
106 |             function_args = json.loads(tool_call.function.arguments)
107 |             
108 |             # 模拟工具执行结果
109 |             if function_name == "get_weather":
110 |                 result = {
111 |                     "city": function_args.get("city"),
112 |                     "temperature": 22,
113 |                     "condition": "晴天",
114 |                     "humidity": 45,
115 |                     "unit": function_args.get("unit", "celsius")
116 |                 }
117 |             else:
118 |                 result = {"status": "success", "data": "模拟数据"}
119 |             
120 |             print(f"工具执行结果: {json.dumps(result, ensure_ascii=False)}")
121 |     else:
122 |         print("模型直接回答：")
123 |         print(message.content)
124 | 
125 | 
126 | def example_multi_turn_conversation():
127 |     """多轮对话示例（包含工具调用）"""
128 |     print("\n=== 多轮对话示例 ===\n")
129 |     
130 |     messages = [
131 |         {"role": "user", "content": "查一下上海的天气，然后搜索关于上海的旅游景点"}
132 |     ]
133 |     
134 |     response = client.chat.completions.create(
135 |         model="MBZUAI-IFM/K2-Think",
136 |         messages=messages,
137 |         tools=tools,
138 |         tool_choice="auto"
139 |     )
140 |     
141 |     message = response.choices[0].message
142 |     
143 |     if message.tool_calls:
144 |         print("第一轮 - 模型请求调用工具：")
145 |         messages.append(message)  # 添加助手的响应
146 |         
147 |         # 处理每个工具调用
148 |         for tool_call in message.tool_calls:
149 |             print(f"\n调用工具: {tool_call.function.name}")
150 |             print(f"参数: {tool_call.function.arguments}")
151 |             
152 |             # 模拟工具执行并返回结果
153 |             function_name = tool_call.function.name
154 |             
155 |             if function_name == "get_weather":
156 |                 result = '{"temperature": 25, "condition": "多云", "city": "上海"}'
157 |             elif function_name == "search_web":
158 |                 result = '{"results": ["外滩", "东方明珠", "豫园", "南京路"]}'
159 |             else:
160 |                 result = '{"status": "success"}'
161 |             
162 |             # 添加工具结果到消息历史
163 |             messages.append({
164 |                 "role": "tool",
165 |                 "tool_call_id": tool_call.id,
166 |                 "content": result
167 |             })
168 |         
169 |         # 发送工具结果给模型，获取最终回答
170 |         print("\n第二轮 - 发送工具结果给模型...")
171 |         
172 |         final_response = client.chat.completions.create(
173 |             model="MBZUAI-IFM/K2-Think",
174 |             messages=messages,
175 |             tools=tools
176 |         )
177 |         
178 |         print("\n模型的最终回答：")
179 |         print(final_response.choices[0].message.content)
180 | 
181 | 
182 | def example_forced_tool_call():
183 |     """强制使用特定工具的示例"""
184 |     print("\n=== 强制工具调用示例 ===\n")
185 |     
186 |     response = client.chat.completions.create(
187 |         model="MBZUAI-IFM/K2-Think",
188 |         messages=[
189 |             {"role": "user", "content": "计算 123 * 456"}
190 |         ],
191 |         tools=tools,
192 |         tool_choice={
193 |             "type": "function",
194 |             "function": {"name": "calculate"}
195 |         }
196 |     )
197 |     
198 |     message = response.choices[0].message
199 |     
200 |     if message.tool_calls:
201 |         print("模型被强制使用工具：")
202 |         for tool_call in message.tool_calls:
203 |             print(f"工具: {tool_call.function.name}")
204 |             print(f"参数: {tool_call.function.arguments}")
205 | 
206 | 
207 | def example_stream_with_tools():
208 |     """流式响应中的工具调用示例"""
209 |     print("\n=== 流式工具调用示例 ===\n")
210 |     
211 |     stream = client.chat.completions.create(
212 |         model="MBZUAI-IFM/K2-Think",
213 |         messages=[
214 |             {"role": "user", "content": "帮我搜索一下人工智能的最新发展"}
215 |         ],
216 |         tools=tools,
217 |         stream=True
218 |     )
219 |     
220 |     print("流式响应：")
221 |     for chunk in stream:
222 |         if chunk.choices[0].delta.content:
223 |             print(chunk.choices[0].delta.content, end="", flush=True)
224 |         
225 |         # 检查是否有工具调用
226 |         if hasattr(chunk.choices[0].delta, 'tool_calls') and chunk.choices[0].delta.tool_calls:
227 |             print("\n检测到工具调用：")
228 |             for tool_call in chunk.choices[0].delta.tool_calls:
229 |                 if hasattr(tool_call, 'function'):
230 |                     print(f"\n工具: {tool_call.function.name if hasattr(tool_call.function, 'name') else '未知'}")
231 |         
232 |         # 检查结束原因
233 |         if chunk.choices[0].finish_reason == "tool_calls":
234 |             print("\n[流结束 - 需要工具调用]")
235 |             break
236 |         elif chunk.choices[0].finish_reason == "stop":
237 |             print("\n[流结束]")
238 |             break
239 |     
240 |     print()
241 | 
242 | 
243 | def example_disable_tools():
244 |     """禁用工具调用的示例"""
245 |     print("\n=== 禁用工具调用示例 ===\n")
246 |     
247 |     response = client.chat.completions.create(
248 |         model="MBZUAI-IFM/K2-Think",
249 |         messages=[
250 |             {"role": "user", "content": "北京今天天气怎么样？"}
251 |         ],
252 |         tools=tools,
253 |         tool_choice="none"  # 禁用工具调用
254 |     )
255 |     
256 |     print("模型直接回答（未使用工具）：")
257 |     print(response.choices[0].message.content)
258 | 
259 | 
260 | if __name__ == "__main__":
261 |     print("=" * 60)
262 |     print("K2Think API Proxy - 工具调用功能示例")
263 |     print("=" * 60)
264 |     
265 |     try:
266 |         # 运行示例
267 |         example_basic_tool_call()
268 |         example_forced_tool_call()
269 |         example_stream_with_tools()
270 |         example_disable_tools()
271 |         example_multi_turn_conversation()
272 |         
273 |         print("\n" + "=" * 60)
274 |         print("示例运行完成！")
275 |         print("=" * 60)
276 |         
277 |     except Exception as e:
278 |         print(f"\n错误: {e}")
279 |         print("\n请确保：")
280 |         print("1. K2Think API Proxy 服务正在运行（http://localhost:8001）")
281 |         print("2. 环境变量 ENABLE_TOOLIFY=true")
282 |         print("3. API密钥配置正确")
283 | 
284 | 


--------------------------------------------------------------------------------
/src/toolify/core.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Toolify 核心功能模块
  3 | 提供工具调用的主要功能：请求处理、响应解析、格式转换
  4 | """
  5 | 
  6 | import uuid
  7 | import json
  8 | import secrets
  9 | import string
 10 | import logging
 11 | from typing import List, Dict, Any, Optional
 12 | from collections import OrderedDict
 13 | import time
 14 | import threading
 15 | 
 16 | logger = logging.getLogger(__name__)
 17 | 
 18 | 
 19 | def generate_random_trigger_signal() -> str:
 20 |     """生成随机的、自闭合的触发信号，如 <Function_AB1c_Start/>"""
 21 |     chars = string.ascii_letters + string.digits
 22 |     random_str = ''.join(secrets.choice(chars) for _ in range(4))
 23 |     return f"<Function_{random_str}_Start/>"
 24 | 
 25 | 
 26 | class ToolCallMappingManager:
 27 |     """
 28 |     工具调用映射管理器（带TTL和大小限制）
 29 |     
 30 |     功能：
 31 |     1. 自动过期清理 - 条目在指定时间后自动删除
 32 |     2. 大小限制 - 防止内存无限增长
 33 |     3. LRU驱逐 - 达到大小限制时删除最少使用的条目
 34 |     4. 线程安全 - 支持并发访问
 35 |     5. 周期性清理 - 后台线程定期清理过期条目
 36 |     """
 37 |     
 38 |     def __init__(self, max_size: int = 1000, ttl_seconds: int = 3600, cleanup_interval: int = 300):
 39 |         """
 40 |         初始化映射管理器
 41 |         
 42 |         Args:
 43 |             max_size: 最大存储条目数
 44 |             ttl_seconds: 条目生存时间（秒）
 45 |             cleanup_interval: 清理间隔（秒）
 46 |         """
 47 |         self.max_size = max_size
 48 |         self.ttl_seconds = ttl_seconds
 49 |         self.cleanup_interval = cleanup_interval
 50 |         
 51 |         self._data: OrderedDict[str, Dict[str, Any]] = OrderedDict()
 52 |         self._timestamps: Dict[str, float] = {}
 53 |         self._lock = threading.RLock()
 54 |         
 55 |         self._cleanup_thread = threading.Thread(target=self._periodic_cleanup, daemon=True)
 56 |         self._cleanup_thread.start()
 57 |         
 58 |         logger.debug(f"[TOOLIFY] 工具调用映射管理器已启动 - 最大条目: {max_size}, TTL: {ttl_seconds}s")
 59 |     
 60 |     def store(self, tool_call_id: str, name: str, args: dict, description: str = "") -> None:
 61 |         """存储工具调用映射"""
 62 |         with self._lock:
 63 |             current_time = time.time()
 64 |             
 65 |             if tool_call_id in self._data:
 66 |                 del self._data[tool_call_id]
 67 |                 del self._timestamps[tool_call_id]
 68 |             
 69 |             while len(self._data) >= self.max_size:
 70 |                 oldest_key = next(iter(self._data))
 71 |                 del self._data[oldest_key]
 72 |                 del self._timestamps[oldest_key]
 73 |                 logger.debug(f"[TOOLIFY] 因大小限制移除最旧条目: {oldest_key}")
 74 |             
 75 |             self._data[tool_call_id] = {
 76 |                 "name": name,
 77 |                 "args": args,
 78 |                 "description": description,
 79 |                 "created_at": current_time
 80 |             }
 81 |             self._timestamps[tool_call_id] = current_time
 82 |             
 83 |             logger.debug(f"[TOOLIFY] 存储工具调用映射: {tool_call_id} -> {name}")
 84 |     
 85 |     def get(self, tool_call_id: str) -> Optional[Dict[str, Any]]:
 86 |         """获取工具调用映射（更新LRU顺序）"""
 87 |         with self._lock:
 88 |             current_time = time.time()
 89 |             
 90 |             if tool_call_id not in self._data:
 91 |                 logger.debug(f"[TOOLIFY] 未找到工具调用映射: {tool_call_id}")
 92 |                 return None
 93 |             
 94 |             if current_time - self._timestamps[tool_call_id] > self.ttl_seconds:
 95 |                 logger.debug(f"[TOOLIFY] 工具调用映射已过期: {tool_call_id}")
 96 |                 del self._data[tool_call_id]
 97 |                 del self._timestamps[tool_call_id]
 98 |                 return None
 99 |             
100 |             result = self._data[tool_call_id]
101 |             self._data.move_to_end(tool_call_id)
102 |             
103 |             logger.debug(f"[TOOLIFY] 找到工具调用映射: {tool_call_id} -> {result['name']}")
104 |             return result
105 |     
106 |     def cleanup_expired(self) -> int:
107 |         """清理过期条目，返回清理数量"""
108 |         with self._lock:
109 |             current_time = time.time()
110 |             expired_keys = []
111 |             
112 |             for key, timestamp in self._timestamps.items():
113 |                 if current_time - timestamp > self.ttl_seconds:
114 |                     expired_keys.append(key)
115 |             
116 |             for key in expired_keys:
117 |                 del self._data[key]
118 |                 del self._timestamps[key]
119 |             
120 |             if expired_keys:
121 |                 logger.debug(f"[TOOLIFY] 清理了 {len(expired_keys)} 个过期条目")
122 |             
123 |             return len(expired_keys)
124 |     
125 |     def _periodic_cleanup(self) -> None:
126 |         """后台周期性清理线程"""
127 |         while True:
128 |             try:
129 |                 time.sleep(self.cleanup_interval)
130 |                 self.cleanup_expired()
131 |             except Exception as e:
132 |                 logger.error(f"[TOOLIFY] 后台清理线程异常: {e}")
133 | 
134 | 
135 | class ToolifyCore:
136 |     """Toolify 核心类 - 管理工具调用功能"""
137 |     
138 |     def __init__(self, enable_function_calling: bool = True):
139 |         """
140 |         初始化 Toolify 核心
141 |         
142 |         Args:
143 |             enable_function_calling: 是否启用函数调用功能
144 |         """
145 |         self.enable_function_calling = enable_function_calling
146 |         self.mapping_manager = ToolCallMappingManager()
147 |         self.trigger_signal = generate_random_trigger_signal()
148 |         
149 |         logger.info(f"[TOOLIFY] 核心已初始化 - 功能启用: {enable_function_calling}")
150 |         logger.debug(f"[TOOLIFY] 触发信号: {self.trigger_signal}")
151 |     
152 |     def store_tool_call_mapping(self, tool_call_id: str, name: str, args: dict, description: str = ""):
153 |         """存储工具调用ID与调用内容的映射"""
154 |         self.mapping_manager.store(tool_call_id, name, args, description)
155 |     
156 |     def get_tool_call_mapping(self, tool_call_id: str) -> Optional[Dict[str, Any]]:
157 |         """获取工具调用ID对应的调用内容"""
158 |         return self.mapping_manager.get(tool_call_id)
159 |     
160 |     def format_tool_result_for_ai(self, tool_call_id: str, result_content: str) -> str:
161 |         """格式化工具调用结果供AI理解"""
162 |         logger.debug(f"[TOOLIFY] 格式化工具调用结果: tool_call_id={tool_call_id}")
163 |         tool_info = self.get_tool_call_mapping(tool_call_id)
164 |         if not tool_info:
165 |             logger.debug(f"[TOOLIFY] 未找到工具调用映射，使用默认格式")
166 |             return f"Tool execution result:\n<tool_result>\n{result_content}\n</tool_result>"
167 |         
168 |         formatted_text = f"""Tool execution result:
169 | - Tool name: {tool_info['name']}
170 | - Execution result:
171 | <tool_result>
172 | {result_content}
173 | </tool_result>"""
174 |         
175 |         logger.debug(f"[TOOLIFY] 格式化完成，工具名: {tool_info['name']}")
176 |         return formatted_text
177 |     
178 |     def format_assistant_tool_calls_for_ai(self, tool_calls: List[Dict[str, Any]]) -> str:
179 |         """将助手的工具调用格式化为AI可读的字符串格式"""
180 |         logger.debug(f"[TOOLIFY] 格式化助手工具调用. 数量: {len(tool_calls)}")
181 |         
182 |         xml_calls_parts = []
183 |         for tool_call in tool_calls:
184 |             function_info = tool_call.get("function", {})
185 |             name = function_info.get("name", "")
186 |             arguments_json = function_info.get("arguments", "{}")
187 |             
188 |             try:
189 |                 args_dict = json.loads(arguments_json)
190 |             except (json.JSONDecodeError, TypeError):
191 |                 args_dict = {"raw_arguments": arguments_json}
192 | 
193 |             args_parts = []
194 |             for key, value in args_dict.items():
195 |                 json_value = json.dumps(value, ensure_ascii=False)
196 |                 args_parts.append(f"<{key}>{json_value}</{key}>")
197 |             
198 |             args_content = "\n".join(args_parts)
199 |             
200 |             xml_call = f"<function_call>\n<tool>{name}</tool>\n<args>\n{args_content}\n</args>\n</function_call>"
201 |             xml_calls_parts.append(xml_call)
202 | 
203 |         all_calls = "\n".join(xml_calls_parts)
204 |         final_str = f"{self.trigger_signal}\n<function_calls>\n{all_calls}\n</function_calls>"
205 |         
206 |         logger.debug("[TOOLIFY] 助手工具调用格式化成功")
207 |         return final_str
208 |     
209 |     def preprocess_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
210 |         """
211 |         预处理消息，转换工具类型消息为AI可理解格式
212 |         
213 |         Args:
214 |             messages: OpenAI格式的消息列表
215 |             
216 |         Returns:
217 |             处理后的消息列表
218 |         """
219 |         processed_messages = []
220 |         
221 |         for message in messages:
222 |             if isinstance(message, dict):
223 |                 # 处理 tool 角色消息
224 |                 if message.get("role") == "tool":
225 |                     tool_call_id = message.get("tool_call_id")
226 |                     content = message.get("content")
227 |                     
228 |                     if tool_call_id and content:
229 |                         formatted_content = self.format_tool_result_for_ai(tool_call_id, content)
230 |                         processed_message = {
231 |                             "role": "user",
232 |                             "content": formatted_content
233 |                         }
234 |                         processed_messages.append(processed_message)
235 |                         logger.debug(f"[TOOLIFY] 转换tool消息为user消息: tool_call_id={tool_call_id}")
236 |                     else:
237 |                         logger.debug(f"[TOOLIFY] 跳过无效tool消息: tool_call_id={tool_call_id}")
238 |                 
239 |                 # 处理 assistant 角色的 tool_calls
240 |                 elif message.get("role") == "assistant" and "tool_calls" in message and message["tool_calls"]:
241 |                     tool_calls = message.get("tool_calls", [])
242 |                     formatted_tool_calls_str = self.format_assistant_tool_calls_for_ai(tool_calls)
243 |                     
244 |                     # 与原始内容合并
245 |                     original_content = message.get("content") or ""
246 |                     final_content = f"{original_content}\n{formatted_tool_calls_str}".strip()
247 | 
248 |                     processed_message = {
249 |                         "role": "assistant",
250 |                         "content": final_content
251 |                     }
252 |                     # 复制其他字段（除了tool_calls）
253 |                     for key, value in message.items():
254 |                         if key not in ["role", "content", "tool_calls"]:
255 |                             processed_message[key] = value
256 | 
257 |                     processed_messages.append(processed_message)
258 |                     logger.debug(f"[TOOLIFY] 转换assistant的tool_calls为content")
259 |                 else:
260 |                     processed_messages.append(message)
261 |             else:
262 |                 processed_messages.append(message)
263 |         
264 |         return processed_messages
265 |     
266 |     def convert_parsed_tools_to_openai_format(self, parsed_tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
267 |         """
268 |         将解析出的工具调用转换为OpenAI格式的tool_calls
269 |         
270 |         Args:
271 |             parsed_tools: 解析出的工具列表 [{"name": "tool_name", "args": {...}}, ...]
272 |             
273 |         Returns:
274 |             OpenAI格式的tool_calls列表
275 |         """
276 |         tool_calls = []
277 |         for tool in parsed_tools:
278 |             tool_call_id = f"call_{uuid.uuid4().hex}"
279 |             self.store_tool_call_mapping(
280 |                 tool_call_id,
281 |                 tool["name"],
282 |                 tool["args"],
283 |                 f"调用工具 {tool['name']}"
284 |             )
285 |             tool_calls.append({
286 |                 "id": tool_call_id,
287 |                 "type": "function",
288 |                 "function": {
289 |                     "name": tool["name"],
290 |                     "arguments": json.dumps(tool["args"])
291 |                 }
292 |             })
293 |         
294 |         logger.debug(f"[TOOLIFY] 转换了 {len(tool_calls)} 个工具调用")
295 |         return tool_calls
296 | 
297 | 


--------------------------------------------------------------------------------
/src/token_updater.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Token更新服务模块
  4 | 定期运行get_tokens.py来更新token池
  5 | """
  6 | import os
  7 | import time
  8 | import logging
  9 | import threading
 10 | import subprocess
 11 | import shutil
 12 | from typing import Optional
 13 | from datetime import datetime, timedelta
 14 | from src.utils import safe_log_error, safe_log_info, safe_log_warning
 15 | # 移除循环导入，Config在需要时动态导入
 16 | 
 17 | logger = logging.getLogger(__name__)
 18 | 
 19 | class TokenUpdater:
 20 |     """Token更新服务 - 定期更新token池"""
 21 |     
 22 |     def __init__(self, 
 23 |                  update_interval: int = 86400,  # 默认24小时更新一次
 24 |                  get_tokens_script: str = "get_tokens.py",
 25 |                  accounts_file: str = "accounts.txt",
 26 |                  tokens_file: str = "tokens.txt"):
 27 |         """
 28 |         初始化Token更新器
 29 |         
 30 |         Args:
 31 |             update_interval: 更新间隔（秒）
 32 |             get_tokens_script: get_tokens.py脚本路径
 33 |             accounts_file: 账户文件路径
 34 |             tokens_file: tokens文件路径
 35 |         """
 36 |         self.update_interval = update_interval
 37 |         self.get_tokens_script = get_tokens_script
 38 |         self.accounts_file = accounts_file
 39 |         self.tokens_file = tokens_file
 40 |         
 41 |         self.is_running = False
 42 |         self.update_thread: Optional[threading.Thread] = None
 43 |         self.last_update: Optional[datetime] = None
 44 |         self.update_count = 0
 45 |         self.error_count = 0
 46 |         self.is_updating = False
 47 |         self.last_error: Optional[str] = None
 48 |         
 49 |         safe_log_info(logger, f"Token更新器初始化完成 - 更新间隔: {update_interval}秒")
 50 |         
 51 |         # 清理可能遗留的临时文件
 52 |         self.cleanup_all_temp_files()
 53 |     
 54 |     def _check_files_exist(self) -> bool:
 55 |         """检查必要文件是否存在"""
 56 |         if not os.path.exists(self.get_tokens_script):
 57 |             safe_log_error(logger, f"get_tokens.py脚本不存在: {self.get_tokens_script}")
 58 |             return False
 59 |         
 60 |         if not os.path.exists(self.accounts_file):
 61 |             safe_log_error(logger, f"账户文件不存在: {self.accounts_file}")
 62 |             return False
 63 |         
 64 |         return True
 65 |     
 66 |     def _run_token_update(self) -> bool:
 67 |         """运行token更新脚本（原子性更新）"""
 68 |         if self.is_updating:
 69 |             safe_log_warning(logger, "Token更新已在进行中，跳过此次更新")
 70 |             return False
 71 |             
 72 |         self.is_updating = True
 73 |         self.last_error = None
 74 |         temp_tokens_file = f"{self.tokens_file}.tmp"
 75 |         
 76 |         try:
 77 |             safe_log_info(logger, "开始更新token池...")
 78 |             
 79 |             # 使用临时文件进行更新，避免服务中断
 80 |             result = subprocess.run(
 81 |                 ["python", self.get_tokens_script, self.accounts_file, temp_tokens_file],
 82 |                 capture_output=True,
 83 |                 encoding='utf-8', 
 84 |                 text=True,
 85 |                 timeout=300  # 5分钟超时
 86 |             )
 87 |             
 88 |             if result.returncode == 0:
 89 |                 # 检查临时文件是否生成且不为空
 90 |                 if os.path.exists(temp_tokens_file) and os.path.getsize(temp_tokens_file) > 0:
 91 |                     try:
 92 |                         # 原子性替换：避免重命名正在使用的文件
 93 |                         if os.path.exists(self.tokens_file):
 94 |                             # 备份当前文件（使用复制而非重命名，避免文件锁定问题）
 95 |                             backup_file = f"{self.tokens_file}.backup"
 96 |                             if os.path.exists(backup_file):
 97 |                                 os.remove(backup_file)  # 删除旧备份
 98 |                             
 99 |                             # 复制当前文件到备份位置
100 |                             shutil.copy2(self.tokens_file, backup_file)
101 |                             logger.debug(f"已备份当前tokens文件到: {backup_file}")
102 |                         
103 |                         # 使用临时文件替换原文件（Windows下更安全的方式）
104 |                         if os.name == 'nt':  # Windows系统
105 |                             # 在Windows下，先删除目标文件再重命名
106 |                             if os.path.exists(self.tokens_file):
107 |                                 os.remove(self.tokens_file)
108 |                             os.rename(temp_tokens_file, self.tokens_file)
109 |                         else:
110 |                             # Unix/Linux系统可以直接重命名覆盖
111 |                             os.rename(temp_tokens_file, self.tokens_file)
112 |                         
113 |                         safe_log_info(logger, "Token更新成功，文件已原子性替换")
114 |                         logger.debug(f"更新输出: {result.stdout}")
115 |                         self.update_count += 1
116 |                         self.last_update = datetime.now()
117 |                         
118 |                         # 通知需要重新加载token管理器
119 |                         self._notify_token_reload()
120 |                         
121 |                         return True
122 |                     except Exception as rename_error:
123 |                         error_msg = f"文件重命名失败: {rename_error}"
124 |                         safe_log_error(logger, error_msg)
125 |                         self.last_error = error_msg
126 |                         self._cleanup_temp_file(temp_tokens_file)
127 |                         self.error_count += 1
128 |                         return False
129 |                 else:
130 |                     error_msg = "Token更新失败 - 临时文件为空或不存在"
131 |                     safe_log_error(logger, error_msg)
132 |                     self.last_error = error_msg
133 |                     self._cleanup_temp_file(temp_tokens_file)
134 |                     self.error_count += 1
135 |                     return False
136 |             else:
137 |                 error_msg = f"Token更新失败 - 返回码: {result.returncode}, 错误: {result.stderr}"
138 |                 safe_log_error(logger, error_msg)
139 |                 self.last_error = error_msg
140 |                 self._cleanup_temp_file(temp_tokens_file)
141 |                 self.error_count += 1
142 |                 return False
143 |                 
144 |         except subprocess.TimeoutExpired:
145 |             error_msg = "Token更新超时"
146 |             safe_log_error(logger, error_msg)
147 |             self.last_error = error_msg
148 |             self._cleanup_temp_file(temp_tokens_file)
149 |             self.error_count += 1
150 |             return False
151 |         except Exception as e:
152 |             error_msg = f"Token更新异常: {e}"
153 |             safe_log_error(logger, error_msg)
154 |             self.last_error = error_msg
155 |             self._cleanup_temp_file(temp_tokens_file)
156 |             self.error_count += 1
157 |             return False
158 |         finally:
159 |             self.is_updating = False
160 |     
161 |     def _cleanup_temp_file(self, temp_file: str):
162 |         """清理临时文件"""
163 |         try:
164 |             if os.path.exists(temp_file):
165 |                 os.remove(temp_file)
166 |                 logger.debug(f"已清理临时文件: {temp_file}")
167 |         except Exception as e:
168 |             safe_log_warning(logger, f"清理临时文件失败: {e}")
169 |     
170 |     def cleanup_all_temp_files(self):
171 |         """清理所有相关的临时文件"""
172 |         temp_patterns = [
173 |             f"{self.tokens_file}.tmp",
174 |             f"{self.tokens_file}.backup"
175 |         ]
176 |         
177 |         cleaned_count = 0
178 |         for pattern in temp_patterns:
179 |             try:
180 |                 if os.path.exists(pattern):
181 |                     os.remove(pattern)
182 |                     safe_log_info(logger, f"已清理遗留文件: {pattern}")
183 |                     cleaned_count += 1
184 |             except Exception as e:
185 |                 safe_log_warning(logger, f"清理遗留文件失败 {pattern}: {e}")
186 |         
187 |         if cleaned_count > 0:
188 |             safe_log_info(logger, f"共清理了 {cleaned_count} 个遗留文件")
189 |         else:
190 |             logger.debug("没有发现需要清理的遗留文件")
191 |         
192 |         return cleaned_count
193 |     
194 |     def _notify_token_reload(self):
195 |         """通知需要重新加载token管理器"""
196 |         try:
197 |             # 导入Config来触发token重新加载
198 |             from src.config import Config
199 |             if Config._token_manager is not None:
200 |                 Config._token_manager.reload_tokens()
201 |                 safe_log_info(logger, "Token管理器已重新加载")
202 |         except Exception as e:
203 |             safe_log_warning(logger, f"通知token重新加载失败: {e}")
204 |     
205 | 
206 |     def _update_loop(self):
207 |         """更新循环"""
208 |         safe_log_info(logger, "Token更新服务启动")
209 |         
210 |         # # 首次启动时，如果tokens.txt中没有token（非#开头），立即更新一次
211 |         # 判断tokens.txt中的token数量
212 |         if os.path.exists(self.tokens_file):
213 |             try:
214 |                 # 读取文件内容并立即关闭文件句柄
215 |                 with open(self.tokens_file, "r", encoding="utf-8") as f:
216 |                     content = f.read()
217 |                 
218 |                 # 在文件句柄关闭后处理内容
219 |                 lines = content.splitlines()
220 |                 valid_lines = [line.strip() for line in lines if line.strip() and not line.strip().startswith("#")]
221 |                 
222 |                 if len(valid_lines) < 1:
223 |                     # 动态导入Config避免循环导入
224 |                     from src.config import Config
225 |                     if Config.ENABLE_TOKEN_AUTO_UPDATE:
226 |                         safe_log_info(logger, "首次启动时，tokens.txt中没有token（非#开头），立即更新一次")
227 |                         # 添加小延迟确保文件句柄完全释放
228 |                         
229 |                         time.sleep(0.1)
230 |                         self._run_token_update()
231 |             except Exception as e:
232 |                 safe_log_warning(logger, f"检查tokens文件时出错: {e}")
233 |         
234 |         while self.is_running:
235 |             try:
236 |                 time.sleep(self.update_interval)
237 |                 
238 |                 if not self.is_running:
239 |                     break
240 |                 
241 |                 if self._check_files_exist():
242 |                     self._run_token_update()
243 |                 else:
244 |                     safe_log_warning(logger, "跳过此次更新 - 必要文件不存在")
245 |                     
246 |             except Exception as e:
247 |                 safe_log_error(logger, "更新循环异常", e)
248 |                 time.sleep(60)  # 异常时等待1分钟再继续
249 |     
250 |     def start(self) -> bool:
251 |         """启动token更新服务"""
252 |         if self.is_running:
253 |             safe_log_warning(logger, "Token更新服务已在运行")
254 |             return False
255 |         
256 |         if not self._check_files_exist():
257 |             safe_log_error(logger, "启动失败 - 必要文件不存在")
258 |             return False
259 |         
260 |         self.is_running = True
261 |         self.update_thread = threading.Thread(target=self._update_loop, daemon=True)
262 |         self.update_thread.start()
263 |         
264 |         safe_log_info(logger, "Token更新服务已启动")
265 |         return True
266 |     
267 |     def stop(self):
268 |         """停止token更新服务"""
269 |         if not self.is_running:
270 |             safe_log_warning(logger, "Token更新服务未在运行")
271 |             return
272 |         
273 |         self.is_running = False
274 |         if self.update_thread and self.update_thread.is_alive():
275 |             self.update_thread.join(timeout=5)
276 |         
277 |         safe_log_info(logger, "Token更新服务已停止")
278 |     
279 |     def force_update(self) -> bool:
280 |         """强制立即更新token"""
281 |         if not self._check_files_exist():
282 |             safe_log_error(logger, "强制更新失败 - 必要文件不存在")
283 |             return False
284 |         
285 |         safe_log_info(logger, "执行强制token更新")
286 |         return self._run_token_update()
287 |     
288 |     async def force_update_async(self) -> bool:
289 |         """异步强制立即更新token"""
290 |         import asyncio
291 |         loop = asyncio.get_event_loop()
292 |         return await loop.run_in_executor(None, self.force_update)
293 |     
294 |     def get_status(self) -> dict:
295 |         """获取更新服务状态"""
296 |         return {
297 |             "is_running": self.is_running,
298 |             "is_updating": self.is_updating,
299 |             "update_interval": self.update_interval,
300 |             "last_update": self.last_update.isoformat() if self.last_update else None,
301 |             "update_count": self.update_count,
302 |             "error_count": self.error_count,
303 |             "last_error": self.last_error,
304 |             "next_update": (
305 |                 (self.last_update + timedelta(seconds=self.update_interval)).isoformat()
306 |                 if self.last_update else None
307 |             ),
308 |             "files": {
309 |                 "get_tokens_script": os.path.exists(self.get_tokens_script),
310 |                 "accounts_file": os.path.exists(self.accounts_file),
311 |                 "tokens_file": os.path.exists(self.tokens_file)
312 |             }
313 |         }


--------------------------------------------------------------------------------
/k2think_proxy.py:
--------------------------------------------------------------------------------
  1 | """
  2 | K2Think API 代理服务 - 重构版本
  3 | 提供OpenAI兼容的API接口，代理到K2Think服务
  4 | """
  5 | import os
  6 | import sys
  7 | import time
  8 | import logging
  9 | from contextlib import asynccontextmanager
 10 | from fastapi import FastAPI, Request
 11 | from fastapi.middleware.cors import CORSMiddleware
 12 | from fastapi.responses import JSONResponse, Response
 13 | 
 14 | # 确保使用UTF-8编码
 15 | os.environ.setdefault('PYTHONIOENCODING', 'utf-8')
 16 | os.environ.setdefault('PYTHONLEGACYWINDOWSSTDIO', '0')
 17 | 
 18 | # 强制设置UTF-8编码
 19 | import locale
 20 | try:
 21 |     locale.setlocale(locale.LC_ALL, 'C.UTF-8')
 22 | except locale.Error:
 23 |     try:
 24 |         locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
 25 |     except locale.Error:
 26 |         pass  # 如果设置失败，继续使用默认设置
 27 | 
 28 | # 重新配置标准输入输出流
 29 | if hasattr(sys.stdout, 'reconfigure'):
 30 |     sys.stdout.reconfigure(encoding='utf-8', errors='replace')
 31 | if hasattr(sys.stderr, 'reconfigure'):
 32 |     sys.stderr.reconfigure(encoding='utf-8', errors='replace')
 33 | if hasattr(sys.stdin, 'reconfigure'):
 34 |     sys.stdin.reconfigure(encoding='utf-8', errors='replace')
 35 | 
 36 | from src.config import Config
 37 | from src.constants import APIConstants
 38 | from src.exceptions import K2ThinkProxyError
 39 | from src.models import ChatCompletionRequest
 40 | from src.api_handler import APIHandler
 41 | 
 42 | # 初始化配置
 43 | try:
 44 |     Config.validate()
 45 |     Config.setup_logging()
 46 | except Exception as e:
 47 |     print(f"配置错误: {e}")
 48 |     exit(1)
 49 | 
 50 | logger = logging.getLogger(__name__)
 51 | 
 52 | # 全局HTTP客户端管理
 53 | @asynccontextmanager
 54 | async def lifespan(app: FastAPI):
 55 |     logger.info("K2Think API Proxy 启动中...")
 56 |     
 57 |     # 如果启用了token自动更新，启动更新服务
 58 |     if Config.ENABLE_TOKEN_AUTO_UPDATE:
 59 |         token_updater = Config.get_token_updater()
 60 |         if token_updater.start():
 61 |             logger.info(f"Token自动更新服务已启动 - 更新间隔: {Config.TOKEN_UPDATE_INTERVAL}秒")
 62 |         else:
 63 |             logger.error("Token自动更新服务启动失败")
 64 |     else:
 65 |         logger.info("Token自动更新服务未启用")
 66 |     
 67 |     yield
 68 |     
 69 |     # 关闭token更新服务
 70 |     if Config.ENABLE_TOKEN_AUTO_UPDATE and Config._token_updater:
 71 |         Config._token_updater.stop()
 72 |         logger.info("Token自动更新服务已停止")
 73 |     
 74 |     logger.info("K2Think API Proxy 关闭中...")
 75 | 
 76 | # 创建FastAPI应用
 77 | app = FastAPI(
 78 |     title="K2Think API Proxy", 
 79 |     description="OpenAI兼容的K2Think API代理服务",
 80 |     version="2.0.0",
 81 |     lifespan=lifespan
 82 | )
 83 | 
 84 | # CORS配置
 85 | app.add_middleware(
 86 |     CORSMiddleware,
 87 |     allow_origins=Config.CORS_ORIGINS,
 88 |     allow_credentials=True,
 89 |     allow_methods=["*"],
 90 |     allow_headers=["*"],
 91 | )
 92 | 
 93 | # 初始化API处理器
 94 | api_handler = APIHandler(Config)
 95 | 
 96 | @app.get("/")
 97 | async def homepage():
 98 |     """首页 - 返回服务状态"""
 99 |     return JSONResponse(content={
100 |         "status": "success",
101 |         "message": "K2Think API Proxy is running",
102 |         "service": "K2Think API Gateway", 
103 |         "model": APIConstants.MODEL_ID,
104 |         "version": "2.1.0",
105 |         "features": [
106 |             "Token轮询和负载均衡",
107 |             "自动失效检测和重试",
108 |             "Token池管理",
109 |             "OpenAI Function Calling 工具调用"
110 |         ],
111 |         "endpoints": {
112 |             "chat": "/v1/chat/completions",
113 |             "models": "/v1/models",
114 |             "health": "/health",
115 |             "admin": {
116 |                 "token_stats": "/admin/tokens/stats",
117 |                 "reset_token": "/admin/tokens/reset/{token_index}",
118 |                 "reset_all": "/admin/tokens/reset-all", 
119 |                 "reload_tokens": "/admin/tokens/reload",
120 |                 "consecutive_failures": "/admin/tokens/consecutive-failures",
121 |                 "reset_consecutive": "/admin/tokens/reset-consecutive",
122 |                 "updater_status": "/admin/tokens/updater/status",
123 |                 "force_update": "/admin/tokens/updater/force-update",
124 |                 "cleanup_temp_files": "/admin/tokens/updater/cleanup-temp"
125 |             }
126 |         }
127 |     })
128 | 
129 | @app.get("/health")
130 | async def health_check():
131 |     """健康检查"""
132 |     token_manager = Config.get_token_manager()
133 |     token_stats = token_manager.get_token_stats()
134 |     
135 |     return JSONResponse(content={
136 |         "status": "healthy",
137 |         "timestamp": int(time.time()),
138 |         "config": {
139 |             "debug_logging": Config.DEBUG_LOGGING,
140 |             "toolify_enabled": Config.ENABLE_TOOLIFY,
141 |             "note": "思考内容输出现在通过模型名控制"
142 |         },
143 |         "tokens": {
144 |             "total": token_stats["total_tokens"],
145 |             "active": token_stats["active_tokens"],
146 |             "inactive": token_stats["inactive_tokens"],
147 |             "consecutive_failures": token_manager.get_consecutive_failures(),
148 |             "auto_update_enabled": Config.ENABLE_TOKEN_AUTO_UPDATE
149 |         }
150 |     })
151 | 
152 | @app.get("/favicon.ico")
153 | async def favicon():
154 |     """返回favicon"""
155 |     return Response(content="", media_type="image/x-icon")
156 | 
157 | @app.get("/v1/models")
158 | async def get_models():
159 |     """获取模型列表"""
160 |     return await api_handler.get_models()
161 | 
162 | @app.post("/v1/chat/completions")
163 | async def chat_completions(request: ChatCompletionRequest, auth_request: Request):
164 |     """处理聊天补全请求"""
165 |     return await api_handler.chat_completions(request, auth_request)
166 | 
167 | @app.get("/admin/tokens/stats")
168 | async def get_token_stats():
169 |     """获取token池统计信息"""
170 |     token_manager = Config.get_token_manager()
171 |     stats = token_manager.get_token_stats()
172 |     # 添加连续失效信息
173 |     stats["consecutive_failures"] = token_manager.get_consecutive_failures()
174 |     stats["consecutive_failure_threshold"] = token_manager.consecutive_failure_threshold
175 |     # 添加上游服务错误信息
176 |     stats["consecutive_upstream_errors"] = token_manager.get_consecutive_upstream_errors()
177 |     stats["upstream_error_threshold"] = token_manager.upstream_error_threshold
178 |     return JSONResponse(content={
179 |         "status": "success",
180 |         "data": stats
181 |     })
182 | 
183 | @app.post("/admin/tokens/reset/{token_index}")
184 | async def reset_token(token_index: int):
185 |     """重置指定索引的token"""
186 |     token_manager = Config.get_token_manager()
187 |     success = token_manager.reset_token(token_index)
188 |     if success:
189 |         return JSONResponse(content={
190 |             "status": "success",
191 |             "message": f"Token {token_index} 已重置"
192 |         })
193 |     else:
194 |         return JSONResponse(
195 |             status_code=400,
196 |             content={
197 |                 "status": "error",
198 |                 "message": f"无效的token索引: {token_index}"
199 |             }
200 |         )
201 | 
202 | @app.post("/admin/tokens/reset-all")
203 | async def reset_all_tokens():
204 |     """重置所有token"""
205 |     token_manager = Config.get_token_manager()
206 |     token_manager.reset_all_tokens()
207 |     return JSONResponse(content={
208 |         "status": "success",
209 |         "message": "所有token已重置"
210 |     })
211 | 
212 | @app.post("/admin/tokens/reload")
213 | async def reload_tokens():
214 |     """重新加载token文件"""
215 |     try:
216 |         Config.reload_tokens()
217 |         token_manager = Config.get_token_manager()
218 |         stats = token_manager.get_token_stats()
219 |         return JSONResponse(content={
220 |             "status": "success",
221 |             "message": "Token文件已重新加载",
222 |             "data": stats
223 |         })
224 |     except Exception as e:
225 |         return JSONResponse(
226 |             status_code=500,
227 |             content={
228 |                 "status": "error",
229 |                 "message": f"重新加载失败: {str(e)}"
230 |             }
231 |         )
232 | 
233 | @app.get("/admin/tokens/consecutive-failures")
234 | async def get_consecutive_failures():
235 |     """获取连续失效信息"""
236 |     token_manager = Config.get_token_manager()
237 |     return JSONResponse(content={
238 |         "status": "success",
239 |         "data": {
240 |             "consecutive_failures": token_manager.get_consecutive_failures(),
241 |             "threshold": token_manager.consecutive_failure_threshold,
242 |             "consecutive_upstream_errors": token_manager.get_consecutive_upstream_errors(),
243 |             "upstream_error_threshold": token_manager.upstream_error_threshold,
244 |             "last_upstream_error_time": token_manager.last_upstream_error_time.isoformat() if token_manager.last_upstream_error_time else None,
245 |             "token_pool_size": len(token_manager.tokens),
246 |             "auto_refresh_enabled": Config.ENABLE_TOKEN_AUTO_UPDATE and len(token_manager.tokens) > 2,
247 |             "last_check": "实时检测"
248 |         }
249 |     })
250 | 
251 | @app.post("/admin/tokens/reset-consecutive")
252 | async def reset_consecutive_failures():
253 |     """重置连续失效计数"""
254 |     token_manager = Config.get_token_manager()
255 |     old_count = token_manager.get_consecutive_failures()
256 |     token_manager.reset_consecutive_failures()
257 |     return JSONResponse(content={
258 |         "status": "success",
259 |         "message": f"连续失效计数已重置: {old_count} -> 0",
260 |         "data": {
261 |             "previous_count": old_count,
262 |             "current_count": 0
263 |         }
264 |     })
265 | 
266 | @app.get("/admin/tokens/updater/status")
267 | async def get_updater_status():
268 |     """获取token更新器状态"""
269 |     if not Config.ENABLE_TOKEN_AUTO_UPDATE:
270 |         return JSONResponse(content={
271 |             "status": "disabled",
272 |             "message": "Token自动更新未启用"
273 |         })
274 |     
275 |     token_updater = Config.get_token_updater()
276 |     status = token_updater.get_status()
277 |     return JSONResponse(content={
278 |         "status": "success",
279 |         "data": status
280 |     })
281 | 
282 | @app.post("/admin/tokens/updater/force-update")
283 | async def force_update_tokens():
284 |     """强制更新tokens"""
285 |     if not Config.ENABLE_TOKEN_AUTO_UPDATE:
286 |         return JSONResponse(
287 |             status_code=400,
288 |             content={
289 |                 "status": "error",
290 |                 "message": "Token自动更新未启用"
291 |             }
292 |         )
293 |     
294 |     token_updater = Config.get_token_updater()
295 |     success = await token_updater.force_update_async()
296 |     
297 |     if success:
298 |         # 更新成功后重新加载token管理器
299 |         Config.reload_tokens()
300 |         token_manager = Config.get_token_manager()
301 |         stats = token_manager.get_token_stats()
302 |         
303 |         return JSONResponse(content={
304 |             "status": "success",
305 |             "message": "Token强制更新成功",
306 |             "data": stats
307 |         })
308 |     else:
309 |         return JSONResponse(
310 |             status_code=500,
311 |             content={
312 |                 "status": "error",
313 |                 "message": "Token强制更新失败"
314 |             }
315 |         )
316 | 
317 | @app.post("/admin/tokens/updater/cleanup-temp")
318 | async def cleanup_temp_files():
319 |     """清理临时文件"""
320 |     if not Config.ENABLE_TOKEN_AUTO_UPDATE:
321 |         return JSONResponse(
322 |             status_code=400,
323 |             content={
324 |                 "status": "error",
325 |                 "message": "Token自动更新未启用"
326 |             }
327 |         )
328 |     
329 |     token_updater = Config.get_token_updater()
330 |     cleaned_count = token_updater.cleanup_all_temp_files()
331 |     
332 |     return JSONResponse(content={
333 |         "status": "success",
334 |         "message": f"临时文件清理完成，共清理 {cleaned_count} 个文件",
335 |         "data": {
336 |             "cleaned_files": cleaned_count
337 |         }
338 |     })
339 | 
340 | @app.exception_handler(K2ThinkProxyError)
341 | async def proxy_exception_handler(request: Request, exc: K2ThinkProxyError):
342 |     """处理自定义代理异常"""
343 |     return JSONResponse(
344 |         status_code=exc.status_code,
345 |         content={
346 |             "error": {
347 |                 "message": exc.message,
348 |                 "type": exc.error_type
349 |             }
350 |         }
351 |     )
352 | 
353 | @app.exception_handler(404)
354 | async def not_found_handler(request: Request, exc):
355 |     """处理404错误"""
356 |     return JSONResponse(
357 |         status_code=404,
358 |         content={"error": "Not Found"}
359 |     )
360 | 
361 | if __name__ == "__main__":
362 |     import uvicorn
363 |     
364 |     # 配置日志级别
365 |     log_level = "debug" if Config.DEBUG_LOGGING else "info"
366 |     
367 |     logger.info(f"启动服务器: {Config.HOST}:{Config.PORT}")
368 |     logger.info("思考内容输出: 通过模型名控制 (MBZUAI-IFM/K2-Think vs MBZUAI-IFM/K2-Think-nothink)")
369 |     
370 |     uvicorn.run(
371 |         app, 
372 |         host=Config.HOST, 
373 |         port=Config.PORT, 
374 |         access_log=Config.ENABLE_ACCESS_LOG,
375 |         log_level=log_level
376 |     )


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # K2Think API Proxy
  2 | 
  3 | 基于 FastAPI 构建的 K2Think AI 模型代理服务，提供 OpenAI 兼容的 API 接口。
  4 | 
  5 | ## 核心功能特性
  6 | 
  7 | - 🧠 **MBZUAI K2-Think 模型**: 支持 MBZUAI 开发的 K2-Think 推理模型
  8 | - 🔄 **OpenAI 兼容**: 完全兼容 OpenAI API 格式，无缝对接现有应用
  9 | - ⚡ **流式响应**: 支持实时流式聊天响应，支持控制thinking输出
 10 | - 🛠️ **工具调用**: 支持 OpenAI Function Calling，可集成外部工具和API
 11 | - 📊 **文件上传**: 支持文件、图像上传
 12 | 
 13 | ## 智能Token管理系统
 14 | 
 15 | ### 🔄 Token轮询与负载均衡
 16 | 
 17 | - 多token轮流使用，自动故障转移
 18 | - 支持大规模token池（支持数百个token）
 19 | 
 20 | ### 🛡️ 智能失效检测与自愈
 21 | 
 22 | - **自动失效检测**: 三次失败后自动禁用失效token
 23 | - **连续失效自动刷新**: 当连续两个token失效时，自动触发强制刷新（仅在token池数量>2时生效）
 24 | - **智能重试机制**: 失效token会被跳过，确保服务连续性
 25 | 
 26 | ### 📈 Token池管理
 27 | 
 28 | - 完整的管理API查看状态、重置token等
 29 | - 实时监控token使用情况和失效统计
 30 | - 支持手动重置和重新加载
 31 | 
 32 | ### 🔄 Token自动更新
 33 | 
 34 | - 定期从账户文件自动生成新的token池
 35 | - **原子性更新**: 零停机时间，更新过程中服务保持可用
 36 | - **智能触发**: 支持定时更新和连续失效触发的强制更新
 37 | 
 38 | ### 🌐 网络适应性
 39 | 
 40 | - 支持HTTP/HTTPS代理配置，适应不同网络环境
 41 | - 🚀 **高性能**: 异步处理架构，支持高并发请求
 42 | - 🐳 **容器化**: 支持 Docker 部署
 43 | 
 44 | ## 快速开始
 45 | 
 46 | ### 本地运行
 47 | 
 48 | 1. **安装依赖**
 49 | 
 50 | ```bash
 51 | pip install -r requirements.txt
 52 | ```
 53 | 
 54 | 2. **配置环境变量**
 55 | 
 56 | ```bash
 57 | cp .env.example .env
 58 | # 编辑 .env 文件，配置你的API密钥和其他选项
 59 | ```
 60 | 
 61 | 3. **准备Token文件**
 62 | 
 63 | 有两种方式管理Token：
 64 | 
 65 | **方式一：手动管理（传统方式）**
 66 | 
 67 | ```bash
 68 | # 复制token示例文件并编辑
 69 | cd data
 70 | cp tokens.example.txt tokens.txt
 71 | # 编辑tokens.txt文件，添加你的实际K2Think tokens
 72 | ```
 73 | 
 74 | **方式二：自动更新（推荐）**
 75 | 
 76 | ```bash
 77 | # 准备账户文件
 78 | echo '{"email": "your-email@example.com", "k2_password": "your-password"}' > accounts.txt
 79 | # 可以添加多个账户，每行一个JSON对象
 80 | ```
 81 | 
 82 | 4. **启动服务**
 83 | 
 84 | ```bash
 85 | python k2think_proxy.py
 86 | ```
 87 | 
 88 | 服务将在 `http://localhost:8001` 启动。
 89 | 
 90 | ### Docker 部署
 91 | 
 92 | #### 使用 docker-compose（推荐）
 93 | 
 94 | ```bash
 95 | # 准备配置文件
 96 | cp .env.example .env
 97 | cd data
 98 | cp accounts.example.txt accounts.txt
 99 | 
100 | # 编辑配置
101 | # 编辑 .env 文件配置API密钥等
102 | # 编辑 accounts.txt 添加K2Think账户信息，格式：{"email": "xxx@yyy.zzz", "k2_password": "xxx"}，一行一个
103 | 
104 | # 启动服务
105 | docker-compose up -d
106 | 
107 | # 检查服务状态
108 | docker-compose logs -f k2think-api
109 | ```
110 | 
111 | #### 手动构建部署
112 | 
113 | ```bash
114 | # 构建镜像
115 | docker build -t k2think-api .
116 | 
117 | # 运行容器
118 | docker run -d \
119 |   --name k2think-api \
120 |   -p 8001:8001 \
121 |   -v $(pwd)/tokens.txt:/app/tokens.txt \
122 |   -v $(pwd)/accounts.txt:/app/accounts.txt:ro \
123 |   -v $(pwd)/.env:/app/.env:ro \
124 |   k2think-api
125 | ```
126 | 
127 | ## API 接口
128 | 
129 | ### 聊天补全
130 | 
131 | **POST** `/v1/chat/completions`
132 | 
133 | ```bash
134 | curl -X POST http://localhost:8001/v1/chat/completions \
135 |   -H "Content-Type: application/json" \
136 |   -H "Authorization: Bearer sk-k2think" \
137 |   -d '{
138 |     "model": "MBZUAI-IFM/K2-Think",
139 |     "messages": [
140 |       {"role": "user", "content": "你擅长什么？"}
141 |     ],
142 |     "stream": false
143 |   }'
144 | ```
145 | 
146 | ### 模型列表
147 | 
148 | **GET** `/v1/models`
149 | 
150 | ```bash
151 | curl http://localhost:8001/v1/models \
152 |   -H "Authorization: Bearer sk-k2think"
153 | ```
154 | 
155 | ### Token管理接口
156 | 
157 | 查看token池状态：
158 | 
159 | ```bash
160 | curl http://localhost:8001/admin/tokens/stats
161 | ```
162 | 
163 | 查看连续失效状态：
164 | 
165 | ```bash
166 | curl http://localhost:8001/admin/tokens/consecutive-failures
167 | ```
168 | 
169 | 重置连续失效计数：
170 | 
171 | ```bash
172 | curl -X POST http://localhost:8001/admin/tokens/reset-consecutive
173 | ```
174 | 
175 | 重置指定token：
176 | 
177 | ```bash
178 | curl -X POST http://localhost:8001/admin/tokens/reset/0
179 | ```
180 | 
181 | 重置所有token：
182 | 
183 | ```bash
184 | curl -X POST http://localhost:8001/admin/tokens/reset-all
185 | ```
186 | 
187 | 重新加载token文件：
188 | 
189 | ```bash
190 | curl -X POST http://localhost:8001/admin/tokens/reload
191 | ```
192 | 
193 | 查看token更新器状态（仅在启用自动更新时可用）：
194 | 
195 | ```bash
196 | curl http://localhost:8001/admin/tokens/updater/status
197 | ```
198 | 
199 | 强制更新tokens（仅在启用自动更新时可用）：
200 | 
201 | ```bash
202 | curl -X POST http://localhost:8001/admin/tokens/updater/force-update
203 | ```
204 | 
205 | ### 健康检查
206 | 
207 | ```bash
208 | curl http://localhost:8001/health
209 | ```
210 | 
211 | ## 环境变量配置
212 | 
213 | ### 基础配置
214 | 
215 | | 变量名              | 默认值                                      | 说明                 |
216 | | ------------------- | ------------------------------------------- | -------------------- |
217 | | `VALID_API_KEY`   | 无默认值                                    | API 访问密钥（必需） |
218 | | `K2THINK_API_URL` | https://www.k2think.ai/api/chat/completions | K2Think API端点      |
219 | 
220 | ### Token管理配置
221 | 
222 | | 变量名                 | 默认值         | 说明              |
223 | | ---------------------- | -------------- | ----------------- |
224 | | `TOKENS_FILE`        | `tokens.txt` | Token文件路径     |
225 | | `MAX_TOKEN_FAILURES` | `3`          | Token最大失败次数 |
226 | 
227 | ### Token自动更新配置
228 | 
229 | | 变量名                       | 默认值            | 说明                                    |
230 | | ---------------------------- | ----------------- | --------------------------------------- |
231 | | `ENABLE_TOKEN_AUTO_UPDATE` | `false`         | 是否启用token自动更新                   |
232 | | `TOKEN_UPDATE_INTERVAL`    | `86400`         | token更新间隔（秒），默认24小时         |
233 | | `ACCOUNTS_FILE`            | `accounts.txt`  | 账户文件路径                            |
234 | | `GET_TOKENS_SCRIPT`        | `get_tokens.py` | token获取脚本路径                       |
235 | | `PROXY_URL`                | 空                | HTTP/HTTPS代理地址（用于get_tokens.py） |
236 | 
237 | ### 服务器配置
238 | 
239 | | 变量名   | 默认值      | 说明         |
240 | | -------- | ----------- | ------------ |
241 | | `HOST` | `0.0.0.0` | 服务监听地址 |
242 | | `PORT` | `8001`    | 服务端口     |
243 | 
244 | ### 工具调用配置
245 | 
246 | | 变量名                    | 默认值   | 说明                             |
247 | | ------------------------- | -------- | -------------------------------- |
248 | | `ENABLE_TOOLIFY`        | `true` | 是否启用工具调用功能             |
249 | | `TOOLIFY_CUSTOM_PROMPT` | `""`   | 自定义工具调用提示词模板（可选） |
250 | 
251 | 详细配置说明请参考 `.env.example` 文件。
252 | 
253 | ## 智能Token管理系统详解
254 | 
255 | ### 连续失效自动刷新机制
256 | 
257 | 这是系统的核心自愈功能，当检测到连续的token失效时，自动触发强制刷新：
258 | 
259 | #### 工作原理
260 | 
261 | 1. **连续失效检测**
262 | 
263 |    - 系统跟踪连续失效的token数量
264 |    - 当连续两个token失效时触发自动刷新
265 |    - 仅在token池数量大于2时启用（避免小规模token池误触发）
266 | 2. **智能触发条件**
267 | 
268 |    - 连续失效阈值：2个token
269 |    - 最小token池大小：3个token
270 |    - 自动更新必须启用：`ENABLE_TOKEN_AUTO_UPDATE=true`
271 | 3. **自动刷新过程**
272 | 
273 |    - 异步执行，不阻塞当前API请求
274 |    - 使用原子性更新机制
275 |    - 刷新成功后自动重新加载token池
276 |    - 重置连续失效计数器
277 | 
278 | #### 监控和管理
279 | 
280 | ```bash
281 | # 查看连续失效状态
282 | curl http://localhost:8001/admin/tokens/consecutive-failures
283 | 
284 | # 响应示例
285 | {
286 |   "status": "success",
287 |   "data": {
288 |     "consecutive_failures": 1,
289 |     "threshold": 2,
290 |     "token_pool_size": 710,
291 |     "auto_refresh_enabled": true,
292 |     "last_check": "实时检测"
293 |   }
294 | }
295 | 
296 | # 手动重置连续失效计数
297 | curl -X POST http://localhost:8001/admin/tokens/reset-consecutive
298 | ```
299 | 
300 | ### Token自动更新机制
301 | 
302 | #### 功能说明
303 | 
304 | Token自动更新机制允许系统定期从账户文件自动生成新的token池，无需手动维护tokens.txt文件。
305 | 
306 | #### 配置步骤
307 | 
308 | 1. **准备账户文件**
309 | 
310 | 创建 `accounts.txt` 文件，每行一个JSON格式的账户信息：
311 | 
312 | ```json
313 | {"email": "user1@example.com", "k2_password": "password1"}
314 | {"email": "user2@example.com", "k2_password": "password2"}
315 | {"email": "user3@example.com", "k2_password": "password3"}
316 | ```
317 | 
318 | 2. **启用自动更新**
319 | 
320 | 在 `.env` 文件中配置：
321 | 
322 | ```bash
323 | # 启用token自动更新
324 | ENABLE_TOKEN_AUTO_UPDATE=true
325 | 
326 | # 设置更新间隔（秒）
327 | TOKEN_UPDATE_INTERVAL=86400  # 每24小时更新一次
328 | 
329 | # 配置文件路径
330 | ACCOUNTS_FILE=accounts.txt
331 | TOKENS_FILE=tokens.txt
332 | GET_TOKENS_SCRIPT=get_tokens.py
333 | 
334 | # 可选：配置代理（如果需要）
335 | PROXY_URL=http://username:password@proxy_host:proxy_port
336 | ```
337 | 
338 | 3. **更新触发方式**
339 | 
340 | 系统支持多种更新触发方式：
341 | 
342 | - **定时更新**: 按照设置的间隔定期更新
343 | - **连续失效触发**: 当连续两个token失效时自动触发
344 | - **手动强制更新**: 通过API手动触发更新
345 | - **启动时更新**: 如果token文件为空或无效，启动时立即更新
346 | 
347 | #### 原子性更新机制
348 | 
349 | 为了确保token更新过程中服务的连续性，系统采用了原子性更新机制：
350 | 
351 | 1. **临时文件生成**: 新token首先写入 `tokens.txt.tmp` 临时文件
352 | 2. **验证检查**: 确认临时文件存在且不为空
353 | 3. **备份当前文件**: 将现有 `tokens.txt` 重命名为 `tokens.txt.backup`
354 | 4. **原子性替换**: 将临时文件重命名为 `tokens.txt`
355 | 5. **重新加载**: 通知token管理器重新加载新的token池
356 | 
357 | #### 更新状态监控
358 | 
359 | 通过管理接口可以实时监控更新状态：
360 | 
361 | ```bash
362 | # 查看详细更新状态
363 | curl http://localhost:8001/admin/tokens/updater/status
364 | 
365 | # 响应示例
366 | {
367 |   "status": "success",
368 |   "data": {
369 |     "is_running": true,
370 |     "is_updating": false,
371 |     "update_interval": 86400,
372 |     "last_update": "2024-01-01T12:00:00",
373 |     "update_count": 5,
374 |     "error_count": 0,
375 |     "last_error": null,
376 |     "next_update": "2024-01-01T13:00:00",
377 |     "files": {
378 |       "get_tokens_script": true,
379 |       "accounts_file": true,
380 |       "tokens_file": true
381 |     }
382 |   }
383 | }
384 | ```
385 | 
386 | #### 服务保障特性
387 | 
388 | - ✅ **零停机时间**: 更新过程中API服务保持可用
389 | - ✅ **请求不中断**: 正在处理的请求不会受到影响
390 | - ✅ **自动恢复**: 连续失效时自动触发刷新
391 | - ✅ **回滚机制**: 更新失败时保留原有token文件
392 | - ✅ **状态透明**: 可实时查看更新进度和状态
393 | - ✅ **错误处理**: 更新失败时记录详细错误信息
394 | 
395 | ## 工具调用功能
396 | 
397 | K2Think API 代理支持 OpenAI Function Calling 规范的工具调用功能。
398 | 
399 | ### 功能特性
400 | 
401 | - ✅ 支持 OpenAI 标准的 `tools` 和 `tool_choice` 参数
402 | - ✅ 自动工具提示注入和消息处理
403 | - ✅ 流式和非流式响应中的工具调用检测
404 | - ✅ 智能 JSON 解析和工具调用提取
405 | - ✅ 支持多种工具调用格式（JSON 代码块、内联 JSON、自然语言）
406 | 
407 | ### 使用示例
408 | 
409 | ```python
410 | import openai
411 | 
412 | client = openai.OpenAI(
413 |     base_url="http://localhost:8001/v1",
414 |     api_key="sk-k2think"
415 | )
416 | 
417 | # 定义工具
418 | tools = [
419 |     {
420 |         "type": "function",
421 |         "function": {
422 |             "name": "get_weather",
423 |             "description": "获取指定城市的天气信息",
424 |             "parameters": {
425 |                 "type": "object",
426 |                 "properties": {
427 |                     "city": {
428 |                         "type": "string",
429 |                         "description": "城市名称，例如：北京、上海"
430 |                     },
431 |                     "unit": {
432 |                         "type": "string",
433 |                         "enum": ["celsius", "fahrenheit"],
434 |                         "description": "温度单位"
435 |                     }
436 |                 },
437 |                 "required": ["city"]
438 |             }
439 |         }
440 |     }
441 | ]
442 | 
443 | # 发送工具调用请求
444 | response = client.chat.completions.create(
445 |     model="MBZUAI-IFM/K2-Think",
446 |     messages=[
447 |         {"role": "user", "content": "北京今天天气怎么样？"}
448 |     ],
449 |     tools=tools,
450 |     tool_choice="auto"  # auto, none, required 或指定特定工具
451 | )
452 | 
453 | # 处理响应
454 | if response.choices[0].message.tool_calls:
455 |     for tool_call in response.choices[0].message.tool_calls:
456 |         function_name = tool_call.function.name
457 |         function_args = tool_call.function.arguments
458 |         print(f"调用工具: {function_name}")
459 |         print(f"参数: {function_args}")
460 | ```
461 | 
462 | ### tool_choice 参数说明
463 | 
464 | - `"auto"`: 让模型自动决定是否使用工具（推荐）
465 | - `"none"`: 禁用工具调用
466 | - `"required"`: 强制模型使用工具
467 | - `{"type": "function", "function": {"name": "tool_name"}}`: 强制使用特定工具
468 | 
469 | ## Python SDK 使用示例
470 | 
471 | ```python
472 | import openai
473 | 
474 | # 配置客户端
475 | client = openai.OpenAI(
476 |     base_url="http://localhost:8001/v1",
477 |     api_key="sk-k2think"
478 | )
479 | 
480 | # 发送聊天请求
481 | response = client.chat.completions.create(
482 |     model="MBZUAI-IFM/K2-Think",
483 |     messages=[
484 |         {"role": "user", "content": "解释一下量子计算的基本原理"}
485 |     ],
486 |     stream=False
487 | )
488 | 
489 | print(response.choices[0].message.content)
490 | 
491 | # 流式聊天
492 | stream = client.chat.completions.create(
493 |     model="MBZUAI-IFM/K2-Think",
494 |     messages=[
495 |         {"role": "user", "content": "写一首关于人工智能的诗"}
496 |     ],
497 |     stream=True
498 | )
499 | 
500 | for chunk in stream:
501 |     if chunk.choices[0].delta.content is not None:
502 |         print(chunk.choices[0].delta.content, end="")
503 | ```
504 | 
505 | ## 模型特性
506 | 
507 | K2-Think 模型具有以下特点：
508 | 
509 | - **推理能力**: 模型会先进行思考过程，然后给出答案
510 | - **响应格式**: 使用 `<think></think>` 和 `<answer></answer>` 标签结构化输出
511 | - **思考内容控制**:
512 |   - `MBZUAI-IFM/K2-Think`: 包含完整的思考过程
513 |   - `MBZUAI-IFM/K2-Think-nothink`: 仅输出最终答案
514 | - **多语言支持**: 支持中文、英文等多种语言
515 | - **专业领域**: 在数学、科学、编程等领域表现优秀
516 | 
517 | ## 完整配置示例
518 | 
519 | ### .env 文件示例
520 | 
521 | ```bash
522 | # 基础配置
523 | VALID_API_KEY=sk-k2think
524 | HOST=0.0.0.0
525 | PORT=8001
526 | 
527 | # Token管理
528 | TOKENS_FILE=tokens.txt
529 | MAX_TOKEN_FAILURES=3
530 | 
531 | # Token自动更新（推荐）
532 | ENABLE_TOKEN_AUTO_UPDATE=true
533 | TOKEN_UPDATE_INTERVAL=86400 # 24小时
534 | ACCOUNTS_FILE=accounts.txt
535 | GET_TOKENS_SCRIPT=get_tokens.py
536 | 
537 | # 代理配置（可选）
538 | PROXY_URL=http://username:password@proxy.example.com:8080
539 | 
540 | # 功能开关
541 | ENABLE_TOOLIFY=true
542 | DEBUG_LOGGING=false
543 | 
544 | # 工具调用配置（可选）
545 | # TOOLIFY_CUSTOM_PROMPT="自定义提示词模板"
546 | ```
547 | 
548 | ### accounts.txt 文件示例
549 | 
550 | ```json
551 | {"email": "user1@example.com", "k2_password": "password1"}
552 | {"email": "user2@example.com", "k2_password": "password2"}
553 | ```
554 | 
555 | ## 故障排除
556 | 
557 | ### 常见问题
558 | 
559 | 1. **Token 相关问题**
560 | 
561 |    - **所有token失效**: 访问 `/admin/tokens/stats` 查看token状态，使用 `/admin/tokens/reset-all` 重置所有token
562 |    - **连续失效**: 查看 `/admin/tokens/consecutive-failures` 了解连续失效状态，系统会自动触发刷新
563 |    - **添加新token**:
564 |      - 手动模式：编辑 `tokens.txt` 文件添加新token，然后访问 `/admin/tokens/reload` 重新加载
565 |      - 自动模式：编辑 `accounts.txt` 添加新账户，然后访问 `/admin/tokens/updater/force-update` 强制更新
566 |    - **查看token状态**: 访问 `/health` 端点查看简要统计，或 `/admin/tokens/stats` 查看详细信息
567 |    - **自动更新问题**:
568 |      - 访问 `/admin/tokens/updater/status` 查看更新器状态和错误信息
569 |      - 检查 `is_updating` 字段确认是否正在更新中
570 |      - 查看 `last_error` 字段了解最近的错误信息
571 | 2. **端口冲突**
572 | 
573 |    - 修改 `PORT` 环境变量
574 |    - 或使用 Docker 端口映射
575 | 
576 | ### 日志查看
577 | 
578 | ```bash
579 | # Docker 容器日志
580 | docker logs k2think-api
581 | 
582 | # docker-compose日志
583 | docker-compose logs -f k2think-api
584 | 
585 | # 本地运行日志
586 | # 日志会直接输出到控制台
587 | ```
588 | 
589 | ### 配置检查
590 | 
591 | 使用配置检查脚本验证你的环境变量设置：
592 | 
593 | ```bash
594 | # 检查当前配置
595 | python check_config_simple.py
596 | 
597 | # 查看配置示例
598 | python check_config_simple.py --example
599 | ```
600 | 
601 | ### Docker部署注意事项
602 | 
603 | 1. **文件映射**
604 | 
605 |    - `tokens.txt` 通过volume映射到容器内，支持动态更新
606 |    - 如果启用自动更新，`tokens.txt` 不能设置为只读（`:ro`）
607 |    - `accounts.txt` 映射为只读，包含账户信息用于自动更新
608 |    - `.env` 文件包含所有环境变量配置
609 | 2. **健康检查**
610 | 
611 |    - Docker容器包含健康检查机制
612 |    - 可通过 `docker ps` 查看健康状态
613 | 3. **安全考虑**
614 | 
615 |    - 容器以非root用户运行
616 |    - 敏感文件通过volume挂载而非打包到镜像中
617 | 
618 | ## 许可证
619 | 
620 | MIT License
621 | 
622 | ## 贡献
623 | 
624 | 欢迎提交 Issue 和 Pull Request！
625 | 


--------------------------------------------------------------------------------
/src/token_manager.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Token管理模块
  3 | 负责管理K2Think的token池，实现轮询、负载均衡和失效标记
  4 | """
  5 | import os
  6 | import json
  7 | import logging
  8 | import threading
  9 | 
 10 | from typing import List, Dict, Optional, Tuple
 11 | from datetime import datetime, timedelta
 12 | 
 13 | logger = logging.getLogger(__name__)
 14 | 
 15 | # 导入安全日志函数
 16 | try:
 17 |     from src.utils import safe_log_error, safe_log_info, safe_log_warning
 18 | except ImportError:
 19 |     # 如果导入失败，提供简单的替代函数
 20 |     def safe_log_error(logger, msg, exc=None):
 21 |         try:
 22 |             if exc:
 23 |                 logger.error(f"{msg}: {str(exc)}")
 24 |             else:
 25 |                 logger.error(msg)
 26 |         except:
 27 |             print(f"Log error: {msg}")
 28 |     
 29 |     def safe_log_info(logger, msg):
 30 |         try:
 31 |             logger.info(msg)
 32 |         except:
 33 |             print(f"Log info: {msg}")
 34 |     
 35 |     def safe_log_warning(logger, msg):
 36 |         try:
 37 |             logger.warning(msg)
 38 |         except:
 39 |             print(f"Log warning: {msg}")
 40 | 
 41 | class TokenManager:
 42 |     """Token管理器 - 支持轮询、负载均衡和失效标记"""
 43 |     
 44 |     def __init__(self, tokens_file: str = "tokens.txt", max_failures: int = 3, allow_empty: bool = False):
 45 |         """
 46 |         初始化token管理器
 47 |         
 48 |         Args:
 49 |             tokens_file: token文件路径
 50 |             max_failures: 最大失败次数，超过后标记为失效
 51 |             allow_empty: 是否允许空的token文件（用于自动更新模式）
 52 |         """
 53 |         self.tokens_file = tokens_file
 54 |         self.max_failures = max_failures
 55 |         self.tokens: List[Dict] = []
 56 |         self.current_index = 0
 57 |         self.lock = threading.Lock()
 58 |         self.allow_empty = allow_empty
 59 |         
 60 |         # 连续失效检测
 61 |         self.consecutive_failures = 0
 62 |         self.consecutive_failure_threshold = 2  # 连续失效阈值
 63 |         self.force_refresh_callback = None  # 强制刷新回调函数
 64 |         
 65 |         # 上游服务连续报错检测
 66 |         self.consecutive_upstream_errors = 0
 67 |         self.upstream_error_threshold = 2  # 上游服务连续报错阈值
 68 |         self.last_upstream_error_time = None
 69 |         
 70 |         # 加载tokens
 71 |         self.load_tokens()
 72 |         
 73 |         if not self.tokens and not allow_empty:
 74 |             raise ValueError(f"未找到有效的token，请检查文件: {tokens_file}")
 75 |     
 76 |     def load_tokens(self) -> None:
 77 |         """从文件加载token列表"""
 78 |         try:
 79 |             if not os.path.exists(self.tokens_file):
 80 |                 raise FileNotFoundError(f"Token文件不存在: {self.tokens_file}")
 81 |             
 82 |             with open(self.tokens_file, 'r', encoding='utf-8') as f:
 83 |                 lines = f.readlines()
 84 |             
 85 |             self.tokens = []
 86 |             valid_token_index = 0
 87 |             for line in lines:
 88 |                 token = line.strip()
 89 |                 # 忽略空行和注释行
 90 |                 if token and not token.startswith('#'):
 91 |                     self.tokens.append({
 92 |                         'token': token,
 93 |                         'failures': 0,
 94 |                         'is_active': True,
 95 |                         'last_used': None,
 96 |                         'last_failure': None,
 97 |                         'index': valid_token_index
 98 |                     })
 99 |                     valid_token_index += 1
100 |             
101 |             safe_log_info(logger, f"成功加载 {len(self.tokens)} 个token")
102 |             
103 |         except Exception as e:
104 |             safe_log_error(logger, "加载token文件失败", e)
105 |             raise
106 |     
107 | 
108 |     def get_next_token(self) -> Optional[str]:
109 |         """
110 |         获取下一个可用的token（轮询算法）
111 |         
112 |         Returns:
113 |             可用的token字符串，如果没有可用token则返回None
114 |         """
115 |         with self.lock:
116 |             active_tokens = [t for t in self.tokens if t['is_active']]
117 |             
118 |             if not active_tokens:
119 |                 if self.allow_empty:
120 |                     safe_log_warning(logger, "没有可用的token，可能正在等待自动更新")
121 |                 else:
122 |                     safe_log_warning(logger, "没有可用的token")
123 |                 return None
124 |             
125 |             # 轮询算法：从当前索引开始寻找下一个可用token
126 |             attempts = 0
127 |             while attempts < len(self.tokens):
128 |                 token_info = self.tokens[self.current_index]
129 |                 
130 |                 if token_info['is_active']:
131 |                     # 更新使用时间
132 |                     token_info['last_used'] = datetime.now()
133 |                     token = token_info['token']
134 |                     
135 |                     # 移动到下一个索引
136 |                     self.current_index = (self.current_index + 1) % len(self.tokens)
137 |                     
138 |                     logger.debug(f"分配token (索引: {token_info['index']}, 失败次数: {token_info['failures']})")
139 |                     return token
140 |                 
141 |                 # 移动到下一个token
142 |                 self.current_index = (self.current_index + 1) % len(self.tokens)
143 |                 attempts += 1
144 |             
145 |             safe_log_warning(logger, "所有token都已失效")
146 |             return None
147 |     
148 |     def mark_token_failure(self, token: str, error_message: str = "") -> bool:
149 |         """
150 |         标记token使用失败
151 |         
152 |         Args:
153 |             token: 失败的token
154 |             error_message: 错误信息
155 |             
156 |         Returns:
157 |             如果token被标记为失效返回True，否则返回False
158 |         """
159 |         with self.lock:
160 |             for token_info in self.tokens:
161 |                 if token_info['token'] == token:
162 |                     token_info['failures'] += 1
163 |                     token_info['last_failure'] = datetime.now()
164 |                     
165 |                     # 检查是否是上游服务错误（401等认证错误）
166 |                     is_upstream_error = self._is_upstream_error(error_message)
167 |                     
168 |                     if is_upstream_error:
169 |                         # 增加上游服务连续报错计数
170 |                         self.consecutive_upstream_errors += 1
171 |                         self.last_upstream_error_time = datetime.now()
172 |                         
173 |                         safe_log_warning(logger, f"🔒 上游服务认证错误 (索引: {token_info['index']}, "
174 |                                      f"失败次数: {token_info['failures']}/{self.max_failures}, "
175 |                                      f"连续上游错误: {self.consecutive_upstream_errors}): {error_message}")
176 |                         
177 |                         # 401错误立即触发强制刷新（不等连续错误阈值）
178 |                         if "401" in error_message and self.force_refresh_callback:
179 |                             safe_log_warning(logger, f"🚨 检测到401认证错误，立即触发token强制刷新")
180 |                             self._trigger_force_refresh("401认证失败")
181 |                             # 重置连续计数，避免重复触发
182 |                             self.consecutive_upstream_errors = 0
183 |                         else:
184 |                             # 其他上游错误按原逻辑处理
185 |                             self._check_consecutive_upstream_errors()
186 |                     else:
187 |                         # 增加连续失效计数
188 |                         self.consecutive_failures += 1
189 |                         
190 |                         safe_log_warning(logger, f"Token失败 (索引: {token_info['index']}, "
191 |                                      f"失败次数: {token_info['failures']}/{self.max_failures}, "
192 |                                      f"连续失效: {self.consecutive_failures}): {error_message}")
193 |                         
194 |                         # 检查连续失效触发条件
195 |                         self._check_consecutive_failures()
196 |                     
197 |                     # 检查是否达到最大失败次数
198 |                     if token_info['failures'] >= self.max_failures:
199 |                         token_info['is_active'] = False
200 |                         safe_log_error(logger, f"Token已失效 (索引: {token_info['index']}, "
201 |                                    f"失败次数: {token_info['failures']})")
202 |                         return True
203 |                     
204 |                     return False
205 |             
206 |             safe_log_warning(logger, "未找到匹配的token进行失败标记")
207 |             return False
208 |     
209 |     def mark_token_success(self, token: str) -> None:
210 |         """
211 |         标记token使用成功（重置失败计数）
212 |         
213 |         Args:
214 |             token: 成功的token
215 |         """
216 |         with self.lock:
217 |             for token_info in self.tokens:
218 |                 if token_info['token'] == token:
219 |                     if token_info['failures'] > 0:
220 |                         safe_log_info(logger, f"Token恢复 (索引: {token_info['index']}, "
221 |                                   f"重置失败次数: {token_info['failures']} -> 0)")
222 |                         token_info['failures'] = 0
223 |                     
224 |                     # 成功请求重置上游服务错误计数
225 |                     if self.consecutive_upstream_errors > 0:
226 |                         safe_log_info(logger, f"重置上游服务连续错误计数: {self.consecutive_upstream_errors} -> 0")
227 |                         self.consecutive_upstream_errors = 0
228 |                     
229 |                     # 注意：不再自动重置连续失效计数，只有手动重置或强制刷新成功后才重置
230 |                     return
231 |     
232 |     def get_token_stats(self) -> Dict:
233 |         """
234 |         获取token池统计信息
235 |         
236 |         Returns:
237 |             包含统计信息的字典
238 |         """
239 |         with self.lock:
240 |             total = len(self.tokens)
241 |             active = sum(1 for t in self.tokens if t['is_active'])
242 |             inactive = total - active
243 |             
244 |             failure_distribution = {}
245 |             for token_info in self.tokens:
246 |                 failures = token_info['failures']
247 |                 failure_distribution[failures] = failure_distribution.get(failures, 0) + 1
248 |             
249 |             return {
250 |                 'total_tokens': total,
251 |                 'active_tokens': active,
252 |                 'inactive_tokens': inactive,
253 |                 'current_index': self.current_index,
254 |                 'failure_distribution': failure_distribution,
255 |                 'max_failures': self.max_failures
256 |             }
257 |     
258 |     def reset_token(self, token_index: int) -> bool:
259 |         """
260 |         重置指定索引的token（清除失败计数，重新激活）
261 |         
262 |         Args:
263 |             token_index: token索引
264 |             
265 |         Returns:
266 |             重置成功返回True，否则返回False
267 |         """
268 |         with self.lock:
269 |             if 0 <= token_index < len(self.tokens):
270 |                 token_info = self.tokens[token_index]
271 |                 old_failures = token_info['failures']
272 |                 old_active = token_info['is_active']
273 |                 
274 |                 token_info['failures'] = 0
275 |                 token_info['is_active'] = True
276 |                 token_info['last_failure'] = None
277 |                 
278 |                 safe_log_info(logger, f"Token重置 (索引: {token_index}, "
279 |                            f"失败次数: {old_failures} -> 0, "
280 |                            f"状态: {old_active} -> True)")
281 |                 return True
282 |             
283 |             safe_log_warning(logger, f"无效的token索引: {token_index}")
284 |             return False
285 |     
286 |     def reset_all_tokens(self) -> None:
287 |         """重置所有token（清除所有失败计数，重新激活所有token）"""
288 |         with self.lock:
289 |             reset_count = 0
290 |             for token_info in self.tokens:
291 |                 if token_info['failures'] > 0 or not token_info['is_active']:
292 |                     token_info['failures'] = 0
293 |                     token_info['is_active'] = True
294 |                     token_info['last_failure'] = None
295 |                     reset_count += 1
296 |             
297 |             safe_log_info(logger, f"重置了 {reset_count} 个token，当前活跃token数: {len(self.tokens)}")
298 |     
299 |     def reload_tokens(self) -> None:
300 |         """重新加载token文件"""
301 |         safe_log_info(logger, "重新加载token文件...")
302 |         old_count = len(self.tokens)
303 |         self.load_tokens()
304 |         new_count = len(self.tokens)
305 |         
306 |         safe_log_info(logger, f"Token重新加载完成: {old_count} -> {new_count}")
307 |     
308 |     def get_token_by_index(self, index: int) -> Optional[Dict]:
309 |         """根据索引获取token信息"""
310 |         with self.lock:
311 |             if 0 <= index < len(self.tokens):
312 |                 return self.tokens[index].copy()
313 |             return None
314 |     
315 |     def set_force_refresh_callback(self, callback):
316 |         """
317 |         设置强制刷新回调函数
318 |         
319 |         Args:
320 |             callback: 当需要强制刷新时调用的异步函数
321 |         """
322 |         self.force_refresh_callback = callback
323 |         safe_log_info(logger, "已设置强制刷新回调函数")
324 |     
325 |     def _is_upstream_error(self, error_message: str) -> bool:
326 |         """
327 |         判断是否为上游服务错误
328 |         
329 |         Args:
330 |             error_message: 错误信息
331 |             
332 |         Returns:
333 |             如果是上游服务错误返回True，否则返回False
334 |         """
335 |         # 检查常见的上游服务错误标识
336 |         upstream_error_indicators = [
337 |             "上游服务错误: 401",
338 |             "上游服务错误: 403", 
339 |             "401",
340 |             "403",
341 |             "unauthorized", 
342 |             "forbidden",
343 |             "invalid token",
344 |             "authentication failed",
345 |             "token expired",
346 |             "authentication error",
347 |             "invalid_request_error",
348 |             "authentication_error"
349 |         ]
350 |         
351 |         error_lower = error_message.lower()
352 |         is_upstream = any(indicator.lower() in error_lower for indicator in upstream_error_indicators)
353 |         
354 |         # 特别检查HTTP状态码模式
355 |         import re
356 |         # 匹配 "上游服务错误: xxx" 或 "HTTP状态错误: xxx" 等格式中的401/403
357 |         status_code_pattern = r'(?:上游服务错误|http状态错误|状态码):\s*(?:40[13])'
358 |         if re.search(status_code_pattern, error_lower):
359 |             is_upstream = True
360 |         
361 |         if is_upstream:
362 |             safe_log_info(logger, f"检测到上游服务认证错误: {error_message}")
363 |         
364 |         return is_upstream
365 |     
366 |     def _check_consecutive_upstream_errors(self):
367 |         """
368 |         检查上游服务连续报错情况，触发强制刷新机制
369 |         """
370 |         if self.consecutive_upstream_errors >= self.upstream_error_threshold:
371 |             safe_log_warning(logger, f"🚨 检测到连续{self.consecutive_upstream_errors}个上游服务认证错误（401/403），触发自动刷新token池")
372 |             
373 |             # 重置上游错误计数，避免重复触发
374 |             self.consecutive_upstream_errors = 0
375 |             
376 |             if self.force_refresh_callback:
377 |                 self._trigger_force_refresh("上游服务连续认证失败 (401/403)")
378 |             else:
379 |                 safe_log_warning(logger, "⚠️ 未设置强制刷新回调函数，无法自动刷新token池")
380 |     
381 |     def _check_consecutive_failures(self):
382 |         """
383 |         检查连续失效情况，触发强制刷新机制
384 |         """
385 |         # 只有在token池数量大于2时才检查连续失效
386 |         if len(self.tokens) <= 2:
387 |             logger.debug(f"Token池数量({len(self.tokens)})不足，跳过连续失效检查")
388 |             return
389 |         
390 |         if self.consecutive_failures >= self.consecutive_failure_threshold:
391 |             safe_log_warning(logger, f"检测到连续{self.consecutive_failures}个token失效，触发强制刷新机制")
392 |             
393 |             if self.force_refresh_callback:
394 |                 self._trigger_force_refresh("连续token失效")
395 |             else:
396 |                 safe_log_warning(logger, "未设置强制刷新回调函数，无法自动刷新token池")
397 |     
398 |     def _trigger_force_refresh(self, reason: str):
399 |         """
400 |         触发强制刷新
401 |         
402 |         Args:
403 |             reason: 触发原因
404 |         """
405 |         try:
406 |             # 异步调用强制刷新
407 |             import asyncio
408 |             import threading
409 |             
410 |             def run_async_callback():
411 |                 try:
412 |                     # 创建新的事件循环（如果当前线程没有）
413 |                     try:
414 |                         loop = asyncio.get_event_loop()
415 |                     except RuntimeError:
416 |                         loop = asyncio.new_event_loop()
417 |                         asyncio.set_event_loop(loop)
418 |                     
419 |                     # 运行强制刷新（现在是同步函数）
420 |                     self.force_refresh_callback()
421 |                     
422 |                     safe_log_info(logger, f"🔄 强制刷新tokens.txt已触发 - 原因: {reason}")
423 |                     
424 |                 except Exception as e:
425 |                     safe_log_error(logger, "执行强制刷新回调失败", e)
426 |             
427 |             # 在新线程中执行，避免阻塞当前操作
428 |             refresh_thread = threading.Thread(target=run_async_callback, daemon=True)
429 |             refresh_thread.start()
430 |             
431 |         except Exception as e:
432 |             safe_log_error(logger, "启动强制刷新线程失败", e)
433 |     
434 |     def get_consecutive_failures(self) -> int:
435 |         """获取当前连续失效次数"""
436 |         return self.consecutive_failures
437 |     
438 |     def get_consecutive_upstream_errors(self) -> int:
439 |         """获取当前上游服务连续错误次数"""
440 |         return self.consecutive_upstream_errors
441 |     
442 |     def reset_consecutive_failures(self):
443 |         """重置连续失效计数"""
444 |         with self.lock:
445 |             old_count = self.consecutive_failures
446 |             old_upstream_count = self.consecutive_upstream_errors
447 |             
448 |             self.consecutive_failures = 0
449 |             self.consecutive_upstream_errors = 0
450 |             
451 |             if old_count > 0:
452 |                 safe_log_info(logger, f"手动重置连续失效计数: {old_count} -> 0")
453 |             if old_upstream_count > 0:
454 |                 safe_log_info(logger, f"手动重置上游服务连续错误计数: {old_upstream_count} -> 0")
455 |     
456 | 
457 | 


--------------------------------------------------------------------------------
/src/response_processor.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 响应处理模块
  3 | 处理流式和非流式响应的所有逻辑
  4 | """
  5 | import json
  6 | import time
  7 | import asyncio
  8 | import logging
  9 | import uuid
 10 | from datetime import datetime
 11 | from typing import Dict, AsyncGenerator, Tuple, Optional
 12 | import pytz
 13 | import httpx
 14 | 
 15 | from src.constants import (
 16 |     APIConstants, ResponseConstants, ContentConstants, 
 17 |     NumericConstants, TimeConstants, HeaderConstants
 18 | )
 19 | from src.exceptions import UpstreamError, TimeoutError as ProxyTimeoutError
 20 | from src.utils import safe_log_error, safe_log_info, safe_log_warning
 21 | from src.toolify_config import get_toolify
 22 | from src.toolify.detector import StreamingFunctionCallDetector
 23 | 
 24 | logger = logging.getLogger(__name__)
 25 | 
 26 | class ResponseProcessor:
 27 |     """响应处理器"""
 28 |     
 29 |     def __init__(self, config):
 30 |         self.config = config
 31 |     
 32 |     def extract_answer_content(self, full_content: str, output_thinking: bool = True) -> str:
 33 |         """删除第一个<answer>标签和最后一个</answer>标签，保留内容"""
 34 |         if not full_content:
 35 |             return full_content
 36 |         
 37 |         # 完全通过模型名控制思考内容输出，默认显示思考内容
 38 |         should_output_thinking = output_thinking
 39 |         
 40 |         if should_output_thinking:
 41 |             # 删除第一个<answer>
 42 |             answer_start = full_content.find(ContentConstants.ANSWER_START_TAG)
 43 |             if answer_start != -1:
 44 |                 full_content = full_content[:answer_start] + full_content[answer_start + len(ContentConstants.ANSWER_START_TAG):]
 45 | 
 46 |             # 删除最后一个</answer>
 47 |             answer_end = full_content.rfind(ContentConstants.ANSWER_END_TAG)
 48 |             if answer_end != -1:
 49 |                 full_content = full_content[:answer_end] + full_content[answer_end + len(ContentConstants.ANSWER_END_TAG):]
 50 | 
 51 |             return full_content.strip()
 52 |         else:
 53 |             # 删除<think>部分（包括标签）
 54 |             think_start = full_content.find(ContentConstants.THINK_START_TAG)
 55 |             think_end = full_content.find(ContentConstants.THINK_END_TAG)
 56 |             if think_start != -1 and think_end != -1:
 57 |                 full_content = full_content[:think_start] + full_content[think_end + len(ContentConstants.THINK_END_TAG):]
 58 |             
 59 |             # 删除<answer>标签及其内容之外的部分
 60 |             answer_start = full_content.find(ContentConstants.ANSWER_START_TAG)
 61 |             answer_end = full_content.rfind(ContentConstants.ANSWER_END_TAG)
 62 |             if answer_start != -1 and answer_end != -1:
 63 |                 content = full_content[answer_start + len(ContentConstants.ANSWER_START_TAG):answer_end]
 64 |                 return content.strip()
 65 | 
 66 |             return full_content.strip()
 67 |     
 68 |     def calculate_dynamic_chunk_size(self, content_length: int) -> int:
 69 |         """
 70 |         动态计算流式输出的chunk大小
 71 |         确保总输出时间不超过MAX_STREAM_TIME秒
 72 |         
 73 |         Args:
 74 |             content_length: 待输出内容的总长度
 75 |         
 76 |         Returns:
 77 |             int: 动态计算的chunk大小，最小为50
 78 |         """
 79 |         if content_length <= 0:
 80 |             return self.config.STREAM_CHUNK_SIZE
 81 |         
 82 |         # 计算需要的总chunk数量以满足时间限制
 83 |         # 总时间 = chunk数量 * STREAM_DELAY
 84 |         # chunk数量 = content_length / chunk_size
 85 |         # 所以：总时间 = (content_length / chunk_size) * STREAM_DELAY
 86 |         # 解出：chunk_size = (content_length * STREAM_DELAY) / MAX_STREAM_TIME
 87 |         
 88 |         calculated_chunk_size = int((content_length * self.config.STREAM_DELAY) / self.config.MAX_STREAM_TIME)
 89 |         
 90 |         # 确保chunk_size不小于最小值
 91 |         dynamic_chunk_size = max(calculated_chunk_size, NumericConstants.MIN_CHUNK_SIZE)
 92 |         
 93 |         # 如果计算出的chunk_size太大（比如内容很短），使用默认值
 94 |         if dynamic_chunk_size > content_length:
 95 |             dynamic_chunk_size = min(self.config.STREAM_CHUNK_SIZE, content_length)
 96 |         
 97 |         logger.debug(f"动态chunk_size计算: 内容长度={content_length}, 计算值={calculated_chunk_size}, 最终值={dynamic_chunk_size}")
 98 |         
 99 |         return dynamic_chunk_size
100 |     
101 |     def content_to_multimodal(self, content) -> str | list[dict]:
102 |         """将内容转换为多模态格式用于K2Think API"""
103 |         if content is None:
104 |             return ""
105 |         if isinstance(content, str):
106 |             return content
107 |         if isinstance(content, list):
108 |             # 检查是否包含图像内容
109 |             has_image = False
110 |             result_parts = []
111 |             
112 |             for p in content:
113 |                 if hasattr(p, 'type'):  # ContentPart object
114 |                     if getattr(p, 'type') == ContentConstants.TEXT_TYPE and getattr(p, 'text', None):
115 |                         result_parts.append({
116 |                             "type": ContentConstants.TEXT_TYPE,
117 |                             "text": getattr(p, 'text')
118 |                         })
119 |                     elif getattr(p, 'type') == ContentConstants.IMAGE_URL_TYPE and getattr(p, 'image_url', None):
120 |                         has_image = True
121 |                         image_url_obj = getattr(p, 'image_url')
122 |                         if hasattr(image_url_obj, 'url'):
123 |                             url = getattr(image_url_obj, 'url')
124 |                         else:
125 |                             url = image_url_obj.get('url') if isinstance(image_url_obj, dict) else str(image_url_obj)
126 |                         
127 |                         result_parts.append({
128 |                             "type": ContentConstants.IMAGE_URL_TYPE,
129 |                             "image_url": {
130 |                                 "url": url
131 |                             }
132 |                         })
133 |                 elif isinstance(p, dict):
134 |                     if p.get("type") == ContentConstants.TEXT_TYPE and p.get("text"):
135 |                         result_parts.append({
136 |                             "type": ContentConstants.TEXT_TYPE, 
137 |                             "text": p.get("text")
138 |                         })
139 |                     elif p.get("type") == ContentConstants.IMAGE_URL_TYPE and p.get("image_url"):
140 |                         has_image = True
141 |                         result_parts.append({
142 |                             "type": ContentConstants.IMAGE_URL_TYPE,
143 |                             "image_url": p.get("image_url")
144 |                         })
145 |                 elif isinstance(p, str):
146 |                     result_parts.append({
147 |                         "type": ContentConstants.TEXT_TYPE,
148 |                         "text": p
149 |                     })
150 |             
151 |             # 如果包含图像，返回多模态格式；否则返回纯文本
152 |             if has_image and result_parts:
153 |                 return result_parts
154 |             else:
155 |                 # 提取所有文本内容
156 |                 text_parts = []
157 |                 for part in result_parts:
158 |                     if part.get("type") == ContentConstants.TEXT_TYPE:
159 |                         text_parts.append(part.get("text", ""))
160 |                 return " ".join(text_parts)
161 |         
162 |         # 处理其他类型
163 |         try:
164 |             return str(content)
165 |         except:
166 |             return ""
167 |     
168 |     def get_current_datetime_info(self) -> Dict[str, str]:
169 |         """获取当前时间信息"""
170 |         # 设置时区为上海
171 |         tz = pytz.timezone(ContentConstants.DEFAULT_TIMEZONE)
172 |         now = datetime.now(tz)
173 |         
174 |         return {
175 |             "{{USER_NAME}}": ContentConstants.DEFAULT_USER_NAME,
176 |             "{{USER_LOCATION}}": ContentConstants.DEFAULT_USER_LOCATION,
177 |             "{{CURRENT_DATETIME}}": now.strftime(TimeConstants.DATETIME_FORMAT),
178 |             "{{CURRENT_DATE}}": now.strftime(TimeConstants.DATE_FORMAT),
179 |             "{{CURRENT_TIME}}": now.strftime(TimeConstants.TIME_FORMAT),
180 |             "{{CURRENT_WEEKDAY}}": now.strftime(TimeConstants.WEEKDAY_FORMAT),
181 |             "{{CURRENT_TIMEZONE}}": ContentConstants.DEFAULT_TIMEZONE,
182 |             "{{USER_LANGUAGE}}": ContentConstants.DEFAULT_USER_LANGUAGE
183 |         }
184 |     
185 |     def generate_session_id(self) -> str:
186 |         """生成会话ID"""
187 |         return str(uuid.uuid4())
188 |     
189 |     def generate_chat_id(self) -> str:
190 |         """生成聊天ID"""
191 |         return str(uuid.uuid4())
192 |     
193 |     async def create_http_client(self) -> httpx.AsyncClient:
194 |         """创建HTTP客户端"""
195 |         base_kwargs = {
196 |             "timeout": httpx.Timeout(timeout=None, connect=10.0),
197 |             "limits": httpx.Limits(
198 |                 max_keepalive_connections=self.config.MAX_KEEPALIVE_CONNECTIONS, 
199 |                 max_connections=self.config.MAX_CONNECTIONS
200 |             ),
201 |             "follow_redirects": True
202 |         }
203 |         
204 |         try:
205 |             return httpx.AsyncClient(**base_kwargs)
206 |         except Exception as e:
207 |             safe_log_error(logger, "创建客户端失败", e)
208 |             raise e
209 |     
210 |     async def make_request(
211 |         self, 
212 |         method: str, 
213 |         url: str, 
214 |         headers: dict, 
215 |         json_data: dict = None, 
216 |         stream: bool = False
217 |     ) -> httpx.Response:
218 |         """发送HTTP请求"""
219 |         client = None
220 |         
221 |         try:
222 |             client = await self.create_http_client()
223 |             
224 |             if stream:
225 |                 # 流式请求返回context manager
226 |                 return client.stream(method, url, headers=headers, json=json_data, timeout=None)
227 |             else:
228 |                 response = await client.request(
229 |                     method, url, headers=headers, json=json_data, 
230 |                     timeout=self.config.REQUEST_TIMEOUT
231 |                 )
232 |                 
233 |                 # 详细记录非200响应
234 |                 if response.status_code != APIConstants.HTTP_OK:
235 |                     safe_log_error(logger, f"上游API返回错误状态码: {response.status_code}")
236 |                     safe_log_error(logger, f"响应头: {dict(response.headers)}")
237 |                     try:
238 |                         error_body = response.text
239 |                         safe_log_error(logger, f"错误响应体: {error_body}")
240 |                     except:
241 |                         safe_log_error(logger, "无法读取错误响应体")
242 |                 
243 |                 response.raise_for_status()
244 |                 return response
245 |                 
246 |         except httpx.HTTPStatusError as e:
247 |             safe_log_error(logger, f"HTTP状态错误: {e.response.status_code} - {e.response.text}")
248 |             if client and not stream:
249 |                 await client.aclose()
250 |             raise UpstreamError(f"上游服务错误: {e.response.status_code}", e.response.status_code)
251 |         except httpx.TimeoutException as e:
252 |             safe_log_error(logger, "请求超时", e)
253 |             if client and not stream:
254 |                 await client.aclose()
255 |             raise ProxyTimeoutError("请求超时")
256 |         except Exception as e:
257 |             safe_log_error(logger, "请求异常", e)
258 |             if client and not stream:
259 |                 await client.aclose()
260 |             raise e
261 |     
262 |     async def process_non_stream_response(self, k2think_payload: dict, headers: dict, output_thinking: bool = None) -> Tuple[str, dict]:
263 |         """处理非流式响应"""
264 |         try:
265 |             response = await self.make_request(
266 |                 "POST", 
267 |                 self.config.K2THINK_API_URL, 
268 |                 headers, 
269 |                 k2think_payload, 
270 |                 stream=False
271 |             )
272 |             
273 |             # K2Think 非流式请求返回标准JSON格式
274 |             result = response.json()
275 |             
276 |             # 提取内容
277 |             full_content = ""
278 |             if result.get('choices') and len(result['choices']) > 0:
279 |                 choice = result['choices'][0]
280 |                 if choice.get('message') and choice['message'].get('content'):
281 |                     raw_content = choice['message']['content']
282 |                     # 提取<answer>标签中的内容，去除标签
283 |                     full_content = self.extract_answer_content(raw_content, output_thinking)
284 |             
285 |             # 提取token信息
286 |             token_info = result.get('usage', {
287 |                 "prompt_tokens": NumericConstants.DEFAULT_PROMPT_TOKENS, 
288 |                 "completion_tokens": NumericConstants.DEFAULT_COMPLETION_TOKENS, 
289 |                 "total_tokens": NumericConstants.DEFAULT_TOTAL_TOKENS
290 |             })
291 |             
292 |             await response.aclose()
293 |             return full_content, token_info
294 |                         
295 |         except Exception as e:
296 |             safe_log_error(logger, "处理非流式响应错误", e)
297 |             raise
298 |     
299 |     async def process_stream_response(
300 |         self, 
301 |         k2think_payload: dict, 
302 |         headers: dict,
303 |         output_thinking: bool = None,
304 |         original_model: str = None,
305 |         enable_toolify: bool = False
306 |     ) -> AsyncGenerator[str, None]:
307 |         """处理流式响应"""
308 |         try:
309 |             # 发送开始chunk
310 |             start_chunk = self._create_chunk_data(
311 |                 delta={"role": "assistant", "content": ""},
312 |                 finish_reason=None,
313 |                 model=original_model
314 |             )
315 |             yield f"{ResponseConstants.STREAM_DATA_PREFIX}{json.dumps(start_chunk)}\n\n"
316 |             
317 |             # 优化的模拟流式输出 - 立即开始获取响应并流式发送
318 |             k2think_payload_copy = k2think_payload.copy()
319 |             k2think_payload_copy["stream"] = False
320 |             
321 |             headers_copy = headers.copy()
322 |             headers_copy[HeaderConstants.ACCEPT] = HeaderConstants.APPLICATION_JSON
323 |             
324 |             # 获取完整响应
325 |             full_content, token_info = await self.process_non_stream_response(k2think_payload_copy, headers_copy, output_thinking)
326 |             
327 |             if not full_content:
328 |                 yield ResponseConstants.STREAM_DONE_MARKER
329 |                 return
330 |             
331 |             # 检测工具调用（如果启用）
332 |             toolify_detector = None
333 |             if enable_toolify:
334 |                 toolify = get_toolify()
335 |                 if toolify:
336 |                     toolify_detector = StreamingFunctionCallDetector(toolify.trigger_signal)
337 |                     safe_log_info(logger, "[TOOLIFY] 流式工具调用检测器已初始化")
338 |             
339 |             # 发送内容（支持工具调用检测）
340 |             if toolify_detector:
341 |                 # 使用工具调用检测器处理内容
342 |                 async for chunk in self._stream_content_with_tool_detection(
343 |                     full_content, original_model, toolify_detector, k2think_payload.get("chat_id", "")
344 |                 ):
345 |                     yield chunk
346 |             else:
347 |                 # 正常流式发送
348 |                 async for chunk in self._stream_content(full_content, original_model):
349 |                     yield chunk
350 |                 
351 |                 # 发送结束chunk
352 |                 end_chunk = self._create_chunk_data(
353 |                     delta={},
354 |                     finish_reason=ResponseConstants.FINISH_REASON_STOP,
355 |                     model=original_model
356 |                 )
357 |                 yield f"{ResponseConstants.STREAM_DATA_PREFIX}{json.dumps(end_chunk)}\n\n"
358 |                 yield ResponseConstants.STREAM_DONE_MARKER
359 |             
360 |         except Exception as e:
361 |             safe_log_error(logger, "流式响应处理错误", e)
362 |             
363 |             # 发送错误信息作为流式响应的一部分，而不是抛出异常
364 |             if "401" in str(e) or "unauthorized" in str(e).lower():
365 |                 # 401错误：显示tokens强制刷新消息
366 |                 error_message = "🔄 tokens强制刷新已启动，请稍后再试"
367 |                 safe_log_info(logger, "检测到401错误，向客户端发送强制刷新提示")
368 |             else:
369 |                 # 其他错误：显示一般错误信息
370 |                 error_message = f"请求处理失败: {str(e)}"
371 |             
372 |             # 发送错误内容作为正常的流式响应
373 |             error_chunk = self._create_chunk_data(
374 |                 delta={"content": f"\n\n{error_message}"},
375 |                 finish_reason=None,
376 |                 model=original_model
377 |             )
378 |             yield f"{ResponseConstants.STREAM_DATA_PREFIX}{json.dumps(error_chunk)}\n\n"
379 |             
380 |             # 发送结束chunk
381 |             end_chunk = self._create_chunk_data(
382 |                 delta={},
383 |                 finish_reason=ResponseConstants.FINISH_REASON_ERROR,
384 |                 model=original_model
385 |             )
386 |             yield f"{ResponseConstants.STREAM_DATA_PREFIX}{json.dumps(end_chunk)}\n\n"
387 |             yield ResponseConstants.STREAM_DONE_MARKER
388 |             
389 |             # 重新抛出异常以便上层处理token失败（在发送友好消息之后）
390 |             # 上层会捕获这个异常并调用token_manager.mark_token_failure
391 |             raise e
392 |     
393 |     async def _stream_content(self, content: str, model: str = None) -> AsyncGenerator[str, None]:
394 |         """流式发送内容"""
395 |         chunk_size = self.calculate_dynamic_chunk_size(len(content))
396 |         
397 |         for i in range(0, len(content), chunk_size):
398 |             chunk_content = content[i:i + chunk_size]
399 |             
400 |             chunk = self._create_chunk_data(
401 |                 delta={"content": chunk_content},
402 |                 finish_reason=None,
403 |                 model=model
404 |             )
405 |             
406 |             yield f"{ResponseConstants.STREAM_DATA_PREFIX}{json.dumps(chunk)}\n\n"
407 |             # 添加延迟模拟真实流式效果
408 |             await asyncio.sleep(self.config.STREAM_DELAY)
409 |     
410 |     async def _stream_content_with_tool_detection(
411 |         self, 
412 |         content: str, 
413 |         model: str, 
414 |         detector: StreamingFunctionCallDetector,
415 |         chat_id: str
416 |     ) -> AsyncGenerator[str, None]:
417 |         """流式发送内容并检测工具调用"""
418 |         chunk_size = self.calculate_dynamic_chunk_size(len(content))
419 |         
420 |         for i in range(0, len(content), chunk_size):
421 |             chunk_content = content[i:i + chunk_size]
422 |             
423 |             # 使用检测器处理chunk
424 |             is_tool_detected, content_to_yield = detector.process_chunk(chunk_content)
425 |             
426 |             if is_tool_detected:
427 |                 safe_log_info(logger, "[TOOLIFY] 检测到工具调用触发信号")
428 |             
429 |             # 输出处理后的内容
430 |             if content_to_yield:
431 |                 chunk = self._create_chunk_data(
432 |                     delta={"content": content_to_yield},
433 |                     finish_reason=None,
434 |                     model=model
435 |                 )
436 |                 yield f"{ResponseConstants.STREAM_DATA_PREFIX}{json.dumps(chunk)}\n\n"
437 |             
438 |             await asyncio.sleep(self.config.STREAM_DELAY)
439 |         
440 |         # 流结束时的最终处理
441 |         parsed_tools, remaining_content = detector.finalize()
442 |         
443 |         # 输出剩余内容
444 |         if remaining_content:
445 |             safe_log_info(logger, f"[TOOLIFY] 输出缓冲区剩余内容: {len(remaining_content)}字符")
446 |             chunk = self._create_chunk_data(
447 |                 delta={"content": remaining_content},
448 |                 finish_reason=None,
449 |                 model=model
450 |             )
451 |             yield f"{ResponseConstants.STREAM_DATA_PREFIX}{json.dumps(chunk)}\n\n"
452 |         
453 |         # 如果检测到工具调用，输出工具调用结果
454 |         if parsed_tools:
455 |             safe_log_info(logger, f"[TOOLIFY] 检测到 {len(parsed_tools)} 个工具调用")
456 |             from src.toolify_handler import format_toolify_response_for_stream
457 |             tool_chunks = format_toolify_response_for_stream(parsed_tools, model, chat_id)
458 |             for chunk in tool_chunks:
459 |                 yield chunk
460 |         else:
461 |             # 没有工具调用，正常结束
462 |             end_chunk = self._create_chunk_data(
463 |                 delta={},
464 |                 finish_reason=ResponseConstants.FINISH_REASON_STOP,
465 |                 model=model
466 |             )
467 |             yield f"{ResponseConstants.STREAM_DATA_PREFIX}{json.dumps(end_chunk)}\n\n"
468 |             yield ResponseConstants.STREAM_DONE_MARKER
469 |     
470 |     def _create_chunk_data(self, delta: dict, finish_reason: Optional[str], model: str = None) -> dict:
471 |         """创建流式响应chunk数据"""
472 |         return {
473 |             "id": f"chatcmpl-{int(time.time() * 1000)}",
474 |             "object": ResponseConstants.CHAT_COMPLETION_CHUNK_OBJECT,
475 |             "created": int(time.time()),
476 |             "model": model or APIConstants.MODEL_ID,
477 |             "choices": [{
478 |                 "index": 0,
479 |                 "delta": delta,
480 |                 "finish_reason": finish_reason
481 |             }]
482 |         }
483 |     
484 |     def create_completion_response(
485 |         self, 
486 |         content: Optional[str],
487 |         token_info: Optional[dict] = None,
488 |         model: str = None
489 |     ) -> dict:
490 |         """创建完整的聊天补全响应"""
491 |         message = {
492 |             "role": "assistant",
493 |             "content": content,
494 |         }
495 |         
496 |         return {
497 |             "id": f"chatcmpl-{int(time.time())}",
498 |             "object": ResponseConstants.CHAT_COMPLETION_OBJECT,
499 |             "created": int(time.time()),
500 |             "model": model or APIConstants.MODEL_ID,
501 |             "choices": [{
502 |                 "index": 0,
503 |                 "message": message,
504 |                 "finish_reason": ResponseConstants.FINISH_REASON_STOP
505 |             }],
506 |             "usage": token_info or {
507 |                 "prompt_tokens": NumericConstants.DEFAULT_PROMPT_TOKENS,
508 |                 "completion_tokens": NumericConstants.DEFAULT_COMPLETION_TOKENS,
509 |                 "total_tokens": NumericConstants.DEFAULT_TOTAL_TOKENS
510 |             }
511 |         }


--------------------------------------------------------------------------------
/src/api_handler.py:
--------------------------------------------------------------------------------
  1 | """
  2 | API处理模块
  3 | 处理主要的API路由逻辑
  4 | """
  5 | import json
  6 | import time
  7 | import asyncio
  8 | import logging
  9 | from typing import Dict, List
 10 | from fastapi import HTTPException, Request
 11 | from fastapi.responses import StreamingResponse, JSONResponse
 12 | 
 13 | from src.config import Config
 14 | from src.constants import (
 15 |     APIConstants, ResponseConstants, LogMessages, 
 16 |     ErrorMessages, HeaderConstants
 17 | )
 18 | from src.exceptions import (
 19 |     AuthenticationError, SerializationError, 
 20 |     K2ThinkProxyError, UpstreamError
 21 | )
 22 | from src.models import ChatCompletionRequest, ModelsResponse, ModelInfo
 23 | from src.response_processor import ResponseProcessor
 24 | from src.token_manager import TokenManager
 25 | from src.utils import safe_log_error, safe_log_info, safe_log_warning
 26 | from src.toolify_handler import should_enable_toolify, prepare_toolify_request
 27 | 
 28 | logger = logging.getLogger(__name__)
 29 | 
 30 | class APIHandler:
 31 |     """API处理器"""
 32 |     
 33 |     def __init__(self, config: Config):
 34 |         self.config = config
 35 |         self.response_processor = ResponseProcessor(config)
 36 |         self.token_manager = config.get_token_manager()
 37 |     
 38 |     def validate_api_key(self, authorization: str) -> bool:
 39 |         """验证API密钥"""
 40 |         if not authorization or not authorization.startswith(APIConstants.BEARER_PREFIX):
 41 |             return False
 42 |         api_key = authorization[APIConstants.BEARER_PREFIX_LENGTH:]  # 移除 "Bearer " 前缀
 43 |         return api_key == self.config.VALID_API_KEY
 44 |     
 45 |     def should_output_thinking(self, model_name: str) -> bool:
 46 |         """根据模型名判断是否应该输出思考内容"""
 47 |         return model_name != APIConstants.MODEL_ID_NOTHINK
 48 |     
 49 |     def get_actual_model_id(self, model_name: str) -> str:
 50 |         """获取实际的模型ID（将nothink版本映射回原始模型）"""
 51 |         if model_name == APIConstants.MODEL_ID_NOTHINK:
 52 |             return APIConstants.MODEL_ID
 53 |         return model_name
 54 |     
 55 |     async def get_models(self) -> ModelsResponse:
 56 |         """获取模型列表"""
 57 |         model_info_standard = ModelInfo(
 58 |             id=APIConstants.MODEL_ID,
 59 |             created=int(time.time()),
 60 |             owned_by=APIConstants.MODEL_OWNER,
 61 |             root=APIConstants.MODEL_ROOT
 62 |         )
 63 |         model_info_nothink = ModelInfo(
 64 |             id=APIConstants.MODEL_ID_NOTHINK,
 65 |             created=int(time.time()),
 66 |             owned_by=APIConstants.MODEL_OWNER,
 67 |             root=APIConstants.MODEL_ROOT
 68 |         )
 69 |         return ModelsResponse(data=[model_info_standard, model_info_nothink])
 70 |     
 71 |     async def chat_completions(self, request: ChatCompletionRequest, auth_request: Request):
 72 |         """处理聊天补全请求"""
 73 |         # 验证API密钥
 74 |         authorization = auth_request.headers.get(HeaderConstants.AUTHORIZATION, "")
 75 |         if not self.validate_api_key(authorization):
 76 |             raise AuthenticationError()
 77 |         
 78 |         # 判断是否应该输出思考内容
 79 |         output_thinking = self.should_output_thinking(request.model)
 80 |         actual_model_id = self.get_actual_model_id(request.model)
 81 |         
 82 |         try:
 83 |             # 处理消息
 84 |             raw_messages = self._process_raw_messages(request.messages)
 85 |             
 86 |             # 检查是否需要启用工具调用
 87 |             request_dict = request.model_dump()
 88 |             enable_toolify = should_enable_toolify(request_dict)
 89 |             
 90 |             # 如果启用工具调用，预处理消息并注入提示词
 91 |             if enable_toolify:
 92 |                 safe_log_info(logger, "[TOOLIFY] 工具调用功能已启用")
 93 |                 raw_messages, _ = prepare_toolify_request(request_dict, raw_messages)
 94 |             
 95 |             self._log_request_info(raw_messages)
 96 |             
 97 |             # 构建K2Think请求
 98 |             k2think_payload = self._build_k2think_payload(
 99 |                 request, raw_messages, actual_model_id
100 |             )
101 |             
102 |             # 验证JSON序列化
103 |             self._validate_json_serialization(k2think_payload)
104 |             
105 |             # 处理响应（带重试机制）
106 |             if request.stream:
107 |                 return await self._handle_stream_response_with_retry(
108 |                     request, k2think_payload, output_thinking, enable_toolify
109 |                 )
110 |             else:
111 |                 return await self._handle_non_stream_response_with_retry(
112 |                     request, k2think_payload, output_thinking, enable_toolify
113 |                 )
114 |                 
115 |         except K2ThinkProxyError:
116 |             # 重新抛出自定义异常
117 |             raise
118 |         except Exception as e:
119 |             safe_log_error(logger, "API转发错误", e)
120 |             raise HTTPException(
121 |                 status_code=APIConstants.HTTP_INTERNAL_ERROR,
122 |                 detail={
123 |                     "error": {
124 |                         "message": str(e),
125 |                         "type": ErrorMessages.API_ERROR
126 |                     }
127 |                 }
128 |             )
129 |     
130 |     def _process_raw_messages(self, messages: List) -> List[Dict]:
131 |         """处理原始消息"""
132 |         raw_messages = []
133 |         for msg in messages:
134 |             try:
135 |                 raw_messages.append({
136 |                     "role": msg.role, 
137 |                     "content": msg.content  # 保持原始格式，稍后再转换
138 |                 })
139 |             except Exception as e:
140 |                 safe_log_error(logger, f"处理消息时出错, 消息: {msg}", e)
141 |                 # 使用默认值
142 |                 raw_messages.append({
143 |                     "role": msg.role, 
144 |                     "content": str(msg.content) if msg.content else ""
145 |                 })
146 |         return raw_messages
147 |     
148 |     def _log_request_info(self, raw_messages: List[Dict]):
149 |         """记录请求信息"""
150 |         safe_log_info(logger, LogMessages.MESSAGE_RECEIVED.format(len(raw_messages)))
151 |         
152 |         # 记录原始消息的角色分布
153 |         role_count = {}
154 |         for msg in raw_messages:
155 |             role = msg.get("role", "unknown")
156 |             role_count[role] = role_count.get(role, 0) + 1
157 |         safe_log_info(logger, LogMessages.ROLE_DISTRIBUTION.format("原始", role_count))
158 |     
159 |     def _build_k2think_payload(
160 |         self, 
161 |         request: ChatCompletionRequest, 
162 |         processed_messages: List[Dict],
163 |         actual_model_id: str = None
164 |     ) -> Dict:
165 |         """构建K2Think请求负载"""
166 |         # 构建K2Think格式的请求体 - 支持多模态内容
167 |         k2think_messages = []
168 |         for msg in processed_messages:
169 |             try:
170 |                 # 使用多模态内容转换函数
171 |                 content = self.response_processor.content_to_multimodal(msg.get("content", ""))
172 |                 k2think_messages.append({
173 |                     "role": msg["role"], 
174 |                     "content": content
175 |                 })
176 |             except Exception as e:
177 |                 safe_log_error(logger, f"构建K2Think消息时出错, 消息: {msg}", e)
178 |                 # 使用安全的默认值
179 |                 fallback_content = str(msg.get("content", ""))
180 |                 k2think_messages.append({
181 |                     "role": msg.get("role", "user"), 
182 |                     "content": fallback_content
183 |                 })
184 |         
185 |         # 使用实际的模型ID
186 |         model_id = actual_model_id or APIConstants.MODEL_ID
187 |         
188 |         return {
189 |             "stream": request.stream,
190 |             "model": model_id,
191 |             "messages": k2think_messages,
192 |             "params": {},
193 |             "tool_servers": [],
194 |             "features": {
195 |                 "image_generation": False,
196 |                 "code_interpreter": False,
197 |                 "web_search": False
198 |             },
199 |             "variables": self.response_processor.get_current_datetime_info(),
200 |             "model_item": {
201 |                 "id": model_id,
202 |                 "object": ResponseConstants.MODEL_OBJECT,
203 |                 "owned_by": APIConstants.MODEL_OWNER,
204 |                 "root": APIConstants.MODEL_ROOT,
205 |                 "parent": None,
206 |                 "status": "active",
207 |                 "connection_type": "external",
208 |                 "name": model_id
209 |             },
210 |             "background_tasks": {
211 |                 "title_generation": True,
212 |                 "tags_generation": True
213 |             },
214 |             "chat_id": self.response_processor.generate_chat_id(),
215 |             "id": self.response_processor.generate_session_id(),
216 |             "session_id": self.response_processor.generate_session_id()
217 |         }
218 |     
219 |     def _validate_json_serialization(self, k2think_payload: Dict):
220 |         """验证JSON序列化"""
221 |         try:
222 |             # 测试JSON序列化
223 |             json.dumps(k2think_payload, ensure_ascii=False)
224 |             safe_log_info(logger, LogMessages.JSON_VALIDATION_SUCCESS)
225 |         except Exception as e:
226 |             safe_log_error(logger, LogMessages.JSON_VALIDATION_FAILED.format(e))
227 |             # 尝试修复序列化问题
228 |             try:
229 |                 k2think_payload = json.loads(json.dumps(k2think_payload, default=str, ensure_ascii=False))
230 |                 safe_log_info(logger, LogMessages.JSON_FIXED)
231 |             except Exception as fix_error:
232 |                 safe_log_error(logger, "无法修复序列化问题", fix_error)
233 |                 raise SerializationError()
234 |     
235 |     def _build_request_headers(self, request: ChatCompletionRequest, k2think_payload: Dict, token: str) -> Dict[str, str]:
236 |         """构建请求头"""
237 |         return {
238 |             HeaderConstants.ACCEPT: (
239 |                 HeaderConstants.EVENT_STREAM_JSON if request.stream 
240 |                 else HeaderConstants.APPLICATION_JSON
241 |             ),
242 |             HeaderConstants.CONTENT_TYPE: HeaderConstants.APPLICATION_JSON,
243 |             HeaderConstants.AUTHORIZATION: f"{APIConstants.BEARER_PREFIX}{token}",
244 |             HeaderConstants.ORIGIN: "https://www.k2think.ai",
245 |             HeaderConstants.REFERER: "https://www.k2think.ai/c/" + k2think_payload["chat_id"],
246 |             HeaderConstants.USER_AGENT: HeaderConstants.DEFAULT_USER_AGENT
247 |         }
248 |     
249 |     async def _handle_stream_response(
250 |         self, 
251 |         k2think_payload: Dict, 
252 |         headers: Dict[str, str],
253 |         output_thinking: bool = True,
254 |         original_model: str = None
255 |     ) -> StreamingResponse:
256 |         """处理流式响应"""
257 |         return StreamingResponse(
258 |             self.response_processor.process_stream_response(
259 |                 k2think_payload, headers, output_thinking, original_model
260 |             ),
261 |             media_type=HeaderConstants.TEXT_EVENT_STREAM,
262 |             headers={
263 |                 HeaderConstants.CACHE_CONTROL: HeaderConstants.NO_CACHE,
264 |                 HeaderConstants.CONNECTION: HeaderConstants.KEEP_ALIVE,
265 |                 HeaderConstants.X_ACCEL_BUFFERING: HeaderConstants.NO_BUFFERING
266 |             }
267 |         )
268 |     
269 |     async def _handle_non_stream_response(
270 |         self, 
271 |         k2think_payload: Dict, 
272 |         headers: Dict[str, str],
273 |         output_thinking: bool = True,
274 |         original_model: str = None
275 |     ) -> JSONResponse:
276 |         """处理非流式响应"""
277 |         full_content, token_info = await self.response_processor.process_non_stream_response(
278 |             k2think_payload, headers, output_thinking
279 |         )
280 |         
281 |         openai_response = self.response_processor.create_completion_response(
282 |             full_content, token_info, original_model
283 |         )
284 |         
285 |         return JSONResponse(content=openai_response)
286 |     
287 |     async def _handle_stream_response_with_retry(
288 |         self, 
289 |         request: ChatCompletionRequest,
290 |         k2think_payload: Dict,
291 |         output_thinking: bool = True,
292 |         enable_toolify: bool = False,
293 |         max_retries: int = 3
294 |     ) -> StreamingResponse:
295 |         """处理流式响应（带重试机制）"""
296 |         last_exception = None
297 |         
298 |         for attempt in range(max_retries):
299 |             # 获取下一个可用token
300 |             token = self.token_manager.get_next_token()
301 |             if not token:
302 |                 # 根据是否启用自动更新提供不同的错误信息
303 |                 if Config.ENABLE_TOKEN_AUTO_UPDATE:
304 |                     error_message = "Token池暂时为空，可能正在自动更新中。请稍后重试或检查自动更新服务状态。"
305 |                     safe_log_warning(logger, "没有可用的token，可能正在自动更新中")
306 |                 else:
307 |                     error_message = "所有token都已失效，请检查token配置或重新加载token文件。"
308 |                     safe_log_error(logger, "没有可用的token")
309 |                 
310 |                 raise HTTPException(
311 |                     status_code=APIConstants.HTTP_SERVICE_UNAVAILABLE,
312 |                     detail={
313 |                         "error": {
314 |                             "message": error_message,
315 |                             "type": ErrorMessages.API_ERROR
316 |                         }
317 |                     }
318 |                 )
319 |             
320 |             # 构建请求头
321 |             headers = self._build_request_headers(request, k2think_payload, token)
322 |             
323 |             try:
324 |                 safe_log_info(logger, f"尝试流式请求 (第{attempt + 1}次)")
325 |                 
326 |                 # 创建流式生成器，内部处理token成功/失败标记
327 |                 async def stream_generator():
328 |                     try:
329 |                         async for chunk in self.response_processor.process_stream_response(
330 |                             k2think_payload, headers, output_thinking, request.model, enable_toolify
331 |                         ):
332 |                             yield chunk
333 |                         # 流式响应成功完成，标记token成功
334 |                         self.token_manager.mark_token_success(token)
335 |                     except Exception as e:
336 |                         # 流式响应过程中出现错误，标记token失败
337 |                         safe_log_warning(logger, f"🔍 流式响应异常被捕获，准备标记token失败: {str(e)}")
338 |                         
339 |                         # 标记token失败（这会触发自动刷新逻辑）
340 |                         token_failed = self.token_manager.mark_token_failure(token, str(e))
341 |                         
342 |                         # 特别处理401错误
343 |                         if "401" in str(e) or "unauthorized" in str(e).lower():
344 |                             safe_log_warning(logger, f"🔒 流式响应中检测到401认证错误，token标记失败: {token_failed}")
345 |                             safe_log_info(logger, f"🚨 已调用mark_token_failure，应该触发自动刷新")
346 |                         else:
347 |                             safe_log_warning(logger, f"流式响应中检测到其他错误: {str(e)}")
348 |                         
349 |                         # 注意：不重新抛出异常，避免"response already started"错误
350 |                         # 错误信息已经通过response_processor发送给客户端
351 |                 
352 |                 return StreamingResponse(
353 |                     stream_generator(),
354 |                     media_type=HeaderConstants.TEXT_EVENT_STREAM,
355 |                     headers={
356 |                         HeaderConstants.CACHE_CONTROL: HeaderConstants.NO_CACHE,
357 |                         HeaderConstants.CONNECTION: HeaderConstants.KEEP_ALIVE,
358 |                         HeaderConstants.X_ACCEL_BUFFERING: HeaderConstants.NO_BUFFERING
359 |                     }
360 |                 )
361 |             except (UpstreamError, Exception) as e:
362 |                 # 这里只处理流式响应启动前的异常（主要是连接错误）
363 |                 # 401等上游服务错误现在在流式响应内部处理，不会到达这里
364 |                 last_exception = e
365 |                 safe_log_warning(logger, f"流式请求启动失败 (第{attempt + 1}次): {e}")
366 |                 
367 |                 # 标记token失败
368 |                 token_failed = self.token_manager.mark_token_failure(token, str(e))
369 |                 if token_failed:
370 |                     safe_log_error(logger, f"Token已被标记为失效")
371 |                 
372 |                 # 如果是最后一次尝试，抛出异常
373 |                 if attempt == max_retries - 1:
374 |                     break
375 |                 
376 |                 # 短暂延迟后重试
377 |                 await asyncio.sleep(0.5)
378 |         
379 |         # 所有重试都失败了
380 |         safe_log_error(logger, "所有流式请求重试都失败了，最后错误", last_exception)
381 |         raise HTTPException(
382 |             status_code=APIConstants.HTTP_INTERNAL_ERROR,
383 |             detail={
384 |                 "error": {
385 |                     "message": f"流式请求失败: {str(last_exception)}",
386 |                     "type": ErrorMessages.API_ERROR
387 |                 }
388 |             }
389 |         )
390 |     
391 |     async def _handle_non_stream_response_with_retry(
392 |         self, 
393 |         request: ChatCompletionRequest,
394 |         k2think_payload: Dict,
395 |         output_thinking: bool = True,
396 |         enable_toolify: bool = False,
397 |         max_retries: int = 3
398 |     ) -> JSONResponse:
399 |         """处理非流式响应（带重试机制）"""
400 |         last_exception = None
401 |         
402 |         for attempt in range(max_retries):
403 |             # 获取下一个可用token
404 |             token = self.token_manager.get_next_token()
405 |             if not token:
406 |                 # 根据是否启用自动更新提供不同的错误信息
407 |                 if Config.ENABLE_TOKEN_AUTO_UPDATE:
408 |                     error_message = "Token池暂时为空，可能正在自动更新中。请稍后重试或检查自动更新服务状态。"
409 |                     safe_log_warning(logger, "没有可用的token，可能正在自动更新中")
410 |                 else:
411 |                     error_message = "所有token都已失效，请检查token配置或重新加载token文件。"
412 |                     safe_log_error(logger, "没有可用的token")
413 |                 
414 |                 raise HTTPException(
415 |                     status_code=APIConstants.HTTP_SERVICE_UNAVAILABLE,
416 |                     detail={
417 |                         "error": {
418 |                             "message": error_message,
419 |                             "type": ErrorMessages.API_ERROR
420 |                         }
421 |                     }
422 |                 )
423 |             
424 |             # 构建请求头
425 |             headers = self._build_request_headers(request, k2think_payload, token)
426 |             
427 |             try:
428 |                 safe_log_info(logger, f"尝试非流式请求 (第{attempt + 1}次)")
429 |                 
430 |                 # 处理响应
431 |                 full_content, token_info = await self.response_processor.process_non_stream_response(
432 |                     k2think_payload, headers, output_thinking
433 |                 )
434 |                 
435 |                 # 标记token成功
436 |                 self.token_manager.mark_token_success(token)
437 |                 
438 |                 # 检查是否有工具调用
439 |                 tool_response = None
440 |                 if enable_toolify:
441 |                     from src.toolify_handler import parse_toolify_response
442 |                     tool_response = parse_toolify_response(full_content, request.model)
443 |                 
444 |                 if tool_response:
445 |                     # 返回包含tool_calls的响应
446 |                     openai_response = {
447 |                         "id": f"chatcmpl-{int(time.time())}",
448 |                         "object": ResponseConstants.CHAT_COMPLETION_OBJECT,
449 |                         "created": int(time.time()),
450 |                         "model": request.model,
451 |                         "choices": [{
452 |                             "index": 0,
453 |                             "message": tool_response,
454 |                             "finish_reason": "tool_calls"
455 |                         }],
456 |                         "usage": token_info or {
457 |                             "prompt_tokens": 0,
458 |                             "completion_tokens": 0,
459 |                             "total_tokens": 0
460 |                         }
461 |                     }
462 |                 else:
463 |                     openai_response = self.response_processor.create_completion_response(
464 |                         full_content, token_info, request.model
465 |                     )
466 |                 
467 |                 return JSONResponse(content=openai_response)
468 |                 
469 |             except (UpstreamError, Exception) as e:
470 |                 last_exception = e
471 |                 
472 |                 # 特别处理401错误
473 |                 if "401" in str(e) or "unauthorized" in str(e).lower():
474 |                     safe_log_warning(logger, f"🔒 非流式请求遇到401认证错误 (第{attempt + 1}次): {e}")
475 |                     
476 |                     # 对于401错误，如果是第一次尝试，返回友好消息而不重试
477 |                     if attempt == 0:
478 |                         # 标记token失败以触发自动刷新
479 |                         self.token_manager.mark_token_failure(token, str(e))
480 |                         
481 |                         # 返回友好的刷新提示消息
482 |                         openai_response = self.response_processor.create_completion_response(
483 |                             content="🔄 tokens强制刷新已启动，请稍后再试",
484 |                             token_info={
485 |                                 "prompt_tokens": 0,
486 |                                 "completion_tokens": 10,
487 |                                 "total_tokens": 10
488 |                             },
489 |                             model=request.model
490 |                         )
491 |                         return JSONResponse(content=openai_response)
492 |                 else:
493 |                     safe_log_warning(logger, f"非流式请求失败 (第{attempt + 1}次): {e}")
494 |                 
495 |                 # 标记token失败
496 |                 token_failed = self.token_manager.mark_token_failure(token, str(e))
497 |                 if token_failed:
498 |                     safe_log_error(logger, f"Token已被标记为失效")
499 |                 
500 |                 # 如果是最后一次尝试，抛出异常
501 |                 if attempt == max_retries - 1:
502 |                     break
503 |                 
504 |                 # 短暂延迟后重试
505 |                 await asyncio.sleep(0.5)
506 |         
507 |         # 所有重试都失败了
508 |         safe_log_error(logger, "所有非流式请求重试都失败了，最后错误", last_exception)
509 |         raise HTTPException(
510 |             status_code=APIConstants.HTTP_INTERNAL_ERROR,
511 |             detail={
512 |                 "error": {
513 |                     "message": f"非流式请求失败: {str(last_exception)}",
514 |                     "type": ErrorMessages.API_ERROR
515 |                 }
516 |             }
517 |         )


--------------------------------------------------------------------------------