├── src ├── __init__.py ├── toolify │ ├── __init__.py │ ├── parser.py │ ├── detector.py │ ├── prompt.py │ └── core.py ├── toolify_config.py ├── exceptions.py ├── models.py ├── constants.py ├── utils.py ├── toolify_handler.py ├── config.py ├── token_updater.py ├── token_manager.py ├── response_processor.py └── api_handler.py ├── data ├── accounts.example.txt └── tokens.example.txt ├── requirements.txt ├── .gitignore ├── .dockerignore ├── docker-compose.yml ├── Dockerfile ├── .github └── workflows │ ├── docker-test.yml │ └── docker-build-push.yml ├── .env.example ├── get_tokens.py ├── tests └── test_tool_calling.py ├── k2think_proxy.py └── README.md /src/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | K2Think API Proxy 源代码包 3 | """ -------------------------------------------------------------------------------- /data/accounts.example.txt: -------------------------------------------------------------------------------- 1 | # {"email": "user1@example.com", "k2_password": "password1"} -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | fastapi 2 | uvicorn[standard] 3 | httpx 4 | pydantic 5 | python-dotenv 6 | pytz 7 | requests -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | 工具调用参考/ 3 | test/ 4 | utils/ 5 | 6 | .vscode/ 7 | 8 | .env 9 | tokens.txt 10 | accounts.txt 11 | tokens.txt.backup 12 | *.pyc 13 | *.log -------------------------------------------------------------------------------- /src/toolify/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Toolify 插件 - 为 LLM 提供工具调用能力 3 | 从 Toolify 项目提取的核心功能模块 4 | """ 5 | 6 | from .core import ToolifyCore 7 | from .parser import 
parse_function_calls_xml, remove_think_blocks 8 | from .detector import StreamingFunctionCallDetector 9 | from .prompt import generate_function_prompt 10 | 11 | __all__ = [ 12 | 'ToolifyCore', 13 | 'parse_function_calls_xml', 14 | 'remove_think_blocks', 15 | 'StreamingFunctionCallDetector', 16 | 'generate_function_prompt', 17 | ] 18 | 19 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # Git相关 2 | .git 3 | .gitignore 4 | 5 | # Python相关 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | *.so 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # 虚拟环境 28 | env/ 29 | venv/ 30 | ENV/ 31 | env.bak/ 32 | venv.bak/ 33 | 34 | # IDE相关 35 | .vscode/ 36 | .idea/ 37 | *.swp 38 | *.swo 39 | *~ 40 | 41 | # 配置和数据文件(这些通过volume挂载) 42 | .env 43 | .env.local 44 | .env.example 45 | tokens.txt 46 | tokens.example.txt 47 | tokens.txt.backup 48 | 49 | # 日志文件 50 | *.log 51 | logs/ 52 | 53 | # 测试相关 54 | .pytest_cache/ 55 | .coverage 56 | htmlcov/ 57 | test/ 58 | 59 | # Docker相关 60 | Dockerfile* 61 | docker-compose*.yml 62 | .dockerignore 63 | 64 | # 文档 65 | README.md 66 | *.md 67 | 68 | # 其他 69 | .DS_Store 70 | Thumbs.db -------------------------------------------------------------------------------- /data/tokens.example.txt: -------------------------------------------------------------------------------- 1 | # K2Think Token文件示例 2 | # 每行一个token,以下为示例格式(请替换为实际的token) 3 | 4 | # 注意事项: 5 | # 1. 每行只能有一个token 6 | # 2. 空行和以#开头的注释行会被忽略 7 | # 3. Token失效时会自动标记,可通过API管理界面重置 8 | # 4. 
建议至少配置2-3个token以确保高可用性 9 | 10 | # 以下放置几个可用token,随时可能失效,请自行替换自己的token 11 | eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6IjlhMGE1ZDY5LWQ0ZDgtNGFiMC1hYjhjLTQ5ODNmY2NhZDM4NyIsImV4cCI6MTc1ODIwMjg4NX0.mTDsIrtO0iVTE5hhLcX1bTgmJHMydsHQqGKUsucEg_0 12 | eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6ImY2NmU3MWUwLWUyYTUtNGIwMi04MGY1LWE0Y2RiYjJjZTM1OSIsImV4cCI6MTc1ODIwMjg4Nn0.zcNU3ylq5YXFSFidgzQOXwoicqAefUnf9x1HtKFpY2I 13 | eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6IjMxZWZmMWU3LTQwNTUtNDg3ZS04MzA1LWFiMDU5MTE1OTc0OSIsImV4cCI6MTc1ODIwMjkxM30.3lgvrmPo6esDsfAbVlkl37vRsN3EKYs6BXq45bvu9-E 14 | eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6ImIwZDY2YTZmLTAwN2MtNDNkNS1hZWExLWRkMzM2NjM1ZmUyNCIsImV4cCI6MTc1ODIwMjk0NX0.urcVmh_lBivvE6tNnCmVeDW5vW52GXoYFoqFh196T1I -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.8' 2 | 3 | services: 4 | k2think-api: 5 | image: julienol/k2think2api:latest 6 | container_name: k2think-api 7 | ports: 8 | - "${HOST_PORT:-8001}:8001" 9 | volumes: 10 | # 使用目录挂载而非文件挂载,避免文件锁定问题 11 | - ./data:/app/data 12 | # 或者使用命名卷(推荐用于生产环境) 13 | # - k2think_data:/app/data 14 | # 直接以root用户运行,简化权限管理 15 | env_file: 16 | - .env 17 | environment: 18 | - PYTHONUNBUFFERED=1 19 | - PYTHONIOENCODING=utf-8 20 | - PYTHONLEGACYWINDOWSSTDIO=0 21 | - LC_ALL=C.UTF-8 22 | - LANG=C.UTF-8 23 | # 更新配置文件路径指向data目录 24 | - TOKENS_FILE=/app/data/tokens.txt 25 | - ACCOUNTS_FILE=/app/data/accounts.txt 26 | restart: unless-stopped 27 | # 健康检查 28 | healthcheck: 29 | test: ["CMD", "curl", "-f", "http://localhost:8001/health"] 30 | interval: 30s 31 | timeout: 10s 32 | retries: 3 33 | start_period: 10s 34 | 35 | # 可选:使用命名卷(推荐用于生产环境) 36 | # volumes: 37 | # k2think_data: 38 | # driver: local -------------------------------------------------------------------------------- /src/toolify_config.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Toolify 配置模块 3 | 管理工具调用功能的配置和实例 4 | """ 5 | 6 | import logging 7 | from typing import Optional 8 | from src.toolify import ToolifyCore 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | # 全局 Toolify 实例 13 | _toolify_instance: Optional[ToolifyCore] = None 14 | 15 | 16 | def get_toolify() -> Optional[ToolifyCore]: 17 | """ 18 | 获取 Toolify 实例(单例模式) 19 | 20 | Returns: 21 | ToolifyCore实例,如果功能未启用则返回None 22 | """ 23 | global _toolify_instance 24 | 25 | # 延迟导入配置以避免循环依赖 26 | from src.config import Config 27 | 28 | if not Config.ENABLE_TOOLIFY: 29 | logger.debug("[TOOLIFY] 工具调用功能已禁用") 30 | return None 31 | 32 | if _toolify_instance is None: 33 | _toolify_instance = ToolifyCore(enable_function_calling=True) 34 | logger.info("[TOOLIFY] 工具调用功能已启用并初始化") 35 | 36 | return _toolify_instance 37 | 38 | 39 | def is_toolify_enabled() -> bool: 40 | """检查 Toolify 功能是否启用""" 41 | from src.config import Config 42 | return Config.ENABLE_TOOLIFY 43 | 44 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.12-slim 2 | 3 | # 安装curl用于健康检查 4 | RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/* 5 | 6 | # 设置环境变量 - 强化编码支持 7 | ENV PYTHONUNBUFFERED=1 8 | ENV PYTHONIOENCODING=utf-8 9 | ENV PYTHONLEGACYWINDOWSSTDIO=0 10 | ENV LC_ALL=C.UTF-8 11 | ENV LANG=C.UTF-8 12 | 13 | # 设置工作目录 14 | WORKDIR /app 15 | 16 | # 复制依赖文件并安装 17 | COPY requirements.txt . 18 | RUN pip install --no-cache-dir -r requirements.txt 19 | 20 | # 复制应用代码 21 | COPY k2think_proxy.py . 22 | COPY get_tokens.py . 
23 | COPY src/ ./src/ 24 | 25 | # 创建数据目录和默认文件 26 | RUN mkdir -p /app/data && \ 27 | touch /app/data/tokens.txt && \ 28 | echo "# Token文件将由自动更新服务生成" > /app/data/tokens.txt && \ 29 | touch /app/data/accounts.txt && \ 30 | echo "# 请通过volume挂载实际的accounts.txt文件" > /app/data/accounts.txt 31 | 32 | # 创建简单的启动脚本 33 | RUN echo '#!/bin/bash\n\ 34 | # 确保数据目录存在\n\ 35 | mkdir -p /app/data\n\ 36 | # 直接运行应用\n\ 37 | exec "$@"' > /app/entrypoint.sh && \ 38 | chmod +x /app/entrypoint.sh 39 | 40 | # 暴露端口 41 | EXPOSE 8001 42 | 43 | # 健康检查 44 | HEALTHCHECK --interval=30s --timeout=10s --start-period=10s --retries=3 \ 45 | CMD curl -f http://localhost:8001/health || exit 1 46 | 47 | # 设置entrypoint和默认命令 48 | ENTRYPOINT ["/app/entrypoint.sh"] 49 | CMD ["python", "k2think_proxy.py"] -------------------------------------------------------------------------------- /.github/workflows/docker-test.yml: -------------------------------------------------------------------------------- 1 | name: Test Docker Build 2 | 3 | on: 4 | # 对PR进行测试构建,但不推送 5 | pull_request: 6 | branches: 7 | - main 8 | - master 9 | paths-ignore: 10 | - 'README.md' 11 | - '*.md' 12 | - '.gitignore' 13 | - 'LICENSE' 14 | 15 | jobs: 16 | test-build: 17 | runs-on: ubuntu-latest 18 | 19 | steps: 20 | - name: Checkout repository 21 | uses: actions/checkout@v4 22 | 23 | - name: Set up Docker Buildx 24 | uses: docker/setup-buildx-action@v3 25 | 26 | - name: Test Docker build 27 | uses: docker/build-push-action@v5 28 | with: 29 | context: . 
30 | file: ./Dockerfile 31 | push: false 32 | tags: test:latest 33 | cache-from: type=gha 34 | cache-to: type=gha,mode=max 35 | 36 | - name: Test container startup 37 | run: | 38 | # 创建测试环境变量文件 39 | cat > .env.test << EOF 40 | VALID_API_KEY=test-key 41 | K2THINK_TOKEN=test-token 42 | TOOL_SUPPORT=true 43 | DEBUG_LOGGING=true 44 | HOST=0.0.0.0 45 | PORT=8001 46 | EOF 47 | 48 | # 启动容器 49 | docker run -d --name test-container -p 8001:8001 --env-file .env.test test:latest 50 | 51 | # 等待容器启动 52 | sleep 10 53 | 54 | # 测试健康检查 55 | curl -f http://localhost:8001/health || exit 1 56 | 57 | # 测试模型接口 58 | curl -f http://localhost:8001/v1/models || exit 1 59 | 60 | # 停止容器 61 | docker stop test-container 62 | docker rm test-container 63 | 64 | echo "✅ Docker container test passed!" 65 | -------------------------------------------------------------------------------- /src/exceptions.py: -------------------------------------------------------------------------------- 1 | """ 2 | 自定义异常类模块 3 | 统一管理所有自定义异常 4 | """ 5 | 6 | class K2ThinkProxyError(Exception): 7 | """K2Think代理服务基础异常类""" 8 | def __init__(self, message: str, error_type: str = "api_error", status_code: int = 500): 9 | self.message = message 10 | self.error_type = error_type 11 | self.status_code = status_code 12 | super().__init__(self.message) 13 | 14 | class ConfigurationError(K2ThinkProxyError): 15 | """配置错误异常""" 16 | def __init__(self, message: str): 17 | super().__init__(message, "configuration_error", 500) 18 | 19 | class AuthenticationError(K2ThinkProxyError): 20 | """认证错误异常""" 21 | def __init__(self, message: str = "Invalid API key provided"): 22 | super().__init__(message, "authentication_error", 401) 23 | 24 | class UpstreamError(K2ThinkProxyError): 25 | """上游服务错误异常""" 26 | def __init__(self, message: str, status_code: int = 502): 27 | super().__init__(message, "upstream_error", status_code) 28 | 29 | class TimeoutError(K2ThinkProxyError): 30 | """超时错误异常""" 31 | def __init__(self, message: str = "请求超时"): 32 | 
super().__init__(message, "timeout_error", 504) 33 | 34 | class SerializationError(K2ThinkProxyError): 35 | """序列化错误异常""" 36 | def __init__(self, message: str = "请求数据序列化失败"): 37 | super().__init__(message, "serialization_error", 400) 38 | 39 | class ToolProcessingError(K2ThinkProxyError): 40 | """工具处理错误异常""" 41 | def __init__(self, message: str): 42 | super().__init__(message, "tool_processing_error", 400) 43 | 44 | class ContentProcessingError(K2ThinkProxyError): 45 | """内容处理错误异常""" 46 | def __init__(self, message: str): 47 | super().__init__(message, "content_processing_error", 400) -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | # K2Think API 代理服务环境变量配置文件 2 | # 复制此文件为 .env 并根据实际情况修改配置值 3 | # 思考内容输出控制: MBZUAI-IFM/K2-Think(显示思考) / MBZUAI-IFM/K2-Think-nothink(不显示思考) 4 | 5 | # API认证配置 6 | VALID_API_KEY=sk-k2think # 客户端访问API时使用的密钥 7 | # K2THINK_TOKEN=your_k2think_jwt_token_here # 从K2Think官网获取的JWT Token 8 | 9 | # 服务器配置 10 | HOST=0.0.0.0 # 监听地址,0.0.0.0为所有接口,127.0.0.1为仅本地 11 | PORT=8001 # 服务监听端口 12 | 13 | # 上游API配置 14 | K2THINK_API_URL=https://www.k2think.ai/api/chat/completions # K2Think API完整URL 15 | 16 | # 工具调用配置 17 | ENABLE_TOOLIFY=true 18 | 19 | # Token管理配置 20 | # Token文件路径(每行一个token) 21 | TOKENS_FILE=data/tokens.txt 22 | 23 | # Token最大失败次数(超过后将被标记为失效) 24 | MAX_TOKEN_FAILURES=3 25 | 26 | # Token自动更新配置 27 | ENABLE_TOKEN_AUTO_UPDATE=false # 是否启用token自动更新 28 | TOKEN_UPDATE_INTERVAL=3600 # token更新间隔(秒),默认1小时 29 | ACCOUNTS_FILE=data/accounts.txt # 账户文件路径 30 | GET_TOKENS_SCRIPT=get_tokens.py # token获取脚本路径 31 | 32 | # 代理配置(用于get_tokens.py) 33 | # PROXY_URL=http://username:password@proxy_host:proxy_port # HTTP/HTTPS代理地址,留空则不使用代理 34 | PROXY_URL="" # 示例: http://admin:sk-123456@192.168.10.100:8282 35 | 36 | # 调试配置 37 | LOG_LEVEL=INFO # 调试日志级别: DEBUG/INFO/WARNING/ERROR 38 | DEBUG_LOGGING=false # 是否启用详细请求日志 39 | 40 | # 高级配置 41 | REQUEST_TIMEOUT=60 
# HTTP请求超时时间(秒) 42 | MAX_KEEPALIVE_CONNECTIONS=20 # 最大保持连接数 43 | MAX_CONNECTIONS=100 # 最大连接数 44 | 45 | # 性能配置 46 | STREAM_DELAY=0.05 # 流式响应模拟延迟(秒) 47 | STREAM_CHUNK_SIZE=50 # 流式响应块大小(字符数) 48 | MAX_STREAM_TIME=6 # 流式响应块最大用时(秒) 49 | 50 | # 部署配置 51 | APP_ENV=development # 应用环境: development/production/testing 52 | ENABLE_ACCESS_LOG=true # 是否启用访问日志 53 | CORS_ORIGINS=* # CORS允许的源 54 | 55 | # 使用说明: 56 | # 1. 必须配置: VALID_API_KEY, TOKENS_FILE (tokens.txt文件,每行一个token) 57 | # 2. 推荐配置: HOST, PORT 58 | # 3. 可选配置: TOOL_SUPPORT, DEBUG_LOGGING 59 | # 4. Token自动更新: 设置ENABLE_TOKEN_AUTO_UPDATE=true并提供accounts.txt文件 60 | -------------------------------------------------------------------------------- /.github/workflows/docker-build-push.yml: -------------------------------------------------------------------------------- 1 | name: Build and Push Docker Image 2 | 3 | on: 4 | # 自动触发:当推送到main分支时 5 | push: 6 | branches: 7 | - main 8 | - master 9 | # 忽略README等文档文件的更改 10 | paths-ignore: 11 | - 'README.md' 12 | - '*.md' 13 | - '.gitignore' 14 | - 'LICENSE' 15 | 16 | # 手动触发 17 | workflow_dispatch: 18 | inputs: 19 | tag: 20 | description: 'Docker image tag (default: latest)' 21 | required: false 22 | default: 'latest' 23 | 24 | env: 25 | REGISTRY: docker.io 26 | IMAGE_NAME: julienol/k2think2api 27 | 28 | jobs: 29 | build-and-push: 30 | runs-on: ubuntu-latest 31 | 32 | steps: 33 | - name: Checkout repository 34 | uses: actions/checkout@v4 35 | 36 | - name: Set up Docker Buildx 37 | uses: docker/setup-buildx-action@v3 38 | 39 | - name: Log in to Docker Hub 40 | uses: docker/login-action@v3 41 | with: 42 | username: ${{ secrets.DOCKER_USERNAME }} 43 | password: ${{ secrets.DOCKER_PASSWORD }} 44 | 45 | - name: Extract metadata 46 | id: meta 47 | uses: docker/metadata-action@v5 48 | with: 49 | images: ${{ env.IMAGE_NAME }} 50 | tags: | 51 | type=ref,event=branch 52 | type=ref,event=pr 53 | type=sha,prefix={{branch}}- 54 | type=raw,value=latest,enable={{is_default_branch}} 55 | type=raw,value=${{ 
github.event.inputs.tag }},enable=${{ github.event_name == 'workflow_dispatch' }} 56 | 57 | - name: Build and push Docker image 58 | uses: docker/build-push-action@v5 59 | with: 60 | context: . 61 | file: ./Dockerfile 62 | push: true 63 | tags: ${{ steps.meta.outputs.tags }} 64 | labels: ${{ steps.meta.outputs.labels }} 65 | platforms: linux/amd64,linux/arm64 66 | cache-from: type=gha 67 | cache-to: type=gha,mode=max 68 | 69 | - name: Update Docker Hub description 70 | uses: peter-evans/dockerhub-description@v4 71 | with: 72 | username: ${{ secrets.DOCKER_USERNAME }} 73 | password: ${{ secrets.DOCKER_PASSWORD }} 74 | repository: ${{ env.IMAGE_NAME }} 75 | readme-filepath: ./README.md -------------------------------------------------------------------------------- /src/models.py: -------------------------------------------------------------------------------- 1 | """ 2 | 数据模型定义 3 | 定义所有API请求和响应的数据模型 4 | """ 5 | from pydantic import BaseModel 6 | from typing import List, Dict, Optional, Union, Any 7 | 8 | class ImageUrl(BaseModel): 9 | """Image URL model for vision content""" 10 | url: str 11 | detail: Optional[str] = "auto" 12 | 13 | class ContentPart(BaseModel): 14 | """Content part model for OpenAI's new content format""" 15 | type: str 16 | text: Optional[str] = None 17 | image_url: Optional[ImageUrl] = None 18 | 19 | class Message(BaseModel): 20 | role: str 21 | content: Optional[Union[str, List[ContentPart]]] = None 22 | tool_call_id: Optional[str] = None # 用于tool消息 23 | tool_calls: Optional[List[Dict[str, Any]]] = None # 用于assistant消息 24 | 25 | class FunctionParameters(BaseModel): 26 | """Function parameters schema""" 27 | type: str = "object" 28 | properties: Dict[str, Any] = {} 29 | required: Optional[List[str]] = None 30 | 31 | class FunctionDefinition(BaseModel): 32 | """Function definition""" 33 | name: str 34 | description: Optional[str] = None 35 | parameters: Optional[FunctionParameters] = None 36 | 37 | class ToolDefinition(BaseModel): 38 | """Tool 
definition""" 39 | type: str = "function" 40 | function: FunctionDefinition 41 | 42 | class ToolChoice(BaseModel): 43 | """Tool choice specification""" 44 | type: str = "function" 45 | function: Dict[str, str] # {"name": "tool_name"} 46 | 47 | class ChatCompletionRequest(BaseModel): 48 | model: str = "MBZUAI-IFM/K2-Think" 49 | messages: List[Message] 50 | stream: bool = False 51 | temperature: float = 0.7 52 | max_tokens: Optional[int] = None 53 | top_p: Optional[float] = None 54 | frequency_penalty: Optional[float] = None 55 | presence_penalty: Optional[float] = None 56 | stop: Optional[Union[str, List[str]]] = None 57 | # 工具调用相关字段 58 | tools: Optional[List[ToolDefinition]] = None 59 | tool_choice: Optional[Union[str, ToolChoice]] = None # "auto", "none", 或指定工具 60 | 61 | class ModelInfo(BaseModel): 62 | id: str 63 | object: str = "model" 64 | created: int 65 | owned_by: str 66 | permission: List[Dict] = [] 67 | root: str 68 | parent: Optional[str] = None 69 | 70 | class ModelsResponse(BaseModel): 71 | object: str = "list" 72 | data: List[ModelInfo] -------------------------------------------------------------------------------- /src/constants.py: -------------------------------------------------------------------------------- 1 | """ 2 | 常量定义模块 3 | 统一管理所有魔法数字和硬编码字符串 4 | """ 5 | 6 | # API相关常量 7 | class APIConstants: 8 | MODEL_ID = "MBZUAI-IFM/K2-Think" 9 | MODEL_ID_NOTHINK = "MBZUAI-IFM/K2-Think-nothink" 10 | MODEL_OWNER = "MBZUAI" 11 | MODEL_ROOT = "mbzuai-k2-think-2508" 12 | 13 | # HTTP状态码 14 | HTTP_OK = 200 15 | HTTP_UNAUTHORIZED = 401 16 | HTTP_NOT_FOUND = 404 17 | HTTP_INTERNAL_ERROR = 500 18 | HTTP_GATEWAY_TIMEOUT = 504 19 | 20 | # 认证相关 21 | BEARER_PREFIX = "Bearer " 22 | BEARER_PREFIX_LENGTH = 7 23 | 24 | # 响应相关常量 25 | class ResponseConstants: 26 | CHAT_COMPLETION_OBJECT = "chat.completion" 27 | CHAT_COMPLETION_CHUNK_OBJECT = "chat.completion.chunk" 28 | MODEL_OBJECT = "model" 29 | LIST_OBJECT = "list" 30 | 31 | # 完成原因 32 | FINISH_REASON_STOP = "stop" 33 | 
FINISH_REASON_ERROR = "error" 34 | 35 | # 流式响应标记 36 | STREAM_DONE_MARKER = "data: [DONE]\n\n" 37 | STREAM_DATA_PREFIX = "data: " 38 | 39 | # 内容处理相关常量 40 | class ContentConstants: 41 | # XML标签 42 | THINK_START_TAG = "" 43 | THINK_END_TAG = "" 44 | ANSWER_START_TAG = "" 45 | ANSWER_END_TAG = "" 46 | 47 | # 内容类型 48 | TEXT_TYPE = "text" 49 | IMAGE_URL_TYPE = "image_url" 50 | 51 | # 图像占位符 52 | IMAGE_PLACEHOLDER = "[图像内容]" 53 | 54 | # 默认值 55 | DEFAULT_USER_NAME = "User" 56 | DEFAULT_USER_LOCATION = "Unknown" 57 | DEFAULT_USER_LANGUAGE = "en-US" 58 | DEFAULT_TIMEZONE = "Asia/Shanghai" 59 | 60 | # 错误消息常量 61 | class ErrorMessages: 62 | INVALID_API_KEY = "Invalid API key provided" 63 | AUTHENTICATION_ERROR = "authentication_error" 64 | UPSTREAM_ERROR = "upstream_error" 65 | TIMEOUT_ERROR = "timeout_error" 66 | API_ERROR = "api_error" 67 | 68 | # 中文错误消息 69 | REQUEST_TIMEOUT = "请求超时" 70 | SERIALIZATION_FAILED = "请求数据序列化失败" 71 | UPSTREAM_SERVICE_ERROR = "上游服务错误" 72 | 73 | # 日志消息常量 74 | class LogMessages: 75 | MESSAGE_RECEIVED = "📥 接收到的原始消息数: {}" 76 | ROLE_DISTRIBUTION = "📊 {}消息角色分布: {}" 77 | JSON_VALIDATION_SUCCESS = "✅ K2Think请求体JSON序列化验证通过" 78 | JSON_VALIDATION_FAILED = "❌ K2Think请求体JSON序列化失败: {}" 79 | JSON_FIXED = "🔧 使用default=str修复了序列化问题" 80 | 81 | # 动态chunk计算日志 82 | DYNAMIC_CHUNK_CALC = "动态chunk_size计算: 内容长度={}, 计算值={}, 最终值={}" 83 | 84 | # HTTP头常量 85 | class HeaderConstants: 86 | AUTHORIZATION = "Authorization" 87 | CONTENT_TYPE = "Content-Type" 88 | ACCEPT = "Accept" 89 | ORIGIN = "Origin" 90 | REFERER = "Referer" 91 | USER_AGENT = "User-Agent" 92 | CACHE_CONTROL = "Cache-Control" 93 | CONNECTION = "Connection" 94 | X_ACCEL_BUFFERING = "X-Accel-Buffering" 95 | 96 | # 值 97 | APPLICATION_JSON = "application/json" 98 | TEXT_EVENT_STREAM = "text/event-stream" 99 | EVENT_STREAM_JSON = "text/event-stream,application/json" 100 | NO_CACHE = "no-cache" 101 | KEEP_ALIVE = "keep-alive" 102 | NO_BUFFERING = "no" 103 | 104 | # User-Agent值 105 | DEFAULT_USER_AGENT = "Mozilla/5.0 
(Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36 Edg/140.0.0.0" 106 | 107 | # 时间相关常量 108 | class TimeConstants: 109 | # 时间格式 110 | DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S" 111 | DATE_FORMAT = "%Y-%m-%d" 112 | TIME_FORMAT = "%H:%M:%S" 113 | WEEKDAY_FORMAT = "%A" 114 | 115 | # 微秒转换 116 | MICROSECONDS_MULTIPLIER = 1000000 117 | 118 | # 数值常量 119 | class NumericConstants: 120 | # chunk大小限制 121 | MIN_CHUNK_SIZE = 50 122 | 123 | # 内容预览长度 124 | CONTENT_PREVIEW_LENGTH = 200 125 | CONTENT_PREVIEW_SUFFIX = "..." 126 | 127 | # 默认token使用量 128 | DEFAULT_PROMPT_TOKENS = 0 129 | DEFAULT_COMPLETION_TOKENS = 0 130 | DEFAULT_TOTAL_TOKENS = 0 -------------------------------------------------------------------------------- /src/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | 工具函数模块 3 | 提供通用的工具函数 4 | """ 5 | import logging 6 | import sys 7 | 8 | def safe_log_error(logger: logging.Logger, message: str, exception: Exception = None): 9 | """ 10 | 安全地记录错误日志,避免编码问题 11 | 12 | Args: 13 | logger: 日志记录器 14 | message: 错误消息 15 | exception: 异常对象(可选) 16 | """ 17 | try: 18 | # 确保消息是字符串类型 19 | if not isinstance(message, str): 20 | message = str(message) 21 | 22 | if exception: 23 | # 安全地处理异常信息,避免编码问题 24 | try: 25 | error_msg = str(exception) 26 | # 处理可能的编码问题 27 | if isinstance(error_msg, bytes): 28 | error_msg = error_msg.decode('utf-8', errors='replace') 29 | else: 30 | error_msg = error_msg.encode('utf-8', errors='replace').decode('utf-8') 31 | except Exception: 32 | error_msg = repr(exception) 33 | 34 | full_message = f"{message}: {error_msg}" 35 | else: 36 | full_message = message 37 | 38 | # 确保消息本身也是安全的 39 | try: 40 | if isinstance(full_message, bytes): 41 | safe_message = full_message.decode('utf-8', errors='replace') 42 | else: 43 | safe_message = full_message.encode('utf-8', errors='replace').decode('utf-8') 44 | except Exception: 45 | safe_message = repr(full_message) 46 | 47 | 
logger.error(safe_message) 48 | 49 | except Exception as e: 50 | # 如果连安全日志都失败了,使用最基本的方式记录 51 | try: 52 | fallback_msg = f"Logging error: {repr(e)}, Original: {repr(message)}" 53 | logger.error(fallback_msg) 54 | except Exception: 55 | # 最后的保险措施 - 直接打印到控制台 56 | try: 57 | print(f"CRITICAL LOGGING FAILURE: {repr(message)}", file=sys.stderr) 58 | except Exception: 59 | pass # 如果连print都失败了,就放弃 60 | 61 | def safe_log_info(logger: logging.Logger, message: str): 62 | """ 63 | 安全地记录信息日志,避免编码问题 64 | 65 | Args: 66 | logger: 日志记录器 67 | message: 信息消息 68 | """ 69 | try: 70 | # 确保消息是字符串类型 71 | if not isinstance(message, str): 72 | message = str(message) 73 | 74 | # 确保消息是安全的 75 | try: 76 | if isinstance(message, bytes): 77 | safe_message = message.decode('utf-8', errors='replace') 78 | else: 79 | safe_message = message.encode('utf-8', errors='replace').decode('utf-8') 80 | except Exception: 81 | safe_message = repr(message) 82 | 83 | logger.info(safe_message) 84 | 85 | except Exception as e: 86 | try: 87 | fallback_msg = f"Logging info error: {repr(e)}, Original: {repr(message)}" 88 | logger.info(fallback_msg) 89 | except Exception: 90 | try: 91 | print(f"CRITICAL INFO LOGGING FAILURE: {repr(message)}", file=sys.stderr) 92 | except Exception: 93 | pass 94 | 95 | def safe_log_warning(logger: logging.Logger, message: str): 96 | """ 97 | 安全地记录警告日志,避免编码问题 98 | 99 | Args: 100 | logger: 日志记录器 101 | message: 警告消息 102 | """ 103 | try: 104 | # 确保消息是字符串类型 105 | if not isinstance(message, str): 106 | message = str(message) 107 | 108 | # 确保消息是安全的 109 | try: 110 | if isinstance(message, bytes): 111 | safe_message = message.decode('utf-8', errors='replace') 112 | else: 113 | safe_message = message.encode('utf-8', errors='replace').decode('utf-8') 114 | except Exception: 115 | safe_message = repr(message) 116 | 117 | logger.warning(safe_message) 118 | 119 | except Exception as e: 120 | try: 121 | fallback_msg = f"Logging warning error: {repr(e)}, Original: {repr(message)}" 122 | 
logger.warning(fallback_msg) 123 | except Exception: 124 | try: 125 | print(f"CRITICAL WARNING LOGGING FAILURE: {repr(message)}", file=sys.stderr) 126 | except Exception: 127 | pass 128 | 129 | def safe_str(obj) -> str: 130 | """ 131 | 安全地将对象转换为字符串,避免编码问题 132 | 133 | Args: 134 | obj: 要转换的对象 135 | 136 | Returns: 137 | str: 安全的字符串表示 138 | """ 139 | try: 140 | if isinstance(obj, bytes): 141 | return obj.decode('utf-8', errors='replace') 142 | elif isinstance(obj, str): 143 | return obj.encode('utf-8', errors='replace').decode('utf-8') 144 | else: 145 | return str(obj).encode('utf-8', errors='replace').decode('utf-8') 146 | except Exception: 147 | return repr(obj) -------------------------------------------------------------------------------- /src/toolify/parser.py: -------------------------------------------------------------------------------- 1 | """ 2 | Toolify XML 解析器 3 | 解析模型响应中的工具调用XML格式 4 | """ 5 | 6 | import re 7 | import json 8 | import logging 9 | from typing import List, Dict, Any, Optional 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | def remove_think_blocks(text: str) -> str: 15 | """ 16 | 临时移除所有 ... 块用于XML解析 17 | 支持嵌套think标签 18 | 注意:此函数仅用于临时解析,不影响返回给用户的原始内容 19 | """ 20 | while '' in text and '' in text: 21 | start_pos = text.find('') 22 | if start_pos == -1: 23 | break 24 | 25 | pos = start_pos + 7 26 | depth = 1 27 | 28 | while pos < len(text) and depth > 0: 29 | if text[pos:pos+7] == '': 30 | depth += 1 31 | pos += 7 32 | elif text[pos:pos+8] == '': 33 | depth -= 1 34 | pos += 8 35 | else: 36 | pos += 1 37 | 38 | if depth == 0: 39 | text = text[:start_pos] + text[pos:] 40 | else: 41 | break 42 | 43 | return text 44 | 45 | 46 | def parse_function_calls_xml(xml_string: str, trigger_signal: str) -> Optional[List[Dict[str, Any]]]: 47 | """ 48 | 增强型XML解析函数,支持动态触发信号 49 | 50 | 1. 保留 ... 块(它们应正常返回给用户) 51 | 2. 解析时临时移除think块,防止干扰XML解析 52 | 3. 查找触发信号的最后一次出现 53 | 4. 
从最后一个触发信号开始解析function_calls 54 | 55 | Args: 56 | xml_string: 包含XML的响应字符串 57 | trigger_signal: 触发信号字符串 58 | 59 | Returns: 60 | 解析出的工具调用列表,格式为 [{"name": "tool_name", "args": {...}}, ...] 61 | 如果没有找到工具调用,返回None 62 | """ 63 | logger.debug(f"[TOOLIFY] 开始解析XML,输入长度: {len(xml_string) if xml_string else 0}") 64 | logger.debug(f"[TOOLIFY] 使用触发信号: {trigger_signal[:20]}...") 65 | 66 | if not xml_string or trigger_signal not in xml_string: 67 | logger.debug(f"[TOOLIFY] 输入为空或不包含触发信号") 68 | return None 69 | 70 | # 临时移除think块用于解析 71 | cleaned_content = remove_think_blocks(xml_string) 72 | logger.debug(f"[TOOLIFY] 移除think块后内容长度: {len(cleaned_content)}") 73 | 74 | # 查找所有触发信号位置 75 | signal_positions = [] 76 | start_pos = 0 77 | while True: 78 | pos = cleaned_content.find(trigger_signal, start_pos) 79 | if pos == -1: 80 | break 81 | signal_positions.append(pos) 82 | start_pos = pos + 1 83 | 84 | if not signal_positions: 85 | logger.debug(f"[TOOLIFY] 在清理后的内容中未找到触发信号") 86 | return None 87 | 88 | logger.debug(f"[TOOLIFY] 找到 {len(signal_positions)} 个触发信号位置: {signal_positions}") 89 | 90 | # 使用最后一个触发信号位置 91 | last_signal_pos = signal_positions[-1] 92 | content_after_signal = cleaned_content[last_signal_pos:] 93 | logger.debug(f"[TOOLIFY] 从最后触发信号开始的内容: {repr(content_after_signal[:100])}") 94 | 95 | # 查找function_calls标签 96 | calls_content_match = re.search(r"([\s\S]*?)", content_after_signal) 97 | if not calls_content_match: 98 | logger.warning(f"[TOOLIFY] 未找到function_calls标签!内容: {repr(content_after_signal[:300])}") 99 | # 检查是否有不完整的function_calls开始标签 100 | if "([\s\S]*?)", calls_content) 110 | logger.debug(f"[TOOLIFY] 找到 {len(call_blocks)} 个function_call块") 111 | 112 | for i, block in enumerate(call_blocks): 113 | logger.debug(f"[TOOLIFY] 处理function_call #{i+1}: {repr(block)}") 114 | 115 | # 提取tool名称 116 | tool_match = re.search(r"(.*?)", block) 117 | if not tool_match: 118 | logger.debug(f"[TOOLIFY] 块 #{i+1} 中未找到tool标签") 119 | continue 120 | 121 | name = tool_match.group(1).strip() 122 | 
args = {} 123 | 124 | # 提取args块 125 | args_block_match = re.search(r"([\s\S]*?)", block) 126 | if args_block_match: 127 | args_content = args_block_match.group(1) 128 | # 支持包含连字符的参数标签名(如-i, -A);匹配任何非空格、非'>'、非'/'字符 129 | arg_matches = re.findall(r"<([^\s>/]+)>([\s\S]*?)", args_content) 130 | 131 | def _coerce_value(v: str): 132 | """尝试将字符串值转换为JSON对象""" 133 | try: 134 | return json.loads(v) 135 | except Exception: 136 | pass 137 | return v 138 | 139 | for k, v in arg_matches: 140 | args[k] = _coerce_value(v) 141 | 142 | result = {"name": name, "args": args} 143 | results.append(result) 144 | logger.debug(f"[TOOLIFY] 添加工具调用: {result}") 145 | 146 | logger.debug(f"[TOOLIFY] 最终解析结果: {results}") 147 | return results if results else None 148 | 149 | -------------------------------------------------------------------------------- /src/toolify_handler.py: -------------------------------------------------------------------------------- 1 | """ 2 | Toolify 请求和响应处理模块 3 | 处理工具调用相关的请求预处理和响应解析 4 | """ 5 | 6 | import json 7 | import logging 8 | import uuid 9 | from typing import Dict, Any, List, Optional 10 | 11 | from src.toolify_config import get_toolify, is_toolify_enabled 12 | from src.toolify.prompt import generate_function_prompt, safe_process_tool_choice 13 | from src.toolify.parser import parse_function_calls_xml 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | def should_enable_toolify(request_dict: Dict[str, Any]) -> bool: 19 | """ 20 | 判断是否应该为当前请求启用工具调用功能 21 | 22 | Args: 23 | request_dict: 请求字典 24 | 25 | Returns: 26 | 是否启用工具调用 27 | """ 28 | if not is_toolify_enabled(): 29 | return False 30 | 31 | # 检查请求中是否包含tools 32 | has_tools = request_dict.get("tools") and len(request_dict.get("tools", [])) > 0 33 | 34 | return has_tools 35 | 36 | 37 | def prepare_toolify_request(request_dict: Dict[str, Any], messages: List[Dict[str, Any]]) -> tuple[List[Dict[str, Any]], bool]: 38 | """ 39 | 准备带工具调用的请求 40 | 41 | Args: 42 | request_dict: 原始请求字典 43 | messages: 消息列表 44 | 45 
| Returns: 46 | (处理后的消息列表, 是否启用了工具调用) 47 | """ 48 | toolify = get_toolify() 49 | if not toolify: 50 | return messages, False 51 | 52 | tools = request_dict.get("tools") 53 | if not tools or len(tools) == 0: 54 | return messages, False 55 | 56 | logger.info(f"[TOOLIFY] 检测到 {len(tools)} 个工具定义,启用工具调用功能") 57 | 58 | # 预处理消息(转换tool和tool_calls) 59 | processed_messages = toolify.preprocess_messages(messages) 60 | logger.debug(f"[TOOLIFY] 消息预处理完成: {len(messages)} -> {len(processed_messages)}") 61 | 62 | # 生成工具调用提示词 63 | from src.config import Config 64 | function_prompt, trigger_signal = generate_function_prompt( 65 | tools, 66 | toolify.trigger_signal, 67 | Config.TOOLIFY_CUSTOM_PROMPT 68 | ) 69 | 70 | # 处理 tool_choice 71 | tool_choice = request_dict.get("tool_choice") 72 | tool_choice_prompt = safe_process_tool_choice(tool_choice) 73 | if tool_choice_prompt: 74 | function_prompt += tool_choice_prompt 75 | 76 | # 在消息开头注入系统提示词 77 | system_message = {"role": "system", "content": function_prompt} 78 | processed_messages.insert(0, system_message) 79 | 80 | logger.debug(f"[TOOLIFY] 已注入工具调用系统提示词,消息数: {len(processed_messages)}") 81 | 82 | return processed_messages, True 83 | 84 | 85 | def parse_toolify_response(content: str, model: str) -> Optional[Dict[str, Any]]: 86 | """ 87 | 解析响应中的工具调用 88 | 89 | Args: 90 | content: 响应内容 91 | model: 模型名称 92 | 93 | Returns: 94 | 如果检测到工具调用,返回包含tool_calls的响应字典;否则返回None 95 | """ 96 | toolify = get_toolify() 97 | if not toolify: 98 | return None 99 | 100 | logger.debug(f"[TOOLIFY] 开始解析响应中的工具调用,内容长度: {len(content)}") 101 | 102 | # 解析 XML 格式的工具调用 103 | parsed_tools = parse_function_calls_xml(content, toolify.trigger_signal) 104 | 105 | if not parsed_tools: 106 | logger.debug("[TOOLIFY] 未检测到工具调用") 107 | return None 108 | 109 | logger.info(f"[TOOLIFY] 检测到 {len(parsed_tools)} 个工具调用") 110 | 111 | # 转换为 OpenAI 格式 112 | tool_calls = toolify.convert_parsed_tools_to_openai_format(parsed_tools) 113 | 114 | return { 115 | "tool_calls": tool_calls, 116 | 
"content": None, 117 | "role": "assistant" 118 | } 119 | 120 | 121 | def format_toolify_response_for_stream(parsed_tools: List[Dict[str, Any]], model: str, chat_id: str) -> List[str]: 122 | """ 123 | 格式化工具调用为流式响应块 124 | 125 | Args: 126 | parsed_tools: 解析出的工具列表 127 | model: 模型名称 128 | chat_id: 会话ID 129 | 130 | Returns: 131 | SSE格式的响应块列表 132 | """ 133 | toolify = get_toolify() 134 | if not toolify: 135 | return [] 136 | 137 | tool_calls = toolify.convert_parsed_tools_to_openai_format(parsed_tools) 138 | chunks: List[str] = [] 139 | 140 | # 初始块 - 发送角色和tool_calls 141 | initial_chunk = { 142 | "id": chat_id, 143 | "object": "chat.completion.chunk", 144 | "created": int(uuid.uuid4().time_low), 145 | "model": model, 146 | "choices": [{ 147 | "index": 0, 148 | "delta": { 149 | "role": "assistant", 150 | "content": None, 151 | "tool_calls": tool_calls 152 | }, 153 | "finish_reason": None 154 | }], 155 | } 156 | chunks.append(f"data: {json.dumps(initial_chunk)}\n\n") 157 | 158 | # 结束块 159 | final_chunk = { 160 | "id": chat_id, 161 | "object": "chat.completion.chunk", 162 | "created": int(uuid.uuid4().time_low), 163 | "model": model, 164 | "choices": [{ 165 | "index": 0, 166 | "delta": {}, 167 | "finish_reason": "tool_calls" 168 | }], 169 | } 170 | chunks.append(f"data: {json.dumps(final_chunk)}\n\n") 171 | chunks.append("data: [DONE]\n\n") 172 | 173 | return chunks 174 | 175 | -------------------------------------------------------------------------------- /src/toolify/detector.py: -------------------------------------------------------------------------------- 1 | """ 2 | Toolify 流式检测器 3 | 用于在流式响应中检测工具调用 4 | """ 5 | 6 | import logging 7 | from typing import Optional, List, Dict, Any 8 | from .parser import parse_function_calls_xml 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | class StreamingFunctionCallDetector: 14 | """ 15 | 增强型流式函数调用检测器,支持动态触发信号,避免在标签内误判 16 | 17 | 核心特性: 18 | 1. 避免在块内触发工具调用检测 19 | 2. 正常输出块内容给用户 20 | 3. 
支持嵌套think标签 21 | """ 22 | 23 | def __init__(self, trigger_signal: str): 24 | self.trigger_signal = trigger_signal 25 | self.reset() 26 | 27 | def reset(self): 28 | """重置检测器状态""" 29 | self.content_buffer = "" 30 | self.state = "detecting" # detecting, signal_detected, tool_parsing 31 | self.in_think_block = False 32 | self.think_depth = 0 33 | self.signal = self.trigger_signal 34 | self.signal_len = len(self.signal) 35 | self.signal_position = -1 # 记录触发信号的位置 36 | 37 | def process_chunk(self, delta_content: str) -> tuple[bool, str]: 38 | """ 39 | 处理流式内容块 40 | 41 | Args: 42 | delta_content: 新的内容块 43 | 44 | Returns: 45 | (is_tool_call_detected, content_to_yield): 是否检测到工具调用,以及应该输出的内容 46 | """ 47 | if not delta_content: 48 | return False, "" 49 | 50 | self.content_buffer += delta_content 51 | content_to_yield = "" 52 | 53 | if self.state == "tool_parsing": 54 | # 已经在解析工具调用,继续累积内容 55 | logger.debug(f"[TOOLIFY-DETECTOR] 状态已是tool_parsing,继续累积,缓冲区长度: {len(self.content_buffer)}") 56 | return False, "" 57 | 58 | if self.state == "signal_detected": 59 | # 已检测到触发信号,等待标签 60 | logger.debug(f"[TOOLIFY-DETECTOR] 状态是signal_detected,检查是否有,缓冲区长度: {len(self.content_buffer)}") 61 | if "" in self.content_buffer: 62 | logger.debug(f"[TOOLIFY-DETECTOR] 确认有标签,进入tool_parsing状态") 63 | self.state = "tool_parsing" 64 | return True, "" 65 | elif len(self.content_buffer) > 300: 66 | # 触发信号后300字符内还没有,认为是误判 67 | logger.debug(f"[TOOLIFY-DETECTOR] 触发信号后300字符内未发现,视为误判,恢复正常输出") 68 | self.state = "detecting" 69 | # 输出所有缓冲的内容 70 | output = self.content_buffer 71 | self.content_buffer = "" 72 | self.signal_position = -1 73 | return False, output 74 | else: 75 | # 继续等待 76 | return False, "" 77 | 78 | if delta_content: 79 | logger.debug(f"[TOOLIFY-DETECTOR] 处理块: {repr(delta_content[:50])}{'...' 
if len(delta_content) > 50 else ''}, 缓冲区长度: {len(self.content_buffer)}, think状态: {self.in_think_block}") 80 | 81 | i = 0 82 | while i < len(self.content_buffer): 83 | # 更新think状态 84 | skip_chars = self._update_think_state(i) 85 | if skip_chars > 0: 86 | for j in range(skip_chars): 87 | if i + j < len(self.content_buffer): 88 | content_to_yield += self.content_buffer[i + j] 89 | i += skip_chars 90 | continue 91 | 92 | # 在非think块中检测触发信号 93 | if not self.in_think_block and self._can_detect_signal_at(i): 94 | if self.content_buffer[i:i+self.signal_len] == self.signal: 95 | # 检测到触发信号 96 | logger.debug(f"[TOOLIFY-DETECTOR] 在非think块中检测到触发信号! 信号: {self.signal[:20]}...") 97 | logger.debug(f"[TOOLIFY-DETECTOR] 触发信号位置: {i}, think状态: {self.in_think_block}, think深度: {self.think_depth}") 98 | 99 | # 输出触发信号之前的内容 100 | # 保留触发信号及之后的内容在缓冲区,进入signal_detected状态等待验证 101 | self.state = "signal_detected" 102 | self.signal_position = 0 # 触发信号现在在缓冲区开头 103 | self.content_buffer = self.content_buffer[i:] 104 | logger.debug(f"[TOOLIFY-DETECTOR] 进入signal_detected状态,等待标签") 105 | return False, content_to_yield 106 | 107 | # 如果剩余内容不足以判断,保留在缓冲区 108 | remaining_len = len(self.content_buffer) - i 109 | if remaining_len < self.signal_len or remaining_len < 8: 110 | break 111 | 112 | content_to_yield += self.content_buffer[i] 113 | i += 1 114 | 115 | self.content_buffer = self.content_buffer[i:] 116 | return False, content_to_yield 117 | 118 | def _update_think_state(self, pos: int): 119 | """更新think标签状态,支持嵌套""" 120 | remaining = self.content_buffer[pos:] 121 | 122 | if remaining.startswith(''): 123 | self.think_depth += 1 124 | self.in_think_block = True 125 | logger.debug(f"[TOOLIFY-DETECTOR] 进入think块,深度: {self.think_depth}") 126 | return 7 127 | 128 | elif remaining.startswith(''): 129 | self.think_depth = max(0, self.think_depth - 1) 130 | self.in_think_block = self.think_depth > 0 131 | logger.debug(f"[TOOLIFY-DETECTOR] 退出think块,深度: {self.think_depth}") 132 | return 8 133 | 134 | return 0 135 | 
136 | def _can_detect_signal_at(self, pos: int) -> bool: 137 | """检查是否可以在指定位置检测信号""" 138 | return (pos + self.signal_len <= len(self.content_buffer) and 139 | not self.in_think_block) 140 | 141 | def finalize(self) -> tuple[Optional[List[Dict[str, Any]]], str]: 142 | """ 143 | 流结束时的最终处理 144 | 145 | Returns: 146 | (parsed_tools, remaining_content): 解析出的工具调用和剩余未输出的内容 147 | """ 148 | logger.debug(f"[TOOLIFY-DETECTOR] finalize() - 当前状态: {self.state}, 缓冲区长度: {len(self.content_buffer)}") 149 | 150 | if self.state == "tool_parsing": 151 | logger.debug(f"[TOOLIFY-DETECTOR] finalize() - 缓冲区内容前500字符: {repr(self.content_buffer[:500])}") 152 | result = parse_function_calls_xml(self.content_buffer, self.trigger_signal) 153 | logger.debug(f"[TOOLIFY-DETECTOR] finalize() - 解析结果: {result}") 154 | return result, "" 155 | 156 | elif self.state == "signal_detected": 157 | # 流结束时还在等待标签,说明模型输出了触发信号但没有完整的工具调用 158 | logger.debug(f"[TOOLIFY-DETECTOR] finalize() - 流结束但状态是signal_detected,可能是不完整的工具调用") 159 | logger.debug(f"[TOOLIFY-DETECTOR] finalize() - 缓冲区内容: {repr(self.content_buffer[:300])}") 160 | # 尝试解析,如果失败就把缓冲区内容作为普通文本返回 161 | result = parse_function_calls_xml(self.content_buffer, self.trigger_signal) 162 | if result: 163 | logger.debug(f"[TOOLIFY-DETECTOR] finalize() - 成功解析出工具调用: {result}") 164 | return result, "" 165 | else: 166 | logger.debug(f"[TOOLIFY-DETECTOR] finalize() - 解析失败,返回缓冲区内容作为普通文本") 167 | return None, self.content_buffer 168 | 169 | # detecting状态:没有检测到工具调用,返回缓冲区中剩余的内容 170 | if self.content_buffer: 171 | logger.debug(f"[TOOLIFY-DETECTOR] finalize() - 状态是detecting,返回缓冲区内容: {repr(self.content_buffer[:100])}") 172 | else: 173 | logger.debug(f"[TOOLIFY-DETECTOR] finalize() - 状态是detecting,缓冲区为空") 174 | return None, self.content_buffer 175 | 176 | -------------------------------------------------------------------------------- /src/toolify/prompt.py: -------------------------------------------------------------------------------- 1 | """ 2 | Toolify 提示词生成器 3 | 生成工具调用的系统提示词 
4 | """ 5 | 6 | import json 7 | import logging 8 | from typing import List, Dict, Any 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | def get_function_call_prompt_template(trigger_signal: str, custom_template: str = None) -> str: 14 | """ 15 | 基于动态触发信号生成提示词模板 16 | 17 | Args: 18 | trigger_signal: 触发信号字符串 19 | custom_template: 自定义模板(可选) 20 | 21 | Returns: 22 | 提示词模板字符串 23 | """ 24 | if custom_template: 25 | logger.info("[TOOLIFY] 使用配置中的自定义提示词模板") 26 | return custom_template.format( 27 | trigger_signal=trigger_signal, 28 | tools_list="{tools_list}" 29 | ) 30 | 31 | return f""" 32 | 你可以访问以下可用工具来帮助解决问题: 33 | 34 | {{tools_list}} 35 | 36 | **重要上下文说明:** 37 | 1. 如果需要,你可以在单次响应中调用多个工具。 38 | 2. 对话上下文中可能已包含之前函数调用的工具执行结果。请仔细查看对话历史,避免不必要的重复工具调用。 39 | 3. 当工具执行结果出现在上下文中时,它们将使用 ... 这样的XML标签格式化,便于识别。 40 | 4. 这是你可以使用的唯一工具调用格式,任何偏差都将导致失败。 41 | 42 | 当你需要使用工具时,你**必须**严格遵循以下格式。不要在工具调用语法的第一行和第二行包含任何额外的文本、解释或对话: 43 | 44 | 1. 开始工具调用时,在新行上准确输出: 45 | {trigger_signal} 46 | 不要有前导或尾随空格,完全按照上面显示的输出。触发信号必须单独占一行,且只出现一次。 47 | 48 | 2. 从第二行开始,**立即**紧跟完整的 XML块。 49 | 50 | 3. 对于多个工具调用,在同一个 包装器中包含多个 块。 51 | 52 | 4. 在结束标签 后不要添加任何文本或解释。 53 | 54 | 严格的参数键规则: 55 | - 你必须使用**完全相同**的参数键(区分大小写和标点符号)。不要重命名、添加或删除字符。 56 | - 如果键以连字符开头(例如 -i, -C),你必须在标签名中保留连字符。例如:<-i>true, <-C>2。 57 | - 永远不要将 "-i" 转换为 "i" 或将 "-C" 转换为 "C"。不要复数化、翻译或给参数键起别名。 58 | - 标签必须包含列表中某个工具的确切名称。任何其他工具名称都是无效的。 59 | - 必须包含该工具的所有必需参数。 60 | 61 | 正确示例(多个工具调用,包括带连字符的键): 62 | ...响应内容(可选)... 
63 | {trigger_signal} 64 | 65 | 66 | search 67 | 68 | ["Python Document", "how to use python"] 69 | 70 | 71 | 72 | 73 | 现在请准备好严格遵循以上规范。 74 | """ 75 | 76 | 77 | def generate_function_prompt(tools: List[Dict[str, Any]], trigger_signal: str, custom_template: str = None) -> tuple[str, str]: 78 | """ 79 | 基于客户端请求中的工具定义生成注入的系统提示词 80 | 81 | Args: 82 | tools: 工具定义列表(OpenAI格式) 83 | trigger_signal: 触发信号 84 | custom_template: 自定义模板(可选) 85 | 86 | Returns: 87 | (prompt_content, trigger_signal): 提示词内容和触发信号 88 | """ 89 | tools_list_str = [] 90 | for i, tool in enumerate(tools): 91 | func = tool.get("function", {}) 92 | name = func.get("name", "") 93 | description = func.get("description", "") 94 | 95 | # 读取 JSON Schema 字段 96 | schema: Dict[str, Any] = func.get("parameters", {}) or {} 97 | props: Dict[str, Any] = schema.get("properties", {}) or {} 98 | required_list: List[str] = schema.get("required", []) or [] 99 | 100 | # 简要摘要行:name (type) 101 | params_summary = ", ".join([ 102 | f"{p_name} ({(p_info or {}).get('type', 'any')})" for p_name, p_info in props.items() 103 | ]) or "None" 104 | 105 | # 构建详细参数规范 106 | detail_lines: List[str] = [] 107 | for p_name, p_info in props.items(): 108 | p_info = p_info or {} 109 | p_type = p_info.get("type", "any") 110 | is_required = "Yes" if p_name in required_list else "No" 111 | p_desc = p_info.get("description") 112 | enum_vals = p_info.get("enum") 113 | default_val = p_info.get("default") 114 | examples_val = p_info.get("examples") or p_info.get("example") 115 | 116 | # 常见约束和提示 117 | constraints: Dict[str, Any] = {} 118 | for key in [ 119 | "minimum", "maximum", "exclusiveMinimum", "exclusiveMaximum", 120 | "minLength", "maxLength", "pattern", "format", 121 | "minItems", "maxItems", "uniqueItems" 122 | ]: 123 | if key in p_info: 124 | constraints[key] = p_info.get(key) 125 | 126 | # 数组项类型提示 127 | if p_type == "array": 128 | items = p_info.get("items") or {} 129 | if isinstance(items, dict): 130 | itype = items.get("type") 131 | if itype: 
132 | constraints["items.type"] = itype 133 | 134 | # 组合详细行 135 | detail_lines.append(f"- {p_name}:") 136 | detail_lines.append(f" - type: {p_type}") 137 | detail_lines.append(f" - required: {is_required}") 138 | if p_desc: 139 | detail_lines.append(f" - description: {p_desc}") 140 | if enum_vals is not None: 141 | try: 142 | detail_lines.append(f" - enum: {json.dumps(enum_vals, ensure_ascii=False)}") 143 | except Exception: 144 | detail_lines.append(f" - enum: {enum_vals}") 145 | if default_val is not None: 146 | try: 147 | detail_lines.append(f" - default: {json.dumps(default_val, ensure_ascii=False)}") 148 | except Exception: 149 | detail_lines.append(f" - default: {default_val}") 150 | if examples_val is not None: 151 | try: 152 | detail_lines.append(f" - examples: {json.dumps(examples_val, ensure_ascii=False)}") 153 | except Exception: 154 | detail_lines.append(f" - examples: {examples_val}") 155 | if constraints: 156 | try: 157 | detail_lines.append(f" - constraints: {json.dumps(constraints, ensure_ascii=False)}") 158 | except Exception: 159 | detail_lines.append(f" - constraints: {constraints}") 160 | 161 | detail_block = "\n".join(detail_lines) if detail_lines else "(无参数详情)" 162 | 163 | desc_block = f"```\n{description}\n```" if description else "None" 164 | 165 | tools_list_str.append( 166 | f"{i + 1}. 
\n" 167 | f" 描述:\n{desc_block}\n" 168 | f" 参数摘要: {params_summary}\n" 169 | f" 必需参数: {', '.join(required_list) if required_list else 'None'}\n" 170 | f" 参数详情:\n{detail_block}" 171 | ) 172 | 173 | prompt_template = get_function_call_prompt_template(trigger_signal, custom_template) 174 | prompt_content = prompt_template.replace("{tools_list}", "\n\n".join(tools_list_str)) 175 | 176 | return prompt_content, trigger_signal 177 | 178 | 179 | def safe_process_tool_choice(tool_choice) -> str: 180 | """ 181 | 安全处理tool_choice字段,避免类型错误 182 | 183 | Args: 184 | tool_choice: tool_choice参数(可能是字符串或对象) 185 | 186 | Returns: 187 | 附加的提示词内容 188 | """ 189 | try: 190 | if tool_choice is None: 191 | return "" 192 | 193 | if isinstance(tool_choice, str): 194 | if tool_choice == "none": 195 | return "\n\n**重要提示:** 本轮你被禁止使用任何工具。请像普通聊天助手一样响应,直接回答用户的问题。" 196 | else: 197 | logger.debug(f"[TOOLIFY] 未知的tool_choice字符串值: {tool_choice}") 198 | return "" 199 | 200 | elif hasattr(tool_choice, 'function') and hasattr(tool_choice.function, 'name'): 201 | required_tool_name = tool_choice.function.name 202 | return f"\n\n**重要提示:** 本轮你必须**仅**使用名为 `{required_tool_name}` 的工具。生成必要的参数并按指定的XML格式输出。" 203 | 204 | else: 205 | logger.debug(f"[TOOLIFY] 不支持的tool_choice类型: {type(tool_choice)}") 206 | return "" 207 | 208 | except Exception as e: 209 | logger.error(f"[TOOLIFY] 处理tool_choice时出错: {e}") 210 | return "" 211 | 212 | -------------------------------------------------------------------------------- /src/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 配置管理模块 4 | 统一管理所有环境变量和配置项 5 | """ 6 | import os 7 | import logging 8 | from typing import List 9 | from dotenv import load_dotenv 10 | from src.token_manager import TokenManager 11 | from src.token_updater import TokenUpdater 12 | 13 | # 加载环境变量 14 | load_dotenv() 15 | 16 | class Config: 17 | """应用配置类""" 18 | 19 | # API认证配置 20 | VALID_API_KEY: str = os.getenv("VALID_API_KEY", "") 21 
    # Hard-coded K2THINK_TOKEN removed; tokens come from the token manager
    K2THINK_API_URL: str = os.getenv("K2THINK_API_URL", "https://www.k2think.ai/api/chat/completions")

    # Token management
    TOKENS_FILE: str = os.getenv("TOKENS_FILE", "tokens.txt")
    MAX_TOKEN_FAILURES: int = int(os.getenv("MAX_TOKEN_FAILURES", "3"))

    # Automatic token refresh
    ENABLE_TOKEN_AUTO_UPDATE: bool = os.getenv("ENABLE_TOKEN_AUTO_UPDATE", "false").lower() == "true"
    TOKEN_UPDATE_INTERVAL: int = int(os.getenv("TOKEN_UPDATE_INTERVAL", "86400"))  # default: 24 hours
    ACCOUNTS_FILE: str = os.getenv("ACCOUNTS_FILE", "accounts.txt")
    GET_TOKENS_SCRIPT: str = os.getenv("GET_TOKENS_SCRIPT", "get_tokens.py")

    # Singleton instances (lazily initialized)
    _token_manager: TokenManager = None
    _token_updater: TokenUpdater = None

    # Server
    HOST: str = os.getenv("HOST", "0.0.0.0")
    PORT: int = int(os.getenv("PORT", "8001"))

    # Feature switches
    DEBUG_LOGGING: bool = os.getenv("DEBUG_LOGGING", "false").lower() == "true"
    ENABLE_ACCESS_LOG: bool = os.getenv("ENABLE_ACCESS_LOG", "true").lower() == "true"

    # Performance
    REQUEST_TIMEOUT: float = float(os.getenv("REQUEST_TIMEOUT", "60"))
    MAX_KEEPALIVE_CONNECTIONS: int = int(os.getenv("MAX_KEEPALIVE_CONNECTIONS", "20"))
    MAX_CONNECTIONS: int = int(os.getenv("MAX_CONNECTIONS", "100"))
    STREAM_DELAY: float = float(os.getenv("STREAM_DELAY", "0.05"))
    STREAM_CHUNK_SIZE: int = int(os.getenv("STREAM_CHUNK_SIZE", "50"))
    MAX_STREAM_TIME: float = float(os.getenv("MAX_STREAM_TIME", "10.0"))

    # Logging
    LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO").upper()

    # CORS
    CORS_ORIGINS: List[str] = (
        os.getenv("CORS_ORIGINS", "*").split(",")
        if os.getenv("CORS_ORIGINS", "*") != "*"
        else ["*"]
    )

    # Tool calling (Toolify)
    ENABLE_TOOLIFY: bool = os.getenv("ENABLE_TOOLIFY", "true").lower() == "true"
    TOOLIFY_CUSTOM_PROMPT: str = os.getenv("TOOLIFY_CUSTOM_PROMPT", "")

    @classmethod
    def validate(cls) -> None:
        """Validate required configuration; raises ValueError on bad config."""
        if not cls.VALID_API_KEY:
            raise ValueError("错误:VALID_API_KEY 环境变量未设置。请在 .env 文件中提供一个安全的API密钥。")

        # Verify the token file exists
        if not os.path.exists(cls.TOKENS_FILE):
            if cls.ENABLE_TOKEN_AUTO_UPDATE:
                # With auto-update enabled, the prerequisite files must exist
                if not os.path.exists(cls.ACCOUNTS_FILE):
                    raise ValueError(f"错误:启用了token自动更新,但账户文件 {cls.ACCOUNTS_FILE} 不存在。请创建账户文件或禁用自动更新。")
                if not os.path.exists(cls.GET_TOKENS_SCRIPT):
                    raise ValueError(f"错误:启用了token自动更新,但脚本文件 {cls.GET_TOKENS_SCRIPT} 不存在。")

                # Create an empty token file; the updater service fills it later
                print(f"Token文件 {cls.TOKENS_FILE} 不存在,已启用自动更新。创建空token文件,等待更新服务生成...")
                try:
                    with open(cls.TOKENS_FILE, 'w', encoding='utf-8') as f:
                        f.write("# Token文件将由自动更新服务生成\n")
                    print("空token文件已创建,服务启动后将自动更新token池。")
                except Exception as e:
                    raise ValueError(f"错误:无法创建token文件 {cls.TOKENS_FILE}: {e}")
            else:
                # Without auto-update a token file must be provided manually
                raise ValueError(f"错误:Token文件 {cls.TOKENS_FILE} 不存在。请手动创建token文件或启用自动更新功能(设置 ENABLE_TOKEN_AUTO_UPDATE=true)。")

        # Validate numeric ranges
        if cls.PORT < 1 or cls.PORT > 65535:
            raise ValueError(f"错误:PORT 值 {cls.PORT} 不在有效范围内 (1-65535)")

        if cls.REQUEST_TIMEOUT <= 0:
            raise ValueError(f"错误:REQUEST_TIMEOUT 必须大于0,当前值: {cls.REQUEST_TIMEOUT}")

        if cls.STREAM_DELAY < 0:
            raise ValueError(f"错误:STREAM_DELAY 不能为负数,当前值: {cls.STREAM_DELAY}")

    @classmethod
    def setup_logging(cls) -> None:
        """Configure root logging (level, format, UTF-8 stdout handler)."""
        import sys

        level_map = {
            "DEBUG": logging.DEBUG,
            "INFO": logging.INFO,
            "WARNING": logging.WARNING,
            "ERROR": logging.ERROR
        }

        log_level = level_map.get(cls.LOG_LEVEL, logging.INFO)

        # Ensure log output goes to stdout
        logging.basicConfig(
            level=log_level,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            handlers=[
                logging.StreamHandler(sys.stdout)
            ]
        )

        # Ensure the standard streams use UTF-8 (Python 3.7+ only has reconfigure)
        if hasattr(sys.stdout, 'reconfigure'):
            sys.stdout.reconfigure(encoding='utf-8')
        if hasattr(sys.stderr, 'reconfigure'):
            sys.stderr.reconfigure(encoding='utf-8')

    @classmethod
    def get_token_manager(cls) -> TokenManager:
        """Return the TokenManager instance (singleton, lazily created)."""
        if cls._token_manager is None:
            cls._token_manager = TokenManager(
                tokens_file=cls.TOKENS_FILE,
                max_failures=cls.MAX_TOKEN_FAILURES,
                allow_empty=cls.ENABLE_TOKEN_AUTO_UPDATE  # empty file allowed in auto-update mode
            )
            # With auto-update enabled, wire up the force-refresh callback
            if cls.ENABLE_TOKEN_AUTO_UPDATE:
                cls._setup_force_refresh_callback()
        return cls._token_manager

    @classmethod
    def get_token_updater(cls) -> TokenUpdater:
        """Return the TokenUpdater instance (singleton, lazily created)."""
        if cls._token_updater is None:
            cls._token_updater = TokenUpdater(
                update_interval=cls.TOKEN_UPDATE_INTERVAL,
                get_tokens_script=cls.GET_TOKENS_SCRIPT,
                accounts_file=cls.ACCOUNTS_FILE,
                tokens_file=cls.TOKENS_FILE
            )
            # If the manager already exists and auto-update is on, connect them
            if cls._token_manager is not None and cls.ENABLE_TOKEN_AUTO_UPDATE:
                cls._setup_force_refresh_callback()
        return cls._token_updater

    @classmethod
    def reload_tokens(cls) -> None:
        """Reload tokens in the existing manager (no-op if not created yet)."""
        if cls._token_manager is not None:
            cls._token_manager.reload_tokens()

    @classmethod
    def _setup_force_refresh_callback(cls) -> None:
        """Install the force-refresh callback on the token manager."""
        if cls._token_manager is not None and cls._token_updater is None:
            # Make sure the updater is initialized first
            cls.get_token_updater()

        if cls._token_manager is not None and cls._token_updater is not None:
            # Install the force-refresh callback
            def force_refresh_callback():
                try:
                    logging.getLogger(__name__).info("🔄 检测到token问题,启动自动刷新")
                    success = cls._token_updater.force_update()
                    if success:
                        # On success, reload the manager's token pool
                        cls._token_manager.reload_tokens()
                        cls._token_manager.reset_consecutive_failures()
                        logging.getLogger(__name__).info("✅ 自动刷新完成,tokens.txt已更新,token池已重新加载")
                    else:
                        logging.getLogger(__name__).error("❌ 自动刷新失败,请检查accounts.txt文件或手动更新token")
                except Exception as e:
                    logging.getLogger(__name__).error(f"❌ 自动刷新回调执行失败: {e}")

            cls._token_manager.set_force_refresh_callback(force_refresh_callback)
            logging.getLogger(__name__).info("已设置连续失效自动强制刷新机制")
-------------------------------------------------------------------------------- /get_tokens.py: --------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os
import sys
import requests
import json
import time
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Optional
import re
from dotenv import load_dotenv

# Ensure UTF-8 encoding
os.environ.setdefault('PYTHONIOENCODING', 'utf-8')
os.environ.setdefault('PYTHONLEGACYWINDOWSSTDIO', '0')

# Force a UTF-8 locale
import locale
try:
    locale.setlocale(locale.LC_ALL, 'C.UTF-8')
except locale.Error:
    try:
        locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
    except locale.Error:
        pass  # fall back to the default locale on failure

# Reconfigure the standard streams
if hasattr(sys.stdout, 'reconfigure'):
    sys.stdout.reconfigure(encoding='utf-8', errors='replace')
if hasattr(sys.stderr, 'reconfigure'):
    sys.stderr.reconfigure(encoding='utf-8', errors='replace')
if hasattr(sys.stdin, 'reconfigure'):
    sys.stdin.reconfigure(encoding='utf-8', errors='replace')

# Load environment variables
load_dotenv()

class K2ThinkTokenExtractor:
    """Logs in to K2Think accounts and harvests auth tokens."""

    def __init__(self):
        self.base_url = "https://www.k2think.ai"
        self.login_url = f"{self.base_url}/api/v1/auths/signin"

        # Read proxy settings from the environment
        proxy_url = os.getenv("PROXY_URL", "")
        self.proxies = {}
        if proxy_url:
            self.proxies = {
                'http': proxy_url,
                'https': proxy_url
            }
            print(f"使用代理: {proxy_url}")
        else:
            print("未配置代理,直接连接")

        # Request headers captured from browser devtools
        self.headers = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip, deflate, br, zstd',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Content-Type': 'application/json',
            'Origin': 'https://www.k2think.ai',
            'Priority': 'u=1, i',
            'Referer': 'https://www.k2think.ai/auth?mode=signin',
            'Sec-Ch-Ua': '"Chromium";v="140", "Not=A?Brand";v="24", "Microsoft Edge";v="140"',
            'Sec-Ch-Ua-Mobile': '?0',
            'Sec-Ch-Ua-Platform': '"Windows"',
            'Sec-Fetch-Dest': 'empty',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Site': 'same-origin',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36 Edg/140.0.0.0'
        }

        # Serializes token-file appends across worker threads
        self.lock = threading.Lock()

    def extract_token_from_set_cookie(self, response: requests.Response) -> Optional[str]:
        """Extract the token from the response's Set-Cookie header(s)."""
        # NOTE(review): requests' headers have no get_list(); the hasattr
        # branch always falls back to the single-header path here — confirm.
        set_cookie_headers = response.headers.get_list('Set-Cookie') if hasattr(response.headers, 'get_list') else [response.headers.get('Set-Cookie')]

        # Handle multiple Set-Cookie headers
        if set_cookie_headers:
            for cookie_header in set_cookie_headers:
                if cookie_header and 'token=' in cookie_header:
                    # Pull the token value out with a regex
                    match = re.search(r'token=([^;]+)', cookie_header)
                    if match:
                        return match.group(1)

        return None

    def login_and_get_token(self, email: str, password: str, retry_count: int = 3) -> Optional[str]:
        """Log in and return a token, retrying on exceptions; None on failure."""
        login_data = {
            "email": email,
            "password": password
        }

        for attempt in range(retry_count):
            try:
                session = requests.Session()
                session.headers.update(self.headers)

                response = session.post(
                    self.login_url,
                    json=login_data,
                    proxies=self.proxies if self.proxies else None,
                    timeout=30
                )

                if response.status_code == 200:
                    token = self.extract_token_from_set_cookie(response)
                    if token:
                        return token

            except Exception as e:
                if attempt == retry_count - 1:
                    return None
                time.sleep(2)  # wait 2s between retries
                continue

        return None

    def load_accounts(self, file_path: str = "./accounts.txt"):
        """Load account records (one JSON object per line) from a file."""
        accounts = []
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue

                    try:
                        account_data = json.loads(line)
                        if 'email' in account_data and 'k2_password' in account_data:
                            accounts.append({
                                'email': account_data['email'],
                                'password': account_data['k2_password']
                            })
                    # NOTE(review): bare except silently drops malformed lines;
                    # consider narrowing to json.JSONDecodeError and logging.
                    except:
                        continue

            return accounts

        except FileNotFoundError:
            return []
        except Exception:
            return []

    def save_token(self, token: str, file_path: str = "./tokens.txt"):
        """Append a token to the tokens file (thread-safe, best-effort)."""
        try:
            with self.lock:
                with open(file_path, 'a', encoding='utf-8') as f:
                    f.write(token + '\n')
        except Exception:
            pass

    def clear_tokens_file(self, file_path: str = "./tokens.txt"):
        """Truncate the tokens file before writing a fresh batch."""
        try:
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write('')  # truncate
            print(f"已清空tokens文件: {file_path}")
        except Exception as e:
            print(f"清空tokens文件失败: {e}")

    def process_account(self, account, tokens_file: str = "./tokens.txt"):
        """Process one account; returns True when a token was obtained and saved."""
        token = self.login_and_get_token(account['email'], account['password'])
        if token:
            self.save_token(token, tokens_file)
            return True
        return False

    def process_all_accounts(self, accounts_file: str = "./accounts.txt", tokens_file: str = "./tokens.txt"):
        """Process all accounts concurrently; returns True if any token was obtained."""
        accounts = self.load_accounts(accounts_file)
        if not accounts:
            print("没有账户需要处理或accounts.txt文件不存在")
            return False

        # Truncate the existing tokens file
        self.clear_tokens_file(tokens_file)

        print(f"开始处理 {len(accounts)} 个账户,4线程并发...")
        success_count = 0
        failed_count = 0

        # Smoke-test a single account first
        # NOTE(review): the test account is logged in again by the pool below,
        # i.e. it is processed twice — presumably intentional; confirm.
        test_account = accounts[0]
        print(f"测试账户: {test_account['email']}")

        try:
            token = self.login_and_get_token(test_account['email'], test_account['password'])
            if token:
                print(f"测试成功,获取token: {token[:50]}...")
            else:
                print("测试失败,无法获取token")
        except Exception as e:
            print(f"测试异常: {e}")

        with ThreadPoolExecutor(max_workers=4) as executor:
            # Submit all tasks
            future_to_account = {executor.submit(self.process_account, account, tokens_file): account for account in accounts}

            # Collect results
            for future in as_completed(future_to_account):
                account = future_to_account[future]
                try:
                    if future.result():
                        success_count += 1
                        print(f"✓ {account['email']}")
                    else:
                        failed_count += 1
                        print(f"✗ {account['email']}")
                except Exception as e:
                    failed_count += 1
                    print(f"✗ {account['email']} - {e}")

        print(f"\n处理完成: 成功 {success_count}, 失败 {failed_count}")

        # Report whether any token was obtained
        return success_count > 0


def main():
    """CLI entry point: optional argv[1]=accounts file, argv[2]=tokens file."""
    import sys

    # Support command-line arguments
    accounts_file = sys.argv[1] if len(sys.argv) > 1 else "./accounts.txt"
    tokens_file = sys.argv[2] if len(sys.argv) > 2 else "./tokens.txt"

    extractor = K2ThinkTokenExtractor()
    success = extractor.process_all_accounts(accounts_file, tokens_file)

    # Exit code reflects success
    sys.exit(0 if success else 1)


if __name__ == "__main__":
    main()
-------------------------------------------------------------------------------- /tests/test_tool_calling.py: --------------------------------------------------------------------------------
"""
K2Think API Proxy tool-calling examples.
Demonstrates how to use the tool-calling feature.
"""
import json
# NOTE(review): this module's docstring and "import json" sit just above this
# chunk in the original file and are unchanged.
from openai import OpenAI

# Client pointed at the local proxy; the key must match the proxy's config.
client = OpenAI(
    base_url="http://localhost:8001/v1",
    api_key="sk-123456"
)

# Tool schemas advertised to the model (OpenAI function-calling format).
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "获取指定城市的天气信息",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {
                        "type": "string",
                        "description": "城市名称,例如:北京、上海、深圳"
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                        "description": "温度单位",
                        "default": "celsius"
                    }
                },
                "required": ["city"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "search_web",
            "description": "在互联网上搜索信息",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "搜索关键词"
                    },
                    "num_results": {
                        "type": "integer",
                        "description": "返回结果数量",
                        "default": 5,
                        "minimum": 1,
                        "maximum": 10
                    }
                },
                "required": ["query"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "calculate",
            "description": "执行数学计算",
            "parameters": {
                "type": "object",
                "properties": {
                    "expression": {
                        "type": "string",
                        "description": "数学表达式,例如:2+2, 10*5, sqrt(16)"
                    }
                },
                "required": ["expression"]
            }
        }
    }
]


def example_basic_tool_call():
    """Single round-trip: let the model decide whether to call a tool."""
    print("\n=== 基础工具调用示例 ===\n")

    response = client.chat.completions.create(
        model="MBZUAI-IFM/K2-Think",
        messages=[
            {"role": "user", "content": "北京今天天气怎么样?"}
        ],
        tools=tools,
        tool_choice="auto"
    )

    message = response.choices[0].message

    if message.tool_calls:
        print("模型请求调用工具:")
        for tool_call in message.tool_calls:
            print(f"\n工具名称: {tool_call.function.name}")
            print(f"工具参数: {tool_call.function.arguments}")

            # Simulate executing the tool and echo the result.
            function_name = tool_call.function.name
            function_args = json.loads(tool_call.function.arguments)

            if function_name == "get_weather":
                result = {
                    "city": function_args.get("city"),
                    "temperature": 22,
                    "condition": "晴天",
                    "humidity": 45,
                    "unit": function_args.get("unit", "celsius")
                }
            else:
                result = {"status": "success", "data": "模拟数据"}

            print(f"工具执行结果: {json.dumps(result, ensure_ascii=False)}")
    else:
        print("模型直接回答:")
        print(message.content)


def example_multi_turn_conversation():
    """Two-round conversation: tool call, tool result, final answer."""
    print("\n=== 多轮对话示例 ===\n")

    messages = [
        {"role": "user", "content": "查一下上海的天气,然后搜索关于上海的旅游景点"}
    ]

    response = client.chat.completions.create(
        model="MBZUAI-IFM/K2-Think",
        messages=messages,
        tools=tools,
        tool_choice="auto"
    )

    message = response.choices[0].message

    if message.tool_calls:
        print("第一轮 - 模型请求调用工具:")
        messages.append(message)  # keep the assistant turn in history

        for tool_call in message.tool_calls:
            print(f"\n调用工具: {tool_call.function.name}")
            print(f"参数: {tool_call.function.arguments}")

            # Canned tool outputs keep the example self-contained.
            function_name = tool_call.function.name
            if function_name == "get_weather":
                result = '{"temperature": 25, "condition": "多云", "city": "上海"}'
            elif function_name == "search_web":
                result = '{"results": ["外滩", "东方明珠", "豫园", "南京路"]}'
            else:
                result = '{"status": "success"}'

            # Tool results must reference the originating call id.
            messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": result
            })

        print("\n第二轮 - 发送工具结果给模型...")
        final_response = client.chat.completions.create(
            model="MBZUAI-IFM/K2-Think",
            messages=messages,
            tools=tools
        )

        print("\n模型的最终回答:")
        print(final_response.choices[0].message.content)


def example_forced_tool_call():
    """Force the model to use one specific tool via tool_choice."""
    print("\n=== 强制工具调用示例 ===\n")

    response = client.chat.completions.create(
        model="MBZUAI-IFM/K2-Think",
        messages=[
            {"role": "user", "content": "计算 123 * 456"}
        ],
        tools=tools,
        tool_choice={
            "type": "function",
            "function": {"name": "calculate"}
        }
    )

    message = response.choices[0].message

    if message.tool_calls:
        print("模型被强制使用工具:")
        for tool_call in message.tool_calls:
            print(f"工具: {tool_call.function.name}")
            print(f"参数: {tool_call.function.arguments}")


def example_stream_with_tools():
    """Streaming response that may terminate with a tool-call request."""
    print("\n=== 流式工具调用示例 ===\n")

    stream = client.chat.completions.create(
        model="MBZUAI-IFM/K2-Think",
        messages=[
            {"role": "user", "content": "帮我搜索一下人工智能的最新发展"}
        ],
        tools=tools,
        stream=True
    )

    print("流式响应:")
    for chunk in stream:
        # Fix: stream chunks can arrive with an empty choices list; indexing
        # chunk.choices[0] unconditionally raised IndexError on those.
        if not chunk.choices:
            continue

        choice = chunk.choices[0]
        if choice.delta.content:
            print(choice.delta.content, end="", flush=True)

        if hasattr(choice.delta, 'tool_calls') and choice.delta.tool_calls:
            print("\n检测到工具调用:")
            for tool_call in choice.delta.tool_calls:
                if hasattr(tool_call, 'function'):
                    print(f"\n工具: {tool_call.function.name if hasattr(tool_call.function, 'name') else '未知'}")

        if choice.finish_reason == "tool_calls":
            print("\n[流结束 - 需要工具调用]")
            break
        elif choice.finish_reason == "stop":
            print("\n[流结束]")
            break

    print()


def example_disable_tools():
    """Pass tool_choice='none' so the model must answer directly."""
    print("\n=== 禁用工具调用示例 ===\n")

    response = client.chat.completions.create(
        model="MBZUAI-IFM/K2-Think",
        messages=[
            {"role": "user", "content": "北京今天天气怎么样?"}
        ],
        tools=tools,
        tool_choice="none"  # disable tool calling for this request
    )

    print("模型直接回答(未使用工具):")
    print(response.choices[0].message.content)


if __name__ == "__main__":
    print("=" * 60)
    print("K2Think API Proxy - 工具调用功能示例")
    print("=" * 60)

    try:
        example_basic_tool_call()
        example_forced_tool_call()
        example_stream_with_tools()
        example_disable_tools()
        example_multi_turn_conversation()

        print("\n" + "=" * 60)
        print("示例运行完成!")
        print("=" * 60)
    except Exception as e:
        print(f"\n错误: {e}")
        print("\n请确保:")
        print("1. K2Think API Proxy 服务正在运行(http://localhost:8001)")
        print("2. 环境变量 ENABLE_TOOLIFY=true")
        print("3. API密钥配置正确")
# NOTE(review): the XML-ish tags inside the f-strings of this module were
# stripped by the extraction that produced this dump (e.g. `return f""` for a
# "self-closing trigger signal"). They are reconstructed below with hedged
# markers; confirm the exact tag names against src/toolify/parser.py and
# detector.py before shipping.

def generate_random_trigger_signal() -> str:
    """Generate a random, self-closing trigger tag, e.g. ``<toolify-Ab3x/>``.

    NOTE(review): the exact tag text was lost in extraction -- the streaming
    detector must match this format; verify against detector.py.
    """
    chars = string.ascii_letters + string.digits
    random_str = ''.join(secrets.choice(chars) for _ in range(4))
    return f"<toolify-{random_str}/>"


class ToolCallMappingManager:
    """Tool-call mapping store with TTL, size limit, LRU eviction.

    Features:
    1. Auto-expiry -- entries are dropped after ``ttl_seconds``.
    2. Size cap -- prevents unbounded memory growth.
    3. LRU eviction -- oldest entry removed when the cap is reached.
    4. Thread-safe -- guarded by an RLock for concurrent access.
    5. Periodic cleanup -- a daemon thread sweeps expired entries.
    """

    def __init__(self, max_size: int = 1000, ttl_seconds: int = 3600, cleanup_interval: int = 300):
        """
        Args:
            max_size: Maximum number of stored entries.
            ttl_seconds: Entry time-to-live in seconds.
            cleanup_interval: Seconds between background sweeps.
        """
        self.max_size = max_size
        self.ttl_seconds = ttl_seconds
        self.cleanup_interval = cleanup_interval

        # OrderedDict gives O(1) LRU bookkeeping via move_to_end/popitem.
        self._data: OrderedDict[str, Dict[str, Any]] = OrderedDict()
        self._timestamps: Dict[str, float] = {}
        self._lock = threading.RLock()

        # Daemon thread: dies with the process, no explicit shutdown needed.
        self._cleanup_thread = threading.Thread(target=self._periodic_cleanup, daemon=True)
        self._cleanup_thread.start()

        logger.debug(f"[TOOLIFY] 工具调用映射管理器已启动 - 最大条目: {max_size}, TTL: {ttl_seconds}s")

    def store(self, tool_call_id: str, name: str, args: dict, description: str = "") -> None:
        """Store (or refresh) a tool-call mapping, evicting LRU entries as needed."""
        with self._lock:
            current_time = time.time()

            # Re-storing an id moves it to the newest position.
            if tool_call_id in self._data:
                del self._data[tool_call_id]
                del self._timestamps[tool_call_id]

            while len(self._data) >= self.max_size:
                oldest_key = next(iter(self._data))
                del self._data[oldest_key]
                del self._timestamps[oldest_key]
                logger.debug(f"[TOOLIFY] 因大小限制移除最旧条目: {oldest_key}")

            self._data[tool_call_id] = {
                "name": name,
                "args": args,
                "description": description,
                "created_at": current_time
            }
            self._timestamps[tool_call_id] = current_time

            logger.debug(f"[TOOLIFY] 存储工具调用映射: {tool_call_id} -> {name}")

    def get(self, tool_call_id: str) -> Optional[Dict[str, Any]]:
        """Look up a mapping; returns None when absent or expired (lazy expiry)."""
        with self._lock:
            current_time = time.time()

            if tool_call_id not in self._data:
                logger.debug(f"[TOOLIFY] 未找到工具调用映射: {tool_call_id}")
                return None

            if current_time - self._timestamps[tool_call_id] > self.ttl_seconds:
                logger.debug(f"[TOOLIFY] 工具调用映射已过期: {tool_call_id}")
                del self._data[tool_call_id]
                del self._timestamps[tool_call_id]
                return None

            result = self._data[tool_call_id]
            self._data.move_to_end(tool_call_id)  # refresh LRU position

            logger.debug(f"[TOOLIFY] 找到工具调用映射: {tool_call_id} -> {result['name']}")
            return result

    def cleanup_expired(self) -> int:
        """Remove all expired entries; returns how many were dropped."""
        with self._lock:
            current_time = time.time()
            expired_keys = [
                key for key, timestamp in self._timestamps.items()
                if current_time - timestamp > self.ttl_seconds
            ]

            for key in expired_keys:
                del self._data[key]
                del self._timestamps[key]

            if expired_keys:
                logger.debug(f"[TOOLIFY] 清理了 {len(expired_keys)} 个过期条目")

            return len(expired_keys)

    def _periodic_cleanup(self) -> None:
        """Background loop: sweep expired entries every cleanup_interval seconds."""
        while True:
            try:
                time.sleep(self.cleanup_interval)
                self.cleanup_expired()
            except Exception as e:
                # Never let the daemon die silently over a transient error.
                logger.error(f"[TOOLIFY] 后台清理线程异常: {e}")


class ToolifyCore:
    """Core of the Toolify plugin: manages tool-call mappings and message rewriting."""

    def __init__(self, enable_function_calling: bool = True):
        """
        Args:
            enable_function_calling: Whether function calling is enabled.
        """
        self.enable_function_calling = enable_function_calling
        self.mapping_manager = ToolCallMappingManager()
        # Random per-instance signal so prompt injection can't forge it.
        self.trigger_signal = generate_random_trigger_signal()

        logger.info(f"[TOOLIFY] 核心已初始化 - 功能启用: {enable_function_calling}")
        logger.debug(f"[TOOLIFY] 触发信号: {self.trigger_signal}")

    def store_tool_call_mapping(self, tool_call_id: str, name: str, args: dict, description: str = ""):
        """Remember which call id maps to which tool invocation."""
        self.mapping_manager.store(tool_call_id, name, args, description)

    def get_tool_call_mapping(self, tool_call_id: str) -> Optional[Dict[str, Any]]:
        """Return the stored invocation for a call id, or None."""
        return self.mapping_manager.get(tool_call_id)

    def format_tool_result_for_ai(self, tool_call_id: str, result_content: str) -> str:
        """Render a tool's result as text the upstream model can read."""
        logger.debug(f"[TOOLIFY] 格式化工具调用结果: tool_call_id={tool_call_id}")
        tool_info = self.get_tool_call_mapping(tool_call_id)
        if not tool_info:
            # Unknown id (expired / never stored): fall back to a bare result.
            logger.debug(f"[TOOLIFY] 未找到工具调用映射,使用默认格式")
            return f"Tool execution result:\n\n{result_content}\n"

        formatted_text = f"""Tool execution result:
- Tool name: {tool_info['name']}
- Execution result:

{result_content}
"""

        logger.debug(f"[TOOLIFY] 格式化完成,工具名: {tool_info['name']}")
        return formatted_text

    def format_assistant_tool_calls_for_ai(self, tool_calls: List[Dict[str, Any]]) -> str:
        """Serialize an assistant turn's tool_calls into the XML-ish text protocol."""
        logger.debug(f"[TOOLIFY] 格式化助手工具调用. 数量: {len(tool_calls)}")

        xml_calls_parts = []
        for tool_call in tool_calls:
            function_info = tool_call.get("function", {})
            name = function_info.get("name", "")
            arguments_json = function_info.get("arguments", "{}")

            try:
                args_dict = json.loads(arguments_json)
            except (json.JSONDecodeError, TypeError):
                # Preserve unparseable arguments rather than dropping them.
                args_dict = {"raw_arguments": arguments_json}

            # NOTE(review): tag names below were garbled in the dump this code
            # was recovered from; reconstructed as <key>value</key> pairs inside
            # <function_call>/<tool>/<args> -- confirm against parser.py.
            args_parts = []
            for key, value in args_dict.items():
                json_value = json.dumps(value, ensure_ascii=False)
                args_parts.append(f"<{key}>{json_value}</{key}>")

            args_content = "\n".join(args_parts)

            xml_call = (
                f"<function_call>\n<tool>{name}</tool>\n"
                f"<args>\n{args_content}\n</args>\n</function_call>"
            )
            xml_calls_parts.append(xml_call)

        all_calls = "\n".join(xml_calls_parts)
        final_str = f"{self.trigger_signal}\n<function_calls>\n{all_calls}\n</function_calls>"

        logger.debug("[TOOLIFY] 助手工具调用格式化成功")
        return final_str

    def preprocess_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Rewrite OpenAI-format messages so a plain LLM can follow tool traffic.

        Tool-role messages become user messages carrying the formatted result;
        assistant messages with tool_calls get those calls folded into content.

        Args:
            messages: OpenAI-format message list.

        Returns:
            The rewritten message list.
        """
        processed_messages = []

        for message in messages:
            if not isinstance(message, dict):
                processed_messages.append(message)
                continue

            if message.get("role") == "tool":
                tool_call_id = message.get("tool_call_id")
                content = message.get("content")

                if tool_call_id and content:
                    formatted_content = self.format_tool_result_for_ai(tool_call_id, content)
                    processed_messages.append({
                        "role": "user",
                        "content": formatted_content
                    })
                    logger.debug(f"[TOOLIFY] 转换tool消息为user消息: tool_call_id={tool_call_id}")
                else:
                    # Tool messages without id/content are unusable; drop them.
                    logger.debug(f"[TOOLIFY] 跳过无效tool消息: tool_call_id={tool_call_id}")

            elif message.get("role") == "assistant" and message.get("tool_calls"):
                tool_calls = message.get("tool_calls", [])
                formatted_tool_calls_str = self.format_assistant_tool_calls_for_ai(tool_calls)

                # Merge with any original content the assistant produced.
                original_content = message.get("content") or ""
                final_content = f"{original_content}\n{formatted_tool_calls_str}".strip()

                processed_message = {
                    "role": "assistant",
                    "content": final_content
                }
                # Carry over remaining fields (everything except tool_calls).
                for key, value in message.items():
                    if key not in ["role", "content", "tool_calls"]:
                        processed_message[key] = value

                processed_messages.append(processed_message)
                logger.debug(f"[TOOLIFY] 转换assistant的tool_calls为content")
            else:
                processed_messages.append(message)

        return processed_messages

    def convert_parsed_tools_to_openai_format(self, parsed_tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Convert parsed tool invocations to OpenAI-format tool_calls.

        Args:
            parsed_tools: Parsed tools: [{"name": ..., "args": {...}}, ...]

        Returns:
            OpenAI-format tool_calls list; each call id is also stored in the
            mapping manager so later tool-result messages can be resolved.
        """
        tool_calls = []
        for tool in parsed_tools:
            tool_call_id = f"call_{uuid.uuid4().hex}"
            self.store_tool_call_mapping(
                tool_call_id,
                tool["name"],
                tool["args"],
                f"调用工具 {tool['name']}"
            )
            tool_calls.append({
                "id": tool_call_id,
                "type": "function",
                "function": {
                    "name": tool["name"],
                    # ensure_ascii=False for consistency with
                    # format_assistant_tool_calls_for_ai (keeps CJK readable).
                    "arguments": json.dumps(tool["args"], ensure_ascii=False)
                }
            })

        logger.debug(f"[TOOLIFY] 转换了 {len(tool_calls)} 个工具调用")
        return tool_calls
class TokenUpdater:
    """Token 更新服务: periodically runs get_tokens.py to refresh the token pool.

    NOTE(review): the module docstring, imports and the first half of
    __init__ sit just above this chunk; __init__ is re-stated here in full
    from the visible signature/defaults.
    """

    def __init__(self,
                 update_interval: int = 86400,  # default: refresh once per day
                 get_tokens_script: str = "get_tokens.py",
                 accounts_file: str = "accounts.txt",
                 tokens_file: str = "tokens.txt"):
        """
        Args:
            update_interval: Seconds between refreshes.
            get_tokens_script: Path to the get_tokens.py script.
            accounts_file: Path to the accounts file.
            tokens_file: Path to the tokens file.
        """
        self.update_interval = update_interval
        self.get_tokens_script = get_tokens_script
        self.accounts_file = accounts_file
        self.tokens_file = tokens_file

        self.is_running = False
        self.update_thread: Optional[threading.Thread] = None
        self.last_update: Optional[datetime] = None
        self.update_count = 0
        self.error_count = 0
        self.is_updating = False
        self.last_error: Optional[str] = None

        safe_log_info(logger, f"Token更新器初始化完成 - 更新间隔: {update_interval}秒")

        # Remove temp/backup files a crashed previous run may have left.
        self.cleanup_all_temp_files()

    def _check_files_exist(self) -> bool:
        """Return True only when both the updater script and accounts file exist."""
        if not os.path.exists(self.get_tokens_script):
            safe_log_error(logger, f"get_tokens.py脚本不存在: {self.get_tokens_script}")
            return False

        if not os.path.exists(self.accounts_file):
            safe_log_error(logger, f"账户文件不存在: {self.accounts_file}")
            return False

        return True

    def _run_token_update(self) -> bool:
        """Run the token-refresh script and atomically swap in the new file.

        Writes to a .tmp file first so the live tokens file is never served
        half-written; keeps a .backup copy of the previous file.

        Returns:
            True on success, False on any failure (recorded in last_error).
        """
        if self.is_updating:
            safe_log_warning(logger, "Token更新已在进行中,跳过此次更新")
            return False

        self.is_updating = True
        self.last_error = None
        temp_tokens_file = f"{self.tokens_file}.tmp"

        try:
            safe_log_info(logger, "开始更新token池...")

            result = subprocess.run(
                ["python", self.get_tokens_script, self.accounts_file, temp_tokens_file],
                capture_output=True,
                encoding='utf-8',
                text=True,
                timeout=300  # 5-minute hard limit
            )

            if result.returncode != 0:
                error_msg = f"Token更新失败 - 返回码: {result.returncode}, 错误: {result.stderr}"
                safe_log_error(logger, error_msg)
                self.last_error = error_msg
                self._cleanup_temp_file(temp_tokens_file)
                self.error_count += 1
                return False

            # The script succeeded; the temp file must exist and be non-empty.
            if not (os.path.exists(temp_tokens_file) and os.path.getsize(temp_tokens_file) > 0):
                error_msg = "Token更新失败 - 临时文件为空或不存在"
                safe_log_error(logger, error_msg)
                self.last_error = error_msg
                self._cleanup_temp_file(temp_tokens_file)
                self.error_count += 1
                return False

            try:
                if os.path.exists(self.tokens_file):
                    # Back up by copy (not rename) so the live file is never
                    # missing while readers hold it open.
                    backup_file = f"{self.tokens_file}.backup"
                    if os.path.exists(backup_file):
                        os.remove(backup_file)
                    shutil.copy2(self.tokens_file, backup_file)
                    logger.debug(f"已备份当前tokens文件到: {backup_file}")

                # Fix: os.replace is atomic on both POSIX and Windows and
                # overwrites in one step; the previous remove+rename pair on
                # Windows left a window during which no tokens file existed.
                os.replace(temp_tokens_file, self.tokens_file)

                safe_log_info(logger, "Token更新成功,文件已原子性替换")
                logger.debug(f"更新输出: {result.stdout}")
                self.update_count += 1
                self.last_update = datetime.now()

                # Tell the token manager to pick up the new pool.
                self._notify_token_reload()
                return True
            except Exception as rename_error:
                error_msg = f"文件重命名失败: {rename_error}"
                safe_log_error(logger, error_msg)
                self.last_error = error_msg
                self._cleanup_temp_file(temp_tokens_file)
                self.error_count += 1
                return False

        except subprocess.TimeoutExpired:
            error_msg = "Token更新超时"
            safe_log_error(logger, error_msg)
            self.last_error = error_msg
            self._cleanup_temp_file(temp_tokens_file)
            self.error_count += 1
            return False
        except Exception as e:
            error_msg = f"Token更新异常: {e}"
            safe_log_error(logger, error_msg)
            self.last_error = error_msg
            self._cleanup_temp_file(temp_tokens_file)
            self.error_count += 1
            return False
        finally:
            self.is_updating = False

    def _cleanup_temp_file(self, temp_file: str):
        """Best-effort removal of one temp file."""
        try:
            if os.path.exists(temp_file):
                os.remove(temp_file)
                logger.debug(f"已清理临时文件: {temp_file}")
        except Exception as e:
            safe_log_warning(logger, f"清理临时文件失败: {e}")

    def cleanup_all_temp_files(self):
        """Remove leftover .tmp/.backup files; returns how many were deleted."""
        temp_patterns = [
            f"{self.tokens_file}.tmp",
            f"{self.tokens_file}.backup"
        ]

        cleaned_count = 0
        for pattern in temp_patterns:
            try:
                if os.path.exists(pattern):
                    os.remove(pattern)
                    safe_log_info(logger, f"已清理遗留文件: {pattern}")
                    cleaned_count += 1
            except Exception as e:
                safe_log_warning(logger, f"清理遗留文件失败 {pattern}: {e}")

        if cleaned_count > 0:
            safe_log_info(logger, f"共清理了 {cleaned_count} 个遗留文件")
        else:
            logger.debug("没有发现需要清理的遗留文件")

        return cleaned_count

    def _notify_token_reload(self):
        """Ask the (lazily created) token manager to reload the token pool."""
        try:
            # Imported here to avoid a circular import with src.config.
            from src.config import Config
            if Config._token_manager is not None:
                Config._token_manager.reload_tokens()
                safe_log_info(logger, "Token管理器已重新加载")
        except Exception as e:
            safe_log_warning(logger, f"通知token重新加载失败: {e}")

    def _update_loop(self):
        """Background loop: optional immediate first refresh, then periodic ones."""
        safe_log_info(logger, "Token更新服务启动")

        # On first start, refresh immediately when tokens.txt holds no usable
        # (non-comment) token at all.
        if os.path.exists(self.tokens_file):
            try:
                # Read and close promptly so the file is not held open.
                with open(self.tokens_file, "r", encoding="utf-8") as f:
                    content = f.read()

                lines = content.splitlines()
                valid_lines = [line.strip() for line in lines
                               if line.strip() and not line.strip().startswith("#")]

                if len(valid_lines) < 1:
                    # Imported here to avoid a circular import with src.config.
                    from src.config import Config
                    if Config.ENABLE_TOKEN_AUTO_UPDATE:
                        safe_log_info(logger, "首次启动时,tokens.txt中没有token(非#开头),立即更新一次")
                        # Small delay to ensure the handle is fully released.
                        time.sleep(0.1)
                        self._run_token_update()
            except Exception as e:
                safe_log_warning(logger, f"检查tokens文件时出错: {e}")

        while self.is_running:
            try:
                time.sleep(self.update_interval)

                if not self.is_running:
                    break

                if self._check_files_exist():
                    self._run_token_update()
                else:
                    safe_log_warning(logger, "跳过此次更新 - 必要文件不存在")
            except Exception as e:
                safe_log_error(logger, "更新循环异常", e)
                time.sleep(60)  # back off one minute after an unexpected error

    def start(self) -> bool:
        """Start the background updater thread; returns False if it can't start."""
        if self.is_running:
            safe_log_warning(logger, "Token更新服务已在运行")
            return False

        if not self._check_files_exist():
            safe_log_error(logger, "启动失败 - 必要文件不存在")
            return False

        self.is_running = True
        self.update_thread = threading.Thread(target=self._update_loop, daemon=True)
        self.update_thread.start()

        safe_log_info(logger, "Token更新服务已启动")
        return True

    def stop(self):
        """Stop the updater thread (waits up to 5s for it to exit)."""
        if not self.is_running:
            safe_log_warning(logger, "Token更新服务未在运行")
            return

        self.is_running = False
        if self.update_thread and self.update_thread.is_alive():
            self.update_thread.join(timeout=5)

        safe_log_info(logger, "Token更新服务已停止")

    def force_update(self) -> bool:
        """Run one refresh immediately (synchronously)."""
        if not self._check_files_exist():
            safe_log_error(logger, "强制更新失败 - 必要文件不存在")
            return False

        safe_log_info(logger, "执行强制token更新")
        return self._run_token_update()

    async def force_update_async(self) -> bool:
        """Run force_update in a worker thread without blocking the event loop."""
        import asyncio
        # Fix: get_event_loop() inside a coroutine is deprecated;
        # get_running_loop() is the supported call here.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self.force_update)

    def get_status(self) -> dict:
        """Snapshot of the updater's state for the admin API."""
        return {
            "is_running": self.is_running,
            "is_updating": self.is_updating,
            "update_interval": self.update_interval,
            "last_update": self.last_update.isoformat() if self.last_update else None,
            "update_count": self.update_count,
            "error_count": self.error_count,
            "last_error": self.last_error,
            "next_update": (
                (self.last_update + timedelta(seconds=self.update_interval)).isoformat()
                if self.last_update else None
            ),
            "files": {
                "get_tokens_script": os.path.exists(self.get_tokens_script),
                "accounts_file": os.path.exists(self.accounts_file),
                "tokens_file": os.path.exists(self.tokens_file)
            }
        }
# NOTE(review): module imports, the UTF-8 stdio setup and Config.validate()
# sit just above this chunk; lifespan is re-stated here in full from its
# visible body.

@asynccontextmanager
async def lifespan(app: FastAPI):
    """App lifespan: start/stop the token auto-update service around serving."""
    logger.info("K2Think API Proxy 启动中...")

    if Config.ENABLE_TOKEN_AUTO_UPDATE:
        token_updater = Config.get_token_updater()
        if token_updater.start():
            logger.info(f"Token自动更新服务已启动 - 更新间隔: {Config.TOKEN_UPDATE_INTERVAL}秒")
        else:
            logger.error("Token自动更新服务启动失败")
    else:
        logger.info("Token自动更新服务未启用")

    yield

    # Shut the updater down cleanly on exit.
    if Config.ENABLE_TOKEN_AUTO_UPDATE and Config._token_updater:
        Config._token_updater.stop()
        logger.info("Token自动更新服务已停止")

    logger.info("K2Think API Proxy 关闭中...")

# Fix: the FastAPI version string said "2.0.0" while the homepage reported
# "2.1.0"; unified to 2.1.0 (matches the advertised feature list).
# NOTE(review): confirm 2.1.0 is the intended release number.
app = FastAPI(
    title="K2Think API Proxy",
    description="OpenAI兼容的K2Think API代理服务",
    version="2.1.0",
    lifespan=lifespan
)

# CORS: origins come from configuration; methods/headers are unrestricted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=Config.CORS_ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Single handler instance shared by all request routes.
api_handler = APIHandler(Config)

@app.get("/")
async def homepage():
    """Service banner: status, features and a map of available endpoints."""
    return JSONResponse(content={
        "status": "success",
        "message": "K2Think API Proxy is running",
        "service": "K2Think API Gateway",
        "model": APIConstants.MODEL_ID,
        "version": "2.1.0",
        "features": [
            "Token轮询和负载均衡",
            "自动失效检测和重试",
            "Token池管理",
            "OpenAI Function Calling 工具调用"
        ],
        "endpoints": {
            "chat": "/v1/chat/completions",
            "models": "/v1/models",
            "health": "/health",
            "admin": {
                "token_stats": "/admin/tokens/stats",
                "reset_token": "/admin/tokens/reset/{token_index}",
                "reset_all": "/admin/tokens/reset-all",
                "reload_tokens": "/admin/tokens/reload",
                "consecutive_failures": "/admin/tokens/consecutive-failures",
                "reset_consecutive": "/admin/tokens/reset-consecutive",
                "updater_status": "/admin/tokens/updater/status",
                "force_update": "/admin/tokens/updater/force-update",
                "cleanup_temp_files": "/admin/tokens/updater/cleanup-temp"
            }
        }
    })

@app.get("/health")
async def health_check():
    """Liveness probe with config flags and token-pool statistics."""
    token_manager = Config.get_token_manager()
    token_stats = token_manager.get_token_stats()

    return JSONResponse(content={
        "status": "healthy",
        "timestamp": int(time.time()),
        "config": {
            "debug_logging": Config.DEBUG_LOGGING,
            "toolify_enabled": Config.ENABLE_TOOLIFY,
            "note": "思考内容输出现在通过模型名控制"
        },
        "tokens": {
            "total": token_stats["total_tokens"],
            "active": token_stats["active_tokens"],
            "inactive": token_stats["inactive_tokens"],
            "consecutive_failures": token_manager.get_consecutive_failures(),
            "auto_update_enabled": Config.ENABLE_TOKEN_AUTO_UPDATE
        }
    })

@app.get("/favicon.ico")
async def favicon():
    """Empty favicon so browsers stop logging 404s."""
    return Response(content="", media_type="image/x-icon")

@app.get("/v1/models")
async def get_models():
    """OpenAI-compatible model listing (delegated to the API handler)."""
    return await api_handler.get_models()

@app.post("/v1/chat/completions")
async def chat_completions(request: ChatCompletionRequest, auth_request: Request):
    """OpenAI-compatible chat completions (delegated to the API handler)."""
    return await api_handler.chat_completions(request, auth_request)

@app.get("/admin/tokens/stats")
async def get_token_stats():
    """Token-pool statistics plus failure-tracking counters."""
    token_manager = Config.get_token_manager()
    stats = token_manager.get_token_stats()
    # Augment with consecutive-failure and upstream-error tracking.
    stats["consecutive_failures"] = token_manager.get_consecutive_failures()
    stats["consecutive_failure_threshold"] = token_manager.consecutive_failure_threshold
    stats["consecutive_upstream_errors"] = token_manager.get_consecutive_upstream_errors()
    stats["upstream_error_threshold"] = token_manager.upstream_error_threshold
    return JSONResponse(content={
        "status": "success",
        "data": stats
    })

@app.post("/admin/tokens/reset/{token_index}")
async def reset_token(token_index: int):
    """Reset one token by index; 400 on an out-of-range index."""
    token_manager = Config.get_token_manager()
    if token_manager.reset_token(token_index):
        return JSONResponse(content={
            "status": "success",
            "message": f"Token {token_index} 已重置"
        })
    return JSONResponse(
        status_code=400,
        content={
            "status": "error",
            "message": f"无效的token索引: {token_index}"
        }
    )

@app.post("/admin/tokens/reset-all")
async def reset_all_tokens():
    """Reset every token in the pool."""
    token_manager = Config.get_token_manager()
    token_manager.reset_all_tokens()
    return JSONResponse(content={
        "status": "success",
        "message": "所有token已重置"
    })

@app.post("/admin/tokens/reload")
async def reload_tokens():
    """Re-read the tokens file and return the refreshed pool statistics."""
    try:
        Config.reload_tokens()
        token_manager = Config.get_token_manager()
        stats = token_manager.get_token_stats()
        return JSONResponse(content={
            "status": "success",
            "message": "Token文件已重新加载",
            "data": stats
        })
    except Exception as e:
        return JSONResponse(
            status_code=500,
            content={
                "status": "error",
                "message": f"重新加载失败: {str(e)}"
            }
        )

@app.get("/admin/tokens/consecutive-failures")
async def get_consecutive_failures():
    """Detailed consecutive-failure / upstream-error telemetry."""
    token_manager = Config.get_token_manager()
    return JSONResponse(content={
        "status": "success",
        "data": {
            "consecutive_failures": token_manager.get_consecutive_failures(),
            "threshold": token_manager.consecutive_failure_threshold,
            "consecutive_upstream_errors": token_manager.get_consecutive_upstream_errors(),
            "upstream_error_threshold": token_manager.upstream_error_threshold,
            "last_upstream_error_time": (
                token_manager.last_upstream_error_time.isoformat()
                if token_manager.last_upstream_error_time else None
            ),
            "token_pool_size": len(token_manager.tokens),
            "auto_refresh_enabled": Config.ENABLE_TOKEN_AUTO_UPDATE and len(token_manager.tokens) > 2,
            "last_check": "实时检测"
        }
    })

@app.post("/admin/tokens/reset-consecutive")
async def reset_consecutive_failures():
    """Zero the consecutive-failure counter, reporting the previous value."""
    token_manager = Config.get_token_manager()
    old_count = token_manager.get_consecutive_failures()
    token_manager.reset_consecutive_failures()
    return JSONResponse(content={
        "status": "success",
        "message": f"连续失效计数已重置: {old_count} -> 0",
        "data": {
            "previous_count": old_count,
            "current_count": 0
        }
    })

@app.get("/admin/tokens/updater/status")
async def get_updater_status():
    """Status of the background token updater (or 'disabled')."""
    if not Config.ENABLE_TOKEN_AUTO_UPDATE:
        return JSONResponse(content={
            "status": "disabled",
            "message": "Token自动更新未启用"
        })

    token_updater = Config.get_token_updater()
    return JSONResponse(content={
        "status": "success",
        "data": token_updater.get_status()
    })

@app.post("/admin/tokens/updater/force-update")
async def force_update_tokens():
    """Run a token refresh now; on success reload the pool and return stats."""
    if not Config.ENABLE_TOKEN_AUTO_UPDATE:
        return JSONResponse(
            status_code=400,
            content={
                "status": "error",
                "message": "Token自动更新未启用"
            }
        )

    token_updater = Config.get_token_updater()
    success = await token_updater.force_update_async()

    if success:
        # Pick up the freshly written tokens file.
        Config.reload_tokens()
        token_manager = Config.get_token_manager()
        stats = token_manager.get_token_stats()
        return JSONResponse(content={
            "status": "success",
            "message": "Token强制更新成功",
            "data": stats
        })
    return JSONResponse(
        status_code=500,
        content={
            "status": "error",
            "message": "Token强制更新失败"
        }
    )

@app.post("/admin/tokens/updater/cleanup-temp")
async def cleanup_temp_files():
    """Delete leftover updater temp/backup files."""
    if not Config.ENABLE_TOKEN_AUTO_UPDATE:
        return JSONResponse(
            status_code=400,
            content={
                "status": "error",
                "message": "Token自动更新未启用"
            }
        )

    token_updater = Config.get_token_updater()
    cleaned_count = token_updater.cleanup_all_temp_files()

    return JSONResponse(content={
        "status": "success",
        "message": f"临时文件清理完成,共清理 {cleaned_count} 个文件",
        "data": {
            "cleaned_files": cleaned_count
        }
    })

@app.exception_handler(K2ThinkProxyError)
async def proxy_exception_handler(request: Request, exc: K2ThinkProxyError):
    """Map domain exceptions to OpenAI-style error JSON."""
    return JSONResponse(
        status_code=exc.status_code,
        content={
            "error": {
                "message": exc.message,
                "type": exc.error_type
            }
        }
    )

@app.exception_handler(404)
async def not_found_handler(request: Request, exc):
    """Uniform JSON body for unknown routes."""
    return JSONResponse(
        status_code=404,
        content={"error": "Not Found"}
    )

if __name__ == "__main__":
    import uvicorn

    # uvicorn's log level follows the app's debug flag.
    log_level = "debug" if Config.DEBUG_LOGGING else "info"

    logger.info(f"启动服务器: {Config.HOST}:{Config.PORT}")
    logger.info("思考内容输出: 通过模型名控制 (MBZUAI-IFM/K2-Think vs MBZUAI-IFM/K2-Think-nothink)")

    uvicorn.run(
        app,
        host=Config.HOST,
        port=Config.PORT,
        access_log=Config.ENABLE_ACCESS_LOG,
        log_level=log_level
    )
接口。 4 | 5 | ## 核心功能特性 6 | 7 | - 🧠 **MBZUAI K2-Think 模型**: 支持 MBZUAI 开发的 K2-Think 推理模型 8 | - 🔄 **OpenAI 兼容**: 完全兼容 OpenAI API 格式,无缝对接现有应用 9 | - ⚡ **流式响应**: 支持实时流式聊天响应,支持控制thinking输出 10 | - 🛠️ **工具调用**: 支持 OpenAI Function Calling,可集成外部工具和API 11 | - 📊 **文件上传**: 支持文件、图像上传 12 | 13 | ## 智能Token管理系统 14 | 15 | ### 🔄 Token轮询与负载均衡 16 | 17 | - 多token轮流使用,自动故障转移 18 | - 支持大规模token池(支持数百个token) 19 | 20 | ### 🛡️ 智能失效检测与自愈 21 | 22 | - **自动失效检测**: 三次失败后自动禁用失效token 23 | - **连续失效自动刷新**: 当连续两个token失效时,自动触发强制刷新(仅在token池数量>2时生效) 24 | - **智能重试机制**: 失效token会被跳过,确保服务连续性 25 | 26 | ### 📈 Token池管理 27 | 28 | - 完整的管理API查看状态、重置token等 29 | - 实时监控token使用情况和失效统计 30 | - 支持手动重置和重新加载 31 | 32 | ### 🔄 Token自动更新 33 | 34 | - 定期从账户文件自动生成新的token池 35 | - **原子性更新**: 零停机时间,更新过程中服务保持可用 36 | - **智能触发**: 支持定时更新和连续失效触发的强制更新 37 | 38 | ### 🌐 网络适应性 39 | 40 | - 支持HTTP/HTTPS代理配置,适应不同网络环境 41 | - 🚀 **高性能**: 异步处理架构,支持高并发请求 42 | - 🐳 **容器化**: 支持 Docker 部署 43 | 44 | ## 快速开始 45 | 46 | ### 本地运行 47 | 48 | 1. **安装依赖** 49 | 50 | ```bash 51 | pip install -r requirements.txt 52 | ``` 53 | 54 | 2. **配置环境变量** 55 | 56 | ```bash 57 | cp .env.example .env 58 | # 编辑 .env 文件,配置你的API密钥和其他选项 59 | ``` 60 | 61 | 3. **准备Token文件** 62 | 63 | 有两种方式管理Token: 64 | 65 | **方式一:手动管理(传统方式)** 66 | 67 | ```bash 68 | # 复制token示例文件并编辑 69 | cd data 70 | cp tokens.example.txt tokens.txt 71 | # 编辑tokens.txt文件,添加你的实际K2Think tokens 72 | ``` 73 | 74 | **方式二:自动更新(推荐)** 75 | 76 | ```bash 77 | # 准备账户文件 78 | echo '{"email": "your-email@example.com", "k2_password": "your-password"}' > accounts.txt 79 | # 可以添加多个账户,每行一个JSON对象 80 | ``` 81 | 82 | 4. 
**启动服务** 83 | 84 | ```bash 85 | python k2think_proxy.py 86 | ``` 87 | 88 | 服务将在 `http://localhost:8001` 启动。 89 | 90 | ### Docker 部署 91 | 92 | #### 使用 docker-compose(推荐) 93 | 94 | ```bash 95 | # 准备配置文件 96 | cp .env.example .env 97 | cd data 98 | cp accounts.example.txt accounts.txt 99 | 100 | # 编辑配置 101 | # 编辑 .env 文件配置API密钥等 102 | # 编辑 accounts.txt 添加K2Think账户信息,格式:{"email": "xxx@yyy.zzz", "k2_password": "xxx"},一行一个 103 | 104 | # 启动服务 105 | docker-compose up -d 106 | 107 | # 检查服务状态 108 | docker-compose logs -f k2think-api 109 | ``` 110 | 111 | #### 手动构建部署 112 | 113 | ```bash 114 | # 构建镜像 115 | docker build -t k2think-api . 116 | 117 | # 运行容器 118 | docker run -d \ 119 | --name k2think-api \ 120 | -p 8001:8001 \ 121 | -v $(pwd)/tokens.txt:/app/tokens.txt \ 122 | -v $(pwd)/accounts.txt:/app/accounts.txt:ro \ 123 | -v $(pwd)/.env:/app/.env:ro \ 124 | k2think-api 125 | ``` 126 | 127 | ## API 接口 128 | 129 | ### 聊天补全 130 | 131 | **POST** `/v1/chat/completions` 132 | 133 | ```bash 134 | curl -X POST http://localhost:8001/v1/chat/completions \ 135 | -H "Content-Type: application/json" \ 136 | -H "Authorization: Bearer sk-k2think" \ 137 | -d '{ 138 | "model": "MBZUAI-IFM/K2-Think", 139 | "messages": [ 140 | {"role": "user", "content": "你擅长什么?"} 141 | ], 142 | "stream": false 143 | }' 144 | ``` 145 | 146 | ### 模型列表 147 | 148 | **GET** `/v1/models` 149 | 150 | ```bash 151 | curl http://localhost:8001/v1/models \ 152 | -H "Authorization: Bearer sk-k2think" 153 | ``` 154 | 155 | ### Token管理接口 156 | 157 | 查看token池状态: 158 | 159 | ```bash 160 | curl http://localhost:8001/admin/tokens/stats 161 | ``` 162 | 163 | 查看连续失效状态: 164 | 165 | ```bash 166 | curl http://localhost:8001/admin/tokens/consecutive-failures 167 | ``` 168 | 169 | 重置连续失效计数: 170 | 171 | ```bash 172 | curl -X POST http://localhost:8001/admin/tokens/reset-consecutive 173 | ``` 174 | 175 | 重置指定token: 176 | 177 | ```bash 178 | curl -X POST http://localhost:8001/admin/tokens/reset/0 179 | ``` 180 | 181 | 重置所有token: 182 | 
183 | ```bash 184 | curl -X POST http://localhost:8001/admin/tokens/reset-all 185 | ``` 186 | 187 | 重新加载token文件: 188 | 189 | ```bash 190 | curl -X POST http://localhost:8001/admin/tokens/reload 191 | ``` 192 | 193 | 查看token更新器状态(仅在启用自动更新时可用): 194 | 195 | ```bash 196 | curl http://localhost:8001/admin/tokens/updater/status 197 | ``` 198 | 199 | 强制更新tokens(仅在启用自动更新时可用): 200 | 201 | ```bash 202 | curl -X POST http://localhost:8001/admin/tokens/updater/force-update 203 | ``` 204 | 205 | ### 健康检查 206 | 207 | ```bash 208 | curl http://localhost:8001/health 209 | ``` 210 | 211 | ## 环境变量配置 212 | 213 | ### 基础配置 214 | 215 | | 变量名 | 默认值 | 说明 | 216 | | ------------------- | ------------------------------------------- | -------------------- | 217 | | `VALID_API_KEY` | 无默认值 | API 访问密钥(必需) | 218 | | `K2THINK_API_URL` | https://www.k2think.ai/api/chat/completions | K2Think API端点 | 219 | 220 | ### Token管理配置 221 | 222 | | 变量名 | 默认值 | 说明 | 223 | | ---------------------- | -------------- | ----------------- | 224 | | `TOKENS_FILE` | `tokens.txt` | Token文件路径 | 225 | | `MAX_TOKEN_FAILURES` | `3` | Token最大失败次数 | 226 | 227 | ### Token自动更新配置 228 | 229 | | 变量名 | 默认值 | 说明 | 230 | | ---------------------------- | ----------------- | --------------------------------------- | 231 | | `ENABLE_TOKEN_AUTO_UPDATE` | `false` | 是否启用token自动更新 | 232 | | `TOKEN_UPDATE_INTERVAL` | `86400` | token更新间隔(秒),默认24小时 | 233 | | `ACCOUNTS_FILE` | `accounts.txt` | 账户文件路径 | 234 | | `GET_TOKENS_SCRIPT` | `get_tokens.py` | token获取脚本路径 | 235 | | `PROXY_URL` | 空 | HTTP/HTTPS代理地址(用于get_tokens.py) | 236 | 237 | ### 服务器配置 238 | 239 | | 变量名 | 默认值 | 说明 | 240 | | -------- | ----------- | ------------ | 241 | | `HOST` | `0.0.0.0` | 服务监听地址 | 242 | | `PORT` | `8001` | 服务端口 | 243 | 244 | ### 工具调用配置 245 | 246 | | 变量名 | 默认值 | 说明 | 247 | | ------------------------- | -------- | -------------------------------- | 248 | | `ENABLE_TOOLIFY` | `true` | 是否启用工具调用功能 | 249 | | `TOOLIFY_CUSTOM_PROMPT` | `""` | 自定义工具调用提示词模板(可选) | 250 | 251 | 
详细配置说明请参考 `.env.example` 文件。 252 | 253 | ## 智能Token管理系统详解 254 | 255 | ### 连续失效自动刷新机制 256 | 257 | 这是系统的核心自愈功能,当检测到连续的token失效时,自动触发强制刷新: 258 | 259 | #### 工作原理 260 | 261 | 1. **连续失效检测** 262 | 263 | - 系统跟踪连续失效的token数量 264 | - 当连续两个token失效时触发自动刷新 265 | - 仅在token池数量大于2时启用(避免小规模token池误触发) 266 | 2. **智能触发条件** 267 | 268 | - 连续失效阈值:2个token 269 | - 最小token池大小:3个token 270 | - 自动更新必须启用:`ENABLE_TOKEN_AUTO_UPDATE=true` 271 | 3. **自动刷新过程** 272 | 273 | - 异步执行,不阻塞当前API请求 274 | - 使用原子性更新机制 275 | - 刷新成功后自动重新加载token池 276 | - 重置连续失效计数器 277 | 278 | #### 监控和管理 279 | 280 | ```bash 281 | # 查看连续失效状态 282 | curl http://localhost:8001/admin/tokens/consecutive-failures 283 | 284 | # 响应示例 285 | { 286 | "status": "success", 287 | "data": { 288 | "consecutive_failures": 1, 289 | "threshold": 2, 290 | "token_pool_size": 710, 291 | "auto_refresh_enabled": true, 292 | "last_check": "实时检测" 293 | } 294 | } 295 | 296 | # 手动重置连续失效计数 297 | curl -X POST http://localhost:8001/admin/tokens/reset-consecutive 298 | ``` 299 | 300 | ### Token自动更新机制 301 | 302 | #### 功能说明 303 | 304 | Token自动更新机制允许系统定期从账户文件自动生成新的token池,无需手动维护tokens.txt文件。 305 | 306 | #### 配置步骤 307 | 308 | 1. **准备账户文件** 309 | 310 | 创建 `accounts.txt` 文件,每行一个JSON格式的账户信息: 311 | 312 | ```json 313 | {"email": "user1@example.com", "k2_password": "password1"} 314 | {"email": "user2@example.com", "k2_password": "password2"} 315 | {"email": "user3@example.com", "k2_password": "password3"} 316 | ``` 317 | 318 | 2. **启用自动更新** 319 | 320 | 在 `.env` 文件中配置: 321 | 322 | ```bash 323 | # 启用token自动更新 324 | ENABLE_TOKEN_AUTO_UPDATE=true 325 | 326 | # 设置更新间隔(秒) 327 | TOKEN_UPDATE_INTERVAL=86400 # 每24小时更新一次 328 | 329 | # 配置文件路径 330 | ACCOUNTS_FILE=accounts.txt 331 | TOKENS_FILE=tokens.txt 332 | GET_TOKENS_SCRIPT=get_tokens.py 333 | 334 | # 可选:配置代理(如果需要) 335 | PROXY_URL=http://username:password@proxy_host:proxy_port 336 | ``` 337 | 338 | 3. 
**更新触发方式** 339 | 340 | 系统支持多种更新触发方式: 341 | 342 | - **定时更新**: 按照设置的间隔定期更新 343 | - **连续失效触发**: 当连续两个token失效时自动触发 344 | - **手动强制更新**: 通过API手动触发更新 345 | - **启动时更新**: 如果token文件为空或无效,启动时立即更新 346 | 347 | #### 原子性更新机制 348 | 349 | 为了确保token更新过程中服务的连续性,系统采用了原子性更新机制: 350 | 351 | 1. **临时文件生成**: 新token首先写入 `tokens.txt.tmp` 临时文件 352 | 2. **验证检查**: 确认临时文件存在且不为空 353 | 3. **备份当前文件**: 将现有 `tokens.txt` 重命名为 `tokens.txt.backup` 354 | 4. **原子性替换**: 将临时文件重命名为 `tokens.txt` 355 | 5. **重新加载**: 通知token管理器重新加载新的token池 356 | 357 | #### 更新状态监控 358 | 359 | 通过管理接口可以实时监控更新状态: 360 | 361 | ```bash 362 | # 查看详细更新状态 363 | curl http://localhost:8001/admin/tokens/updater/status 364 | 365 | # 响应示例 366 | { 367 | "status": "success", 368 | "data": { 369 | "is_running": true, 370 | "is_updating": false, 371 | "update_interval": 86400, 372 | "last_update": "2024-01-01T12:00:00", 373 | "update_count": 5, 374 | "error_count": 0, 375 | "last_error": null, 376 | "next_update": "2024-01-01T13:00:00", 377 | "files": { 378 | "get_tokens_script": true, 379 | "accounts_file": true, 380 | "tokens_file": true 381 | } 382 | } 383 | } 384 | ``` 385 | 386 | #### 服务保障特性 387 | 388 | - ✅ **零停机时间**: 更新过程中API服务保持可用 389 | - ✅ **请求不中断**: 正在处理的请求不会受到影响 390 | - ✅ **自动恢复**: 连续失效时自动触发刷新 391 | - ✅ **回滚机制**: 更新失败时保留原有token文件 392 | - ✅ **状态透明**: 可实时查看更新进度和状态 393 | - ✅ **错误处理**: 更新失败时记录详细错误信息 394 | 395 | ## 工具调用功能 396 | 397 | K2Think API 代理支持 OpenAI Function Calling 规范的工具调用功能。 398 | 399 | ### 功能特性 400 | 401 | - ✅ 支持 OpenAI 标准的 `tools` 和 `tool_choice` 参数 402 | - ✅ 自动工具提示注入和消息处理 403 | - ✅ 流式和非流式响应中的工具调用检测 404 | - ✅ 智能 JSON 解析和工具调用提取 405 | - ✅ 支持多种工具调用格式(JSON 代码块、内联 JSON、自然语言) 406 | 407 | ### 使用示例 408 | 409 | ```python 410 | import openai 411 | 412 | client = openai.OpenAI( 413 | base_url="http://localhost:8001/v1", 414 | api_key="sk-k2think" 415 | ) 416 | 417 | # 定义工具 418 | tools = [ 419 | { 420 | "type": "function", 421 | "function": { 422 | "name": "get_weather", 423 | "description": "获取指定城市的天气信息", 424 | "parameters": { 425 | "type": 
"object", 426 | "properties": { 427 | "city": { 428 | "type": "string", 429 | "description": "城市名称,例如:北京、上海" 430 | }, 431 | "unit": { 432 | "type": "string", 433 | "enum": ["celsius", "fahrenheit"], 434 | "description": "温度单位" 435 | } 436 | }, 437 | "required": ["city"] 438 | } 439 | } 440 | } 441 | ] 442 | 443 | # 发送工具调用请求 444 | response = client.chat.completions.create( 445 | model="MBZUAI-IFM/K2-Think", 446 | messages=[ 447 | {"role": "user", "content": "北京今天天气怎么样?"} 448 | ], 449 | tools=tools, 450 | tool_choice="auto" # auto, none, required 或指定特定工具 451 | ) 452 | 453 | # 处理响应 454 | if response.choices[0].message.tool_calls: 455 | for tool_call in response.choices[0].message.tool_calls: 456 | function_name = tool_call.function.name 457 | function_args = tool_call.function.arguments 458 | print(f"调用工具: {function_name}") 459 | print(f"参数: {function_args}") 460 | ``` 461 | 462 | ### tool_choice 参数说明 463 | 464 | - `"auto"`: 让模型自动决定是否使用工具(推荐) 465 | - `"none"`: 禁用工具调用 466 | - `"required"`: 强制模型使用工具 467 | - `{"type": "function", "function": {"name": "tool_name"}}`: 强制使用特定工具 468 | 469 | ## Python SDK 使用示例 470 | 471 | ```python 472 | import openai 473 | 474 | # 配置客户端 475 | client = openai.OpenAI( 476 | base_url="http://localhost:8001/v1", 477 | api_key="sk-k2think" 478 | ) 479 | 480 | # 发送聊天请求 481 | response = client.chat.completions.create( 482 | model="MBZUAI-IFM/K2-Think", 483 | messages=[ 484 | {"role": "user", "content": "解释一下量子计算的基本原理"} 485 | ], 486 | stream=False 487 | ) 488 | 489 | print(response.choices[0].message.content) 490 | 491 | # 流式聊天 492 | stream = client.chat.completions.create( 493 | model="MBZUAI-IFM/K2-Think", 494 | messages=[ 495 | {"role": "user", "content": "写一首关于人工智能的诗"} 496 | ], 497 | stream=True 498 | ) 499 | 500 | for chunk in stream: 501 | if chunk.choices[0].delta.content is not None: 502 | print(chunk.choices[0].delta.content, end="") 503 | ``` 504 | 505 | ## 模型特性 506 | 507 | K2-Think 模型具有以下特点: 508 | 509 | - **推理能力**: 模型会先进行思考过程,然后给出答案 510 | 
- **响应格式**: 使用 `<think>` 和 `<answer>` 标签结构化输出
**端口冲突** 572 | 573 | - 修改 `PORT` 环境变量 574 | - 或使用 Docker 端口映射 575 | 576 | ### 日志查看 577 | 578 | ```bash 579 | # Docker 容器日志 580 | docker logs k2think-api 581 | 582 | # docker-compose日志 583 | docker-compose logs -f k2think-api 584 | 585 | # 本地运行日志 586 | # 日志会直接输出到控制台 587 | ``` 588 | 589 | ### 配置检查 590 | 591 | 使用配置检查脚本验证你的环境变量设置: 592 | 593 | ```bash 594 | # 检查当前配置 595 | python check_config_simple.py 596 | 597 | # 查看配置示例 598 | python check_config_simple.py --example 599 | ``` 600 | 601 | ### Docker部署注意事项 602 | 603 | 1. **文件映射** 604 | 605 | - `tokens.txt` 通过volume映射到容器内,支持动态更新 606 | - 如果启用自动更新,`tokens.txt` 不能设置为只读(`:ro`) 607 | - `accounts.txt` 映射为只读,包含账户信息用于自动更新 608 | - `.env` 文件包含所有环境变量配置 609 | 2. **健康检查** 610 | 611 | - Docker容器包含健康检查机制 612 | - 可通过 `docker ps` 查看健康状态 613 | 3. **安全考虑** 614 | 615 | - 容器以非root用户运行 616 | - 敏感文件通过volume挂载而非打包到镜像中 617 | 618 | ## 许可证 619 | 620 | MIT License 621 | 622 | ## 贡献 623 | 624 | 欢迎提交 Issue 和 Pull Request! 625 | -------------------------------------------------------------------------------- /src/token_manager.py: -------------------------------------------------------------------------------- 1 | """ 2 | Token管理模块 3 | 负责管理K2Think的token池,实现轮询、负载均衡和失效标记 4 | """ 5 | import os 6 | import json 7 | import logging 8 | import threading 9 | 10 | from typing import List, Dict, Optional, Tuple 11 | from datetime import datetime, timedelta 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | # 导入安全日志函数 16 | try: 17 | from src.utils import safe_log_error, safe_log_info, safe_log_warning 18 | except ImportError: 19 | # 如果导入失败,提供简单的替代函数 20 | def safe_log_error(logger, msg, exc=None): 21 | try: 22 | if exc: 23 | logger.error(f"{msg}: {str(exc)}") 24 | else: 25 | logger.error(msg) 26 | except: 27 | print(f"Log error: {msg}") 28 | 29 | def safe_log_info(logger, msg): 30 | try: 31 | logger.info(msg) 32 | except: 33 | print(f"Log info: {msg}") 34 | 35 | def safe_log_warning(logger, msg): 36 | try: 37 | logger.warning(msg) 38 | except: 39 | 
print(f"Log warning: {msg}") 40 | 41 | class TokenManager: 42 | """Token管理器 - 支持轮询、负载均衡和失效标记""" 43 | 44 | def __init__(self, tokens_file: str = "tokens.txt", max_failures: int = 3, allow_empty: bool = False): 45 | """ 46 | 初始化token管理器 47 | 48 | Args: 49 | tokens_file: token文件路径 50 | max_failures: 最大失败次数,超过后标记为失效 51 | allow_empty: 是否允许空的token文件(用于自动更新模式) 52 | """ 53 | self.tokens_file = tokens_file 54 | self.max_failures = max_failures 55 | self.tokens: List[Dict] = [] 56 | self.current_index = 0 57 | self.lock = threading.Lock() 58 | self.allow_empty = allow_empty 59 | 60 | # 连续失效检测 61 | self.consecutive_failures = 0 62 | self.consecutive_failure_threshold = 2 # 连续失效阈值 63 | self.force_refresh_callback = None # 强制刷新回调函数 64 | 65 | # 上游服务连续报错检测 66 | self.consecutive_upstream_errors = 0 67 | self.upstream_error_threshold = 2 # 上游服务连续报错阈值 68 | self.last_upstream_error_time = None 69 | 70 | # 加载tokens 71 | self.load_tokens() 72 | 73 | if not self.tokens and not allow_empty: 74 | raise ValueError(f"未找到有效的token,请检查文件: {tokens_file}") 75 | 76 | def load_tokens(self) -> None: 77 | """从文件加载token列表""" 78 | try: 79 | if not os.path.exists(self.tokens_file): 80 | raise FileNotFoundError(f"Token文件不存在: {self.tokens_file}") 81 | 82 | with open(self.tokens_file, 'r', encoding='utf-8') as f: 83 | lines = f.readlines() 84 | 85 | self.tokens = [] 86 | valid_token_index = 0 87 | for line in lines: 88 | token = line.strip() 89 | # 忽略空行和注释行 90 | if token and not token.startswith('#'): 91 | self.tokens.append({ 92 | 'token': token, 93 | 'failures': 0, 94 | 'is_active': True, 95 | 'last_used': None, 96 | 'last_failure': None, 97 | 'index': valid_token_index 98 | }) 99 | valid_token_index += 1 100 | 101 | safe_log_info(logger, f"成功加载 {len(self.tokens)} 个token") 102 | 103 | except Exception as e: 104 | safe_log_error(logger, "加载token文件失败", e) 105 | raise 106 | 107 | 108 | def get_next_token(self) -> Optional[str]: 109 | """ 110 | 获取下一个可用的token(轮询算法) 111 | 112 | Returns: 113 | 
可用的token字符串,如果没有可用token则返回None 114 | """ 115 | with self.lock: 116 | active_tokens = [t for t in self.tokens if t['is_active']] 117 | 118 | if not active_tokens: 119 | if self.allow_empty: 120 | safe_log_warning(logger, "没有可用的token,可能正在等待自动更新") 121 | else: 122 | safe_log_warning(logger, "没有可用的token") 123 | return None 124 | 125 | # 轮询算法:从当前索引开始寻找下一个可用token 126 | attempts = 0 127 | while attempts < len(self.tokens): 128 | token_info = self.tokens[self.current_index] 129 | 130 | if token_info['is_active']: 131 | # 更新使用时间 132 | token_info['last_used'] = datetime.now() 133 | token = token_info['token'] 134 | 135 | # 移动到下一个索引 136 | self.current_index = (self.current_index + 1) % len(self.tokens) 137 | 138 | logger.debug(f"分配token (索引: {token_info['index']}, 失败次数: {token_info['failures']})") 139 | return token 140 | 141 | # 移动到下一个token 142 | self.current_index = (self.current_index + 1) % len(self.tokens) 143 | attempts += 1 144 | 145 | safe_log_warning(logger, "所有token都已失效") 146 | return None 147 | 148 | def mark_token_failure(self, token: str, error_message: str = "") -> bool: 149 | """ 150 | 标记token使用失败 151 | 152 | Args: 153 | token: 失败的token 154 | error_message: 错误信息 155 | 156 | Returns: 157 | 如果token被标记为失效返回True,否则返回False 158 | """ 159 | with self.lock: 160 | for token_info in self.tokens: 161 | if token_info['token'] == token: 162 | token_info['failures'] += 1 163 | token_info['last_failure'] = datetime.now() 164 | 165 | # 检查是否是上游服务错误(401等认证错误) 166 | is_upstream_error = self._is_upstream_error(error_message) 167 | 168 | if is_upstream_error: 169 | # 增加上游服务连续报错计数 170 | self.consecutive_upstream_errors += 1 171 | self.last_upstream_error_time = datetime.now() 172 | 173 | safe_log_warning(logger, f"🔒 上游服务认证错误 (索引: {token_info['index']}, " 174 | f"失败次数: {token_info['failures']}/{self.max_failures}, " 175 | f"连续上游错误: {self.consecutive_upstream_errors}): {error_message}") 176 | 177 | # 401错误立即触发强制刷新(不等连续错误阈值) 178 | if "401" in error_message and 
self.force_refresh_callback: 179 | safe_log_warning(logger, f"🚨 检测到401认证错误,立即触发token强制刷新") 180 | self._trigger_force_refresh("401认证失败") 181 | # 重置连续计数,避免重复触发 182 | self.consecutive_upstream_errors = 0 183 | else: 184 | # 其他上游错误按原逻辑处理 185 | self._check_consecutive_upstream_errors() 186 | else: 187 | # 增加连续失效计数 188 | self.consecutive_failures += 1 189 | 190 | safe_log_warning(logger, f"Token失败 (索引: {token_info['index']}, " 191 | f"失败次数: {token_info['failures']}/{self.max_failures}, " 192 | f"连续失效: {self.consecutive_failures}): {error_message}") 193 | 194 | # 检查连续失效触发条件 195 | self._check_consecutive_failures() 196 | 197 | # 检查是否达到最大失败次数 198 | if token_info['failures'] >= self.max_failures: 199 | token_info['is_active'] = False 200 | safe_log_error(logger, f"Token已失效 (索引: {token_info['index']}, " 201 | f"失败次数: {token_info['failures']})") 202 | return True 203 | 204 | return False 205 | 206 | safe_log_warning(logger, "未找到匹配的token进行失败标记") 207 | return False 208 | 209 | def mark_token_success(self, token: str) -> None: 210 | """ 211 | 标记token使用成功(重置失败计数) 212 | 213 | Args: 214 | token: 成功的token 215 | """ 216 | with self.lock: 217 | for token_info in self.tokens: 218 | if token_info['token'] == token: 219 | if token_info['failures'] > 0: 220 | safe_log_info(logger, f"Token恢复 (索引: {token_info['index']}, " 221 | f"重置失败次数: {token_info['failures']} -> 0)") 222 | token_info['failures'] = 0 223 | 224 | # 成功请求重置上游服务错误计数 225 | if self.consecutive_upstream_errors > 0: 226 | safe_log_info(logger, f"重置上游服务连续错误计数: {self.consecutive_upstream_errors} -> 0") 227 | self.consecutive_upstream_errors = 0 228 | 229 | # 注意:不再自动重置连续失效计数,只有手动重置或强制刷新成功后才重置 230 | return 231 | 232 | def get_token_stats(self) -> Dict: 233 | """ 234 | 获取token池统计信息 235 | 236 | Returns: 237 | 包含统计信息的字典 238 | """ 239 | with self.lock: 240 | total = len(self.tokens) 241 | active = sum(1 for t in self.tokens if t['is_active']) 242 | inactive = total - active 243 | 244 | failure_distribution = {} 245 | for token_info in 
self.tokens: 246 | failures = token_info['failures'] 247 | failure_distribution[failures] = failure_distribution.get(failures, 0) + 1 248 | 249 | return { 250 | 'total_tokens': total, 251 | 'active_tokens': active, 252 | 'inactive_tokens': inactive, 253 | 'current_index': self.current_index, 254 | 'failure_distribution': failure_distribution, 255 | 'max_failures': self.max_failures 256 | } 257 | 258 | def reset_token(self, token_index: int) -> bool: 259 | """ 260 | 重置指定索引的token(清除失败计数,重新激活) 261 | 262 | Args: 263 | token_index: token索引 264 | 265 | Returns: 266 | 重置成功返回True,否则返回False 267 | """ 268 | with self.lock: 269 | if 0 <= token_index < len(self.tokens): 270 | token_info = self.tokens[token_index] 271 | old_failures = token_info['failures'] 272 | old_active = token_info['is_active'] 273 | 274 | token_info['failures'] = 0 275 | token_info['is_active'] = True 276 | token_info['last_failure'] = None 277 | 278 | safe_log_info(logger, f"Token重置 (索引: {token_index}, " 279 | f"失败次数: {old_failures} -> 0, " 280 | f"状态: {old_active} -> True)") 281 | return True 282 | 283 | safe_log_warning(logger, f"无效的token索引: {token_index}") 284 | return False 285 | 286 | def reset_all_tokens(self) -> None: 287 | """重置所有token(清除所有失败计数,重新激活所有token)""" 288 | with self.lock: 289 | reset_count = 0 290 | for token_info in self.tokens: 291 | if token_info['failures'] > 0 or not token_info['is_active']: 292 | token_info['failures'] = 0 293 | token_info['is_active'] = True 294 | token_info['last_failure'] = None 295 | reset_count += 1 296 | 297 | safe_log_info(logger, f"重置了 {reset_count} 个token,当前活跃token数: {len(self.tokens)}") 298 | 299 | def reload_tokens(self) -> None: 300 | """重新加载token文件""" 301 | safe_log_info(logger, "重新加载token文件...") 302 | old_count = len(self.tokens) 303 | self.load_tokens() 304 | new_count = len(self.tokens) 305 | 306 | safe_log_info(logger, f"Token重新加载完成: {old_count} -> {new_count}") 307 | 308 | def get_token_by_index(self, index: int) -> Optional[Dict]: 309 | 
"""根据索引获取token信息""" 310 | with self.lock: 311 | if 0 <= index < len(self.tokens): 312 | return self.tokens[index].copy() 313 | return None 314 | 315 | def set_force_refresh_callback(self, callback): 316 | """ 317 | 设置强制刷新回调函数 318 | 319 | Args: 320 | callback: 当需要强制刷新时调用的异步函数 321 | """ 322 | self.force_refresh_callback = callback 323 | safe_log_info(logger, "已设置强制刷新回调函数") 324 | 325 | def _is_upstream_error(self, error_message: str) -> bool: 326 | """ 327 | 判断是否为上游服务错误 328 | 329 | Args: 330 | error_message: 错误信息 331 | 332 | Returns: 333 | 如果是上游服务错误返回True,否则返回False 334 | """ 335 | # 检查常见的上游服务错误标识 336 | upstream_error_indicators = [ 337 | "上游服务错误: 401", 338 | "上游服务错误: 403", 339 | "401", 340 | "403", 341 | "unauthorized", 342 | "forbidden", 343 | "invalid token", 344 | "authentication failed", 345 | "token expired", 346 | "authentication error", 347 | "invalid_request_error", 348 | "authentication_error" 349 | ] 350 | 351 | error_lower = error_message.lower() 352 | is_upstream = any(indicator.lower() in error_lower for indicator in upstream_error_indicators) 353 | 354 | # 特别检查HTTP状态码模式 355 | import re 356 | # 匹配 "上游服务错误: xxx" 或 "HTTP状态错误: xxx" 等格式中的401/403 357 | status_code_pattern = r'(?:上游服务错误|http状态错误|状态码):\s*(?:40[13])' 358 | if re.search(status_code_pattern, error_lower): 359 | is_upstream = True 360 | 361 | if is_upstream: 362 | safe_log_info(logger, f"检测到上游服务认证错误: {error_message}") 363 | 364 | return is_upstream 365 | 366 | def _check_consecutive_upstream_errors(self): 367 | """ 368 | 检查上游服务连续报错情况,触发强制刷新机制 369 | """ 370 | if self.consecutive_upstream_errors >= self.upstream_error_threshold: 371 | safe_log_warning(logger, f"🚨 检测到连续{self.consecutive_upstream_errors}个上游服务认证错误(401/403),触发自动刷新token池") 372 | 373 | # 重置上游错误计数,避免重复触发 374 | self.consecutive_upstream_errors = 0 375 | 376 | if self.force_refresh_callback: 377 | self._trigger_force_refresh("上游服务连续认证失败 (401/403)") 378 | else: 379 | safe_log_warning(logger, "⚠️ 未设置强制刷新回调函数,无法自动刷新token池") 380 | 381 | def 
_check_consecutive_failures(self): 382 | """ 383 | 检查连续失效情况,触发强制刷新机制 384 | """ 385 | # 只有在token池数量大于2时才检查连续失效 386 | if len(self.tokens) <= 2: 387 | logger.debug(f"Token池数量({len(self.tokens)})不足,跳过连续失效检查") 388 | return 389 | 390 | if self.consecutive_failures >= self.consecutive_failure_threshold: 391 | safe_log_warning(logger, f"检测到连续{self.consecutive_failures}个token失效,触发强制刷新机制") 392 | 393 | if self.force_refresh_callback: 394 | self._trigger_force_refresh("连续token失效") 395 | else: 396 | safe_log_warning(logger, "未设置强制刷新回调函数,无法自动刷新token池") 397 | 398 | def _trigger_force_refresh(self, reason: str): 399 | """ 400 | 触发强制刷新 401 | 402 | Args: 403 | reason: 触发原因 404 | """ 405 | try: 406 | # 异步调用强制刷新 407 | import asyncio 408 | import threading 409 | 410 | def run_async_callback(): 411 | try: 412 | # 创建新的事件循环(如果当前线程没有) 413 | try: 414 | loop = asyncio.get_event_loop() 415 | except RuntimeError: 416 | loop = asyncio.new_event_loop() 417 | asyncio.set_event_loop(loop) 418 | 419 | # 运行强制刷新(现在是同步函数) 420 | self.force_refresh_callback() 421 | 422 | safe_log_info(logger, f"🔄 强制刷新tokens.txt已触发 - 原因: {reason}") 423 | 424 | except Exception as e: 425 | safe_log_error(logger, "执行强制刷新回调失败", e) 426 | 427 | # 在新线程中执行,避免阻塞当前操作 428 | refresh_thread = threading.Thread(target=run_async_callback, daemon=True) 429 | refresh_thread.start() 430 | 431 | except Exception as e: 432 | safe_log_error(logger, "启动强制刷新线程失败", e) 433 | 434 | def get_consecutive_failures(self) -> int: 435 | """获取当前连续失效次数""" 436 | return self.consecutive_failures 437 | 438 | def get_consecutive_upstream_errors(self) -> int: 439 | """获取当前上游服务连续错误次数""" 440 | return self.consecutive_upstream_errors 441 | 442 | def reset_consecutive_failures(self): 443 | """重置连续失效计数""" 444 | with self.lock: 445 | old_count = self.consecutive_failures 446 | old_upstream_count = self.consecutive_upstream_errors 447 | 448 | self.consecutive_failures = 0 449 | self.consecutive_upstream_errors = 0 450 | 451 | if old_count > 0: 452 | 
safe_log_info(logger, f"手动重置连续失效计数: {old_count} -> 0") 453 | if old_upstream_count > 0: 454 | safe_log_info(logger, f"手动重置上游服务连续错误计数: {old_upstream_count} -> 0") 455 | 456 | 457 | -------------------------------------------------------------------------------- /src/response_processor.py: -------------------------------------------------------------------------------- 1 | """ 2 | 响应处理模块 3 | 处理流式和非流式响应的所有逻辑 4 | """ 5 | import json 6 | import time 7 | import asyncio 8 | import logging 9 | import uuid 10 | from datetime import datetime 11 | from typing import Dict, AsyncGenerator, Tuple, Optional 12 | import pytz 13 | import httpx 14 | 15 | from src.constants import ( 16 | APIConstants, ResponseConstants, ContentConstants, 17 | NumericConstants, TimeConstants, HeaderConstants 18 | ) 19 | from src.exceptions import UpstreamError, TimeoutError as ProxyTimeoutError 20 | from src.utils import safe_log_error, safe_log_info, safe_log_warning 21 | from src.toolify_config import get_toolify 22 | from src.toolify.detector import StreamingFunctionCallDetector 23 | 24 | logger = logging.getLogger(__name__) 25 | 26 | class ResponseProcessor: 27 | """响应处理器""" 28 | 29 | def __init__(self, config): 30 | self.config = config 31 | 32 | def extract_answer_content(self, full_content: str, output_thinking: bool = True) -> str: 33 | """删除第一个标签和最后一个标签,保留内容""" 34 | if not full_content: 35 | return full_content 36 | 37 | # 完全通过模型名控制思考内容输出,默认显示思考内容 38 | should_output_thinking = output_thinking 39 | 40 | if should_output_thinking: 41 | # 删除第一个 42 | answer_start = full_content.find(ContentConstants.ANSWER_START_TAG) 43 | if answer_start != -1: 44 | full_content = full_content[:answer_start] + full_content[answer_start + len(ContentConstants.ANSWER_START_TAG):] 45 | 46 | # 删除最后一个 47 | answer_end = full_content.rfind(ContentConstants.ANSWER_END_TAG) 48 | if answer_end != -1: 49 | full_content = full_content[:answer_end] + full_content[answer_end + len(ContentConstants.ANSWER_END_TAG):] 50 | 51 | 
return full_content.strip() 52 | else: 53 | # 删除部分(包括标签) 54 | think_start = full_content.find(ContentConstants.THINK_START_TAG) 55 | think_end = full_content.find(ContentConstants.THINK_END_TAG) 56 | if think_start != -1 and think_end != -1: 57 | full_content = full_content[:think_start] + full_content[think_end + len(ContentConstants.THINK_END_TAG):] 58 | 59 | # 删除标签及其内容之外的部分 60 | answer_start = full_content.find(ContentConstants.ANSWER_START_TAG) 61 | answer_end = full_content.rfind(ContentConstants.ANSWER_END_TAG) 62 | if answer_start != -1 and answer_end != -1: 63 | content = full_content[answer_start + len(ContentConstants.ANSWER_START_TAG):answer_end] 64 | return content.strip() 65 | 66 | return full_content.strip() 67 | 68 | def calculate_dynamic_chunk_size(self, content_length: int) -> int: 69 | """ 70 | 动态计算流式输出的chunk大小 71 | 确保总输出时间不超过MAX_STREAM_TIME秒 72 | 73 | Args: 74 | content_length: 待输出内容的总长度 75 | 76 | Returns: 77 | int: 动态计算的chunk大小,最小为50 78 | """ 79 | if content_length <= 0: 80 | return self.config.STREAM_CHUNK_SIZE 81 | 82 | # 计算需要的总chunk数量以满足时间限制 83 | # 总时间 = chunk数量 * STREAM_DELAY 84 | # chunk数量 = content_length / chunk_size 85 | # 所以:总时间 = (content_length / chunk_size) * STREAM_DELAY 86 | # 解出:chunk_size = (content_length * STREAM_DELAY) / MAX_STREAM_TIME 87 | 88 | calculated_chunk_size = int((content_length * self.config.STREAM_DELAY) / self.config.MAX_STREAM_TIME) 89 | 90 | # 确保chunk_size不小于最小值 91 | dynamic_chunk_size = max(calculated_chunk_size, NumericConstants.MIN_CHUNK_SIZE) 92 | 93 | # 如果计算出的chunk_size太大(比如内容很短),使用默认值 94 | if dynamic_chunk_size > content_length: 95 | dynamic_chunk_size = min(self.config.STREAM_CHUNK_SIZE, content_length) 96 | 97 | logger.debug(f"动态chunk_size计算: 内容长度={content_length}, 计算值={calculated_chunk_size}, 最终值={dynamic_chunk_size}") 98 | 99 | return dynamic_chunk_size 100 | 101 | def content_to_multimodal(self, content) -> str | list[dict]: 102 | """将内容转换为多模态格式用于K2Think API""" 103 | if content is None: 104 | return 
"" 105 | if isinstance(content, str): 106 | return content 107 | if isinstance(content, list): 108 | # 检查是否包含图像内容 109 | has_image = False 110 | result_parts = [] 111 | 112 | for p in content: 113 | if hasattr(p, 'type'): # ContentPart object 114 | if getattr(p, 'type') == ContentConstants.TEXT_TYPE and getattr(p, 'text', None): 115 | result_parts.append({ 116 | "type": ContentConstants.TEXT_TYPE, 117 | "text": getattr(p, 'text') 118 | }) 119 | elif getattr(p, 'type') == ContentConstants.IMAGE_URL_TYPE and getattr(p, 'image_url', None): 120 | has_image = True 121 | image_url_obj = getattr(p, 'image_url') 122 | if hasattr(image_url_obj, 'url'): 123 | url = getattr(image_url_obj, 'url') 124 | else: 125 | url = image_url_obj.get('url') if isinstance(image_url_obj, dict) else str(image_url_obj) 126 | 127 | result_parts.append({ 128 | "type": ContentConstants.IMAGE_URL_TYPE, 129 | "image_url": { 130 | "url": url 131 | } 132 | }) 133 | elif isinstance(p, dict): 134 | if p.get("type") == ContentConstants.TEXT_TYPE and p.get("text"): 135 | result_parts.append({ 136 | "type": ContentConstants.TEXT_TYPE, 137 | "text": p.get("text") 138 | }) 139 | elif p.get("type") == ContentConstants.IMAGE_URL_TYPE and p.get("image_url"): 140 | has_image = True 141 | result_parts.append({ 142 | "type": ContentConstants.IMAGE_URL_TYPE, 143 | "image_url": p.get("image_url") 144 | }) 145 | elif isinstance(p, str): 146 | result_parts.append({ 147 | "type": ContentConstants.TEXT_TYPE, 148 | "text": p 149 | }) 150 | 151 | # 如果包含图像,返回多模态格式;否则返回纯文本 152 | if has_image and result_parts: 153 | return result_parts 154 | else: 155 | # 提取所有文本内容 156 | text_parts = [] 157 | for part in result_parts: 158 | if part.get("type") == ContentConstants.TEXT_TYPE: 159 | text_parts.append(part.get("text", "")) 160 | return " ".join(text_parts) 161 | 162 | # 处理其他类型 163 | try: 164 | return str(content) 165 | except: 166 | return "" 167 | 168 | def get_current_datetime_info(self) -> Dict[str, str]: 169 | """获取当前时间信息""" 
170 | # 设置时区为上海 171 | tz = pytz.timezone(ContentConstants.DEFAULT_TIMEZONE) 172 | now = datetime.now(tz) 173 | 174 | return { 175 | "{{USER_NAME}}": ContentConstants.DEFAULT_USER_NAME, 176 | "{{USER_LOCATION}}": ContentConstants.DEFAULT_USER_LOCATION, 177 | "{{CURRENT_DATETIME}}": now.strftime(TimeConstants.DATETIME_FORMAT), 178 | "{{CURRENT_DATE}}": now.strftime(TimeConstants.DATE_FORMAT), 179 | "{{CURRENT_TIME}}": now.strftime(TimeConstants.TIME_FORMAT), 180 | "{{CURRENT_WEEKDAY}}": now.strftime(TimeConstants.WEEKDAY_FORMAT), 181 | "{{CURRENT_TIMEZONE}}": ContentConstants.DEFAULT_TIMEZONE, 182 | "{{USER_LANGUAGE}}": ContentConstants.DEFAULT_USER_LANGUAGE 183 | } 184 | 185 | def generate_session_id(self) -> str: 186 | """生成会话ID""" 187 | return str(uuid.uuid4()) 188 | 189 | def generate_chat_id(self) -> str: 190 | """生成聊天ID""" 191 | return str(uuid.uuid4()) 192 | 193 | async def create_http_client(self) -> httpx.AsyncClient: 194 | """创建HTTP客户端""" 195 | base_kwargs = { 196 | "timeout": httpx.Timeout(timeout=None, connect=10.0), 197 | "limits": httpx.Limits( 198 | max_keepalive_connections=self.config.MAX_KEEPALIVE_CONNECTIONS, 199 | max_connections=self.config.MAX_CONNECTIONS 200 | ), 201 | "follow_redirects": True 202 | } 203 | 204 | try: 205 | return httpx.AsyncClient(**base_kwargs) 206 | except Exception as e: 207 | safe_log_error(logger, "创建客户端失败", e) 208 | raise e 209 | 210 | async def make_request( 211 | self, 212 | method: str, 213 | url: str, 214 | headers: dict, 215 | json_data: dict = None, 216 | stream: bool = False 217 | ) -> httpx.Response: 218 | """发送HTTP请求""" 219 | client = None 220 | 221 | try: 222 | client = await self.create_http_client() 223 | 224 | if stream: 225 | # 流式请求返回context manager 226 | return client.stream(method, url, headers=headers, json=json_data, timeout=None) 227 | else: 228 | response = await client.request( 229 | method, url, headers=headers, json=json_data, 230 | timeout=self.config.REQUEST_TIMEOUT 231 | ) 232 | 233 | # 
详细记录非200响应 234 | if response.status_code != APIConstants.HTTP_OK: 235 | safe_log_error(logger, f"上游API返回错误状态码: {response.status_code}") 236 | safe_log_error(logger, f"响应头: {dict(response.headers)}") 237 | try: 238 | error_body = response.text 239 | safe_log_error(logger, f"错误响应体: {error_body}") 240 | except: 241 | safe_log_error(logger, "无法读取错误响应体") 242 | 243 | response.raise_for_status() 244 | return response 245 | 246 | except httpx.HTTPStatusError as e: 247 | safe_log_error(logger, f"HTTP状态错误: {e.response.status_code} - {e.response.text}") 248 | if client and not stream: 249 | await client.aclose() 250 | raise UpstreamError(f"上游服务错误: {e.response.status_code}", e.response.status_code) 251 | except httpx.TimeoutException as e: 252 | safe_log_error(logger, "请求超时", e) 253 | if client and not stream: 254 | await client.aclose() 255 | raise ProxyTimeoutError("请求超时") 256 | except Exception as e: 257 | safe_log_error(logger, "请求异常", e) 258 | if client and not stream: 259 | await client.aclose() 260 | raise e 261 | 262 | async def process_non_stream_response(self, k2think_payload: dict, headers: dict, output_thinking: bool = None) -> Tuple[str, dict]: 263 | """处理非流式响应""" 264 | try: 265 | response = await self.make_request( 266 | "POST", 267 | self.config.K2THINK_API_URL, 268 | headers, 269 | k2think_payload, 270 | stream=False 271 | ) 272 | 273 | # K2Think 非流式请求返回标准JSON格式 274 | result = response.json() 275 | 276 | # 提取内容 277 | full_content = "" 278 | if result.get('choices') and len(result['choices']) > 0: 279 | choice = result['choices'][0] 280 | if choice.get('message') and choice['message'].get('content'): 281 | raw_content = choice['message']['content'] 282 | # 提取标签中的内容,去除标签 283 | full_content = self.extract_answer_content(raw_content, output_thinking) 284 | 285 | # 提取token信息 286 | token_info = result.get('usage', { 287 | "prompt_tokens": NumericConstants.DEFAULT_PROMPT_TOKENS, 288 | "completion_tokens": NumericConstants.DEFAULT_COMPLETION_TOKENS, 289 | 
"total_tokens": NumericConstants.DEFAULT_TOTAL_TOKENS 290 | }) 291 | 292 | await response.aclose() 293 | return full_content, token_info 294 | 295 | except Exception as e: 296 | safe_log_error(logger, "处理非流式响应错误", e) 297 | raise 298 | 299 | async def process_stream_response( 300 | self, 301 | k2think_payload: dict, 302 | headers: dict, 303 | output_thinking: bool = None, 304 | original_model: str = None, 305 | enable_toolify: bool = False 306 | ) -> AsyncGenerator[str, None]: 307 | """处理流式响应""" 308 | try: 309 | # 发送开始chunk 310 | start_chunk = self._create_chunk_data( 311 | delta={"role": "assistant", "content": ""}, 312 | finish_reason=None, 313 | model=original_model 314 | ) 315 | yield f"{ResponseConstants.STREAM_DATA_PREFIX}{json.dumps(start_chunk)}\n\n" 316 | 317 | # 优化的模拟流式输出 - 立即开始获取响应并流式发送 318 | k2think_payload_copy = k2think_payload.copy() 319 | k2think_payload_copy["stream"] = False 320 | 321 | headers_copy = headers.copy() 322 | headers_copy[HeaderConstants.ACCEPT] = HeaderConstants.APPLICATION_JSON 323 | 324 | # 获取完整响应 325 | full_content, token_info = await self.process_non_stream_response(k2think_payload_copy, headers_copy, output_thinking) 326 | 327 | if not full_content: 328 | yield ResponseConstants.STREAM_DONE_MARKER 329 | return 330 | 331 | # 检测工具调用(如果启用) 332 | toolify_detector = None 333 | if enable_toolify: 334 | toolify = get_toolify() 335 | if toolify: 336 | toolify_detector = StreamingFunctionCallDetector(toolify.trigger_signal) 337 | safe_log_info(logger, "[TOOLIFY] 流式工具调用检测器已初始化") 338 | 339 | # 发送内容(支持工具调用检测) 340 | if toolify_detector: 341 | # 使用工具调用检测器处理内容 342 | async for chunk in self._stream_content_with_tool_detection( 343 | full_content, original_model, toolify_detector, k2think_payload.get("chat_id", "") 344 | ): 345 | yield chunk 346 | else: 347 | # 正常流式发送 348 | async for chunk in self._stream_content(full_content, original_model): 349 | yield chunk 350 | 351 | # 发送结束chunk 352 | end_chunk = self._create_chunk_data( 353 | delta={}, 354 
| finish_reason=ResponseConstants.FINISH_REASON_STOP, 355 | model=original_model 356 | ) 357 | yield f"{ResponseConstants.STREAM_DATA_PREFIX}{json.dumps(end_chunk)}\n\n" 358 | yield ResponseConstants.STREAM_DONE_MARKER 359 | 360 | except Exception as e: 361 | safe_log_error(logger, "流式响应处理错误", e) 362 | 363 | # 发送错误信息作为流式响应的一部分,而不是抛出异常 364 | if "401" in str(e) or "unauthorized" in str(e).lower(): 365 | # 401错误:显示tokens强制刷新消息 366 | error_message = "🔄 tokens强制刷新已启动,请稍后再试" 367 | safe_log_info(logger, "检测到401错误,向客户端发送强制刷新提示") 368 | else: 369 | # 其他错误:显示一般错误信息 370 | error_message = f"请求处理失败: {str(e)}" 371 | 372 | # 发送错误内容作为正常的流式响应 373 | error_chunk = self._create_chunk_data( 374 | delta={"content": f"\n\n{error_message}"}, 375 | finish_reason=None, 376 | model=original_model 377 | ) 378 | yield f"{ResponseConstants.STREAM_DATA_PREFIX}{json.dumps(error_chunk)}\n\n" 379 | 380 | # 发送结束chunk 381 | end_chunk = self._create_chunk_data( 382 | delta={}, 383 | finish_reason=ResponseConstants.FINISH_REASON_ERROR, 384 | model=original_model 385 | ) 386 | yield f"{ResponseConstants.STREAM_DATA_PREFIX}{json.dumps(end_chunk)}\n\n" 387 | yield ResponseConstants.STREAM_DONE_MARKER 388 | 389 | # 重新抛出异常以便上层处理token失败(在发送友好消息之后) 390 | # 上层会捕获这个异常并调用token_manager.mark_token_failure 391 | raise e 392 | 393 | async def _stream_content(self, content: str, model: str = None) -> AsyncGenerator[str, None]: 394 | """流式发送内容""" 395 | chunk_size = self.calculate_dynamic_chunk_size(len(content)) 396 | 397 | for i in range(0, len(content), chunk_size): 398 | chunk_content = content[i:i + chunk_size] 399 | 400 | chunk = self._create_chunk_data( 401 | delta={"content": chunk_content}, 402 | finish_reason=None, 403 | model=model 404 | ) 405 | 406 | yield f"{ResponseConstants.STREAM_DATA_PREFIX}{json.dumps(chunk)}\n\n" 407 | # 添加延迟模拟真实流式效果 408 | await asyncio.sleep(self.config.STREAM_DELAY) 409 | 410 | async def _stream_content_with_tool_detection( 411 | self, 412 | content: str, 413 | model: str, 414 | 
detector: StreamingFunctionCallDetector, 415 | chat_id: str 416 | ) -> AsyncGenerator[str, None]: 417 | """流式发送内容并检测工具调用""" 418 | chunk_size = self.calculate_dynamic_chunk_size(len(content)) 419 | 420 | for i in range(0, len(content), chunk_size): 421 | chunk_content = content[i:i + chunk_size] 422 | 423 | # 使用检测器处理chunk 424 | is_tool_detected, content_to_yield = detector.process_chunk(chunk_content) 425 | 426 | if is_tool_detected: 427 | safe_log_info(logger, "[TOOLIFY] 检测到工具调用触发信号") 428 | 429 | # 输出处理后的内容 430 | if content_to_yield: 431 | chunk = self._create_chunk_data( 432 | delta={"content": content_to_yield}, 433 | finish_reason=None, 434 | model=model 435 | ) 436 | yield f"{ResponseConstants.STREAM_DATA_PREFIX}{json.dumps(chunk)}\n\n" 437 | 438 | await asyncio.sleep(self.config.STREAM_DELAY) 439 | 440 | # 流结束时的最终处理 441 | parsed_tools, remaining_content = detector.finalize() 442 | 443 | # 输出剩余内容 444 | if remaining_content: 445 | safe_log_info(logger, f"[TOOLIFY] 输出缓冲区剩余内容: {len(remaining_content)}字符") 446 | chunk = self._create_chunk_data( 447 | delta={"content": remaining_content}, 448 | finish_reason=None, 449 | model=model 450 | ) 451 | yield f"{ResponseConstants.STREAM_DATA_PREFIX}{json.dumps(chunk)}\n\n" 452 | 453 | # 如果检测到工具调用,输出工具调用结果 454 | if parsed_tools: 455 | safe_log_info(logger, f"[TOOLIFY] 检测到 {len(parsed_tools)} 个工具调用") 456 | from src.toolify_handler import format_toolify_response_for_stream 457 | tool_chunks = format_toolify_response_for_stream(parsed_tools, model, chat_id) 458 | for chunk in tool_chunks: 459 | yield chunk 460 | else: 461 | # 没有工具调用,正常结束 462 | end_chunk = self._create_chunk_data( 463 | delta={}, 464 | finish_reason=ResponseConstants.FINISH_REASON_STOP, 465 | model=model 466 | ) 467 | yield f"{ResponseConstants.STREAM_DATA_PREFIX}{json.dumps(end_chunk)}\n\n" 468 | yield ResponseConstants.STREAM_DONE_MARKER 469 | 470 | def _create_chunk_data(self, delta: dict, finish_reason: Optional[str], model: str = None) -> dict: 471 | 
"""创建流式响应chunk数据""" 472 | return { 473 | "id": f"chatcmpl-{int(time.time() * 1000)}", 474 | "object": ResponseConstants.CHAT_COMPLETION_CHUNK_OBJECT, 475 | "created": int(time.time()), 476 | "model": model or APIConstants.MODEL_ID, 477 | "choices": [{ 478 | "index": 0, 479 | "delta": delta, 480 | "finish_reason": finish_reason 481 | }] 482 | } 483 | 484 | def create_completion_response( 485 | self, 486 | content: Optional[str], 487 | token_info: Optional[dict] = None, 488 | model: str = None 489 | ) -> dict: 490 | """创建完整的聊天补全响应""" 491 | message = { 492 | "role": "assistant", 493 | "content": content, 494 | } 495 | 496 | return { 497 | "id": f"chatcmpl-{int(time.time())}", 498 | "object": ResponseConstants.CHAT_COMPLETION_OBJECT, 499 | "created": int(time.time()), 500 | "model": model or APIConstants.MODEL_ID, 501 | "choices": [{ 502 | "index": 0, 503 | "message": message, 504 | "finish_reason": ResponseConstants.FINISH_REASON_STOP 505 | }], 506 | "usage": token_info or { 507 | "prompt_tokens": NumericConstants.DEFAULT_PROMPT_TOKENS, 508 | "completion_tokens": NumericConstants.DEFAULT_COMPLETION_TOKENS, 509 | "total_tokens": NumericConstants.DEFAULT_TOTAL_TOKENS 510 | } 511 | } -------------------------------------------------------------------------------- /src/api_handler.py: -------------------------------------------------------------------------------- 1 | """ 2 | API处理模块 3 | 处理主要的API路由逻辑 4 | """ 5 | import json 6 | import time 7 | import asyncio 8 | import logging 9 | from typing import Dict, List 10 | from fastapi import HTTPException, Request 11 | from fastapi.responses import StreamingResponse, JSONResponse 12 | 13 | from src.config import Config 14 | from src.constants import ( 15 | APIConstants, ResponseConstants, LogMessages, 16 | ErrorMessages, HeaderConstants 17 | ) 18 | from src.exceptions import ( 19 | AuthenticationError, SerializationError, 20 | K2ThinkProxyError, UpstreamError 21 | ) 22 | from src.models import ChatCompletionRequest, 
ModelsResponse, ModelInfo 23 | from src.response_processor import ResponseProcessor 24 | from src.token_manager import TokenManager 25 | from src.utils import safe_log_error, safe_log_info, safe_log_warning 26 | from src.toolify_handler import should_enable_toolify, prepare_toolify_request 27 | 28 | logger = logging.getLogger(__name__) 29 | 30 | class APIHandler: 31 | """API处理器""" 32 | 33 | def __init__(self, config: Config): 34 | self.config = config 35 | self.response_processor = ResponseProcessor(config) 36 | self.token_manager = config.get_token_manager() 37 | 38 | def validate_api_key(self, authorization: str) -> bool: 39 | """验证API密钥""" 40 | if not authorization or not authorization.startswith(APIConstants.BEARER_PREFIX): 41 | return False 42 | api_key = authorization[APIConstants.BEARER_PREFIX_LENGTH:] # 移除 "Bearer " 前缀 43 | return api_key == self.config.VALID_API_KEY 44 | 45 | def should_output_thinking(self, model_name: str) -> bool: 46 | """根据模型名判断是否应该输出思考内容""" 47 | return model_name != APIConstants.MODEL_ID_NOTHINK 48 | 49 | def get_actual_model_id(self, model_name: str) -> str: 50 | """获取实际的模型ID(将nothink版本映射回原始模型)""" 51 | if model_name == APIConstants.MODEL_ID_NOTHINK: 52 | return APIConstants.MODEL_ID 53 | return model_name 54 | 55 | async def get_models(self) -> ModelsResponse: 56 | """获取模型列表""" 57 | model_info_standard = ModelInfo( 58 | id=APIConstants.MODEL_ID, 59 | created=int(time.time()), 60 | owned_by=APIConstants.MODEL_OWNER, 61 | root=APIConstants.MODEL_ROOT 62 | ) 63 | model_info_nothink = ModelInfo( 64 | id=APIConstants.MODEL_ID_NOTHINK, 65 | created=int(time.time()), 66 | owned_by=APIConstants.MODEL_OWNER, 67 | root=APIConstants.MODEL_ROOT 68 | ) 69 | return ModelsResponse(data=[model_info_standard, model_info_nothink]) 70 | 71 | async def chat_completions(self, request: ChatCompletionRequest, auth_request: Request): 72 | """处理聊天补全请求""" 73 | # 验证API密钥 74 | authorization = auth_request.headers.get(HeaderConstants.AUTHORIZATION, "") 75 | if 
not self.validate_api_key(authorization): 76 | raise AuthenticationError() 77 | 78 | # 判断是否应该输出思考内容 79 | output_thinking = self.should_output_thinking(request.model) 80 | actual_model_id = self.get_actual_model_id(request.model) 81 | 82 | try: 83 | # 处理消息 84 | raw_messages = self._process_raw_messages(request.messages) 85 | 86 | # 检查是否需要启用工具调用 87 | request_dict = request.model_dump() 88 | enable_toolify = should_enable_toolify(request_dict) 89 | 90 | # 如果启用工具调用,预处理消息并注入提示词 91 | if enable_toolify: 92 | safe_log_info(logger, "[TOOLIFY] 工具调用功能已启用") 93 | raw_messages, _ = prepare_toolify_request(request_dict, raw_messages) 94 | 95 | self._log_request_info(raw_messages) 96 | 97 | # 构建K2Think请求 98 | k2think_payload = self._build_k2think_payload( 99 | request, raw_messages, actual_model_id 100 | ) 101 | 102 | # 验证JSON序列化 103 | self._validate_json_serialization(k2think_payload) 104 | 105 | # 处理响应(带重试机制) 106 | if request.stream: 107 | return await self._handle_stream_response_with_retry( 108 | request, k2think_payload, output_thinking, enable_toolify 109 | ) 110 | else: 111 | return await self._handle_non_stream_response_with_retry( 112 | request, k2think_payload, output_thinking, enable_toolify 113 | ) 114 | 115 | except K2ThinkProxyError: 116 | # 重新抛出自定义异常 117 | raise 118 | except Exception as e: 119 | safe_log_error(logger, "API转发错误", e) 120 | raise HTTPException( 121 | status_code=APIConstants.HTTP_INTERNAL_ERROR, 122 | detail={ 123 | "error": { 124 | "message": str(e), 125 | "type": ErrorMessages.API_ERROR 126 | } 127 | } 128 | ) 129 | 130 | def _process_raw_messages(self, messages: List) -> List[Dict]: 131 | """处理原始消息""" 132 | raw_messages = [] 133 | for msg in messages: 134 | try: 135 | raw_messages.append({ 136 | "role": msg.role, 137 | "content": msg.content # 保持原始格式,稍后再转换 138 | }) 139 | except Exception as e: 140 | safe_log_error(logger, f"处理消息时出错, 消息: {msg}", e) 141 | # 使用默认值 142 | raw_messages.append({ 143 | "role": msg.role, 144 | "content": str(msg.content) if 
msg.content else "" 145 | }) 146 | return raw_messages 147 | 148 | def _log_request_info(self, raw_messages: List[Dict]): 149 | """记录请求信息""" 150 | safe_log_info(logger, LogMessages.MESSAGE_RECEIVED.format(len(raw_messages))) 151 | 152 | # 记录原始消息的角色分布 153 | role_count = {} 154 | for msg in raw_messages: 155 | role = msg.get("role", "unknown") 156 | role_count[role] = role_count.get(role, 0) + 1 157 | safe_log_info(logger, LogMessages.ROLE_DISTRIBUTION.format("原始", role_count)) 158 | 159 | def _build_k2think_payload( 160 | self, 161 | request: ChatCompletionRequest, 162 | processed_messages: List[Dict], 163 | actual_model_id: str = None 164 | ) -> Dict: 165 | """构建K2Think请求负载""" 166 | # 构建K2Think格式的请求体 - 支持多模态内容 167 | k2think_messages = [] 168 | for msg in processed_messages: 169 | try: 170 | # 使用多模态内容转换函数 171 | content = self.response_processor.content_to_multimodal(msg.get("content", "")) 172 | k2think_messages.append({ 173 | "role": msg["role"], 174 | "content": content 175 | }) 176 | except Exception as e: 177 | safe_log_error(logger, f"构建K2Think消息时出错, 消息: {msg}", e) 178 | # 使用安全的默认值 179 | fallback_content = str(msg.get("content", "")) 180 | k2think_messages.append({ 181 | "role": msg.get("role", "user"), 182 | "content": fallback_content 183 | }) 184 | 185 | # 使用实际的模型ID 186 | model_id = actual_model_id or APIConstants.MODEL_ID 187 | 188 | return { 189 | "stream": request.stream, 190 | "model": model_id, 191 | "messages": k2think_messages, 192 | "params": {}, 193 | "tool_servers": [], 194 | "features": { 195 | "image_generation": False, 196 | "code_interpreter": False, 197 | "web_search": False 198 | }, 199 | "variables": self.response_processor.get_current_datetime_info(), 200 | "model_item": { 201 | "id": model_id, 202 | "object": ResponseConstants.MODEL_OBJECT, 203 | "owned_by": APIConstants.MODEL_OWNER, 204 | "root": APIConstants.MODEL_ROOT, 205 | "parent": None, 206 | "status": "active", 207 | "connection_type": "external", 208 | "name": model_id 209 | }, 
210 | "background_tasks": { 211 | "title_generation": True, 212 | "tags_generation": True 213 | }, 214 | "chat_id": self.response_processor.generate_chat_id(), 215 | "id": self.response_processor.generate_session_id(), 216 | "session_id": self.response_processor.generate_session_id() 217 | } 218 | 219 | def _validate_json_serialization(self, k2think_payload: Dict): 220 | """验证JSON序列化""" 221 | try: 222 | # 测试JSON序列化 223 | json.dumps(k2think_payload, ensure_ascii=False) 224 | safe_log_info(logger, LogMessages.JSON_VALIDATION_SUCCESS) 225 | except Exception as e: 226 | safe_log_error(logger, LogMessages.JSON_VALIDATION_FAILED.format(e)) 227 | # 尝试修复序列化问题 228 | try: 229 | k2think_payload = json.loads(json.dumps(k2think_payload, default=str, ensure_ascii=False)) 230 | safe_log_info(logger, LogMessages.JSON_FIXED) 231 | except Exception as fix_error: 232 | safe_log_error(logger, "无法修复序列化问题", fix_error) 233 | raise SerializationError() 234 | 235 | def _build_request_headers(self, request: ChatCompletionRequest, k2think_payload: Dict, token: str) -> Dict[str, str]: 236 | """构建请求头""" 237 | return { 238 | HeaderConstants.ACCEPT: ( 239 | HeaderConstants.EVENT_STREAM_JSON if request.stream 240 | else HeaderConstants.APPLICATION_JSON 241 | ), 242 | HeaderConstants.CONTENT_TYPE: HeaderConstants.APPLICATION_JSON, 243 | HeaderConstants.AUTHORIZATION: f"{APIConstants.BEARER_PREFIX}{token}", 244 | HeaderConstants.ORIGIN: "https://www.k2think.ai", 245 | HeaderConstants.REFERER: "https://www.k2think.ai/c/" + k2think_payload["chat_id"], 246 | HeaderConstants.USER_AGENT: HeaderConstants.DEFAULT_USER_AGENT 247 | } 248 | 249 | async def _handle_stream_response( 250 | self, 251 | k2think_payload: Dict, 252 | headers: Dict[str, str], 253 | output_thinking: bool = True, 254 | original_model: str = None 255 | ) -> StreamingResponse: 256 | """处理流式响应""" 257 | return StreamingResponse( 258 | self.response_processor.process_stream_response( 259 | k2think_payload, headers, output_thinking, 
original_model 260 | ), 261 | media_type=HeaderConstants.TEXT_EVENT_STREAM, 262 | headers={ 263 | HeaderConstants.CACHE_CONTROL: HeaderConstants.NO_CACHE, 264 | HeaderConstants.CONNECTION: HeaderConstants.KEEP_ALIVE, 265 | HeaderConstants.X_ACCEL_BUFFERING: HeaderConstants.NO_BUFFERING 266 | } 267 | ) 268 | 269 | async def _handle_non_stream_response( 270 | self, 271 | k2think_payload: Dict, 272 | headers: Dict[str, str], 273 | output_thinking: bool = True, 274 | original_model: str = None 275 | ) -> JSONResponse: 276 | """处理非流式响应""" 277 | full_content, token_info = await self.response_processor.process_non_stream_response( 278 | k2think_payload, headers, output_thinking 279 | ) 280 | 281 | openai_response = self.response_processor.create_completion_response( 282 | full_content, token_info, original_model 283 | ) 284 | 285 | return JSONResponse(content=openai_response) 286 | 287 | async def _handle_stream_response_with_retry( 288 | self, 289 | request: ChatCompletionRequest, 290 | k2think_payload: Dict, 291 | output_thinking: bool = True, 292 | enable_toolify: bool = False, 293 | max_retries: int = 3 294 | ) -> StreamingResponse: 295 | """处理流式响应(带重试机制)""" 296 | last_exception = None 297 | 298 | for attempt in range(max_retries): 299 | # 获取下一个可用token 300 | token = self.token_manager.get_next_token() 301 | if not token: 302 | # 根据是否启用自动更新提供不同的错误信息 303 | if Config.ENABLE_TOKEN_AUTO_UPDATE: 304 | error_message = "Token池暂时为空,可能正在自动更新中。请稍后重试或检查自动更新服务状态。" 305 | safe_log_warning(logger, "没有可用的token,可能正在自动更新中") 306 | else: 307 | error_message = "所有token都已失效,请检查token配置或重新加载token文件。" 308 | safe_log_error(logger, "没有可用的token") 309 | 310 | raise HTTPException( 311 | status_code=APIConstants.HTTP_SERVICE_UNAVAILABLE, 312 | detail={ 313 | "error": { 314 | "message": error_message, 315 | "type": ErrorMessages.API_ERROR 316 | } 317 | } 318 | ) 319 | 320 | # 构建请求头 321 | headers = self._build_request_headers(request, k2think_payload, token) 322 | 323 | try: 324 | 
safe_log_info(logger, f"尝试流式请求 (第{attempt + 1}次)") 325 | 326 | # 创建流式生成器,内部处理token成功/失败标记 327 | async def stream_generator(): 328 | try: 329 | async for chunk in self.response_processor.process_stream_response( 330 | k2think_payload, headers, output_thinking, request.model, enable_toolify 331 | ): 332 | yield chunk 333 | # 流式响应成功完成,标记token成功 334 | self.token_manager.mark_token_success(token) 335 | except Exception as e: 336 | # 流式响应过程中出现错误,标记token失败 337 | safe_log_warning(logger, f"🔍 流式响应异常被捕获,准备标记token失败: {str(e)}") 338 | 339 | # 标记token失败(这会触发自动刷新逻辑) 340 | token_failed = self.token_manager.mark_token_failure(token, str(e)) 341 | 342 | # 特别处理401错误 343 | if "401" in str(e) or "unauthorized" in str(e).lower(): 344 | safe_log_warning(logger, f"🔒 流式响应中检测到401认证错误,token标记失败: {token_failed}") 345 | safe_log_info(logger, f"🚨 已调用mark_token_failure,应该触发自动刷新") 346 | else: 347 | safe_log_warning(logger, f"流式响应中检测到其他错误: {str(e)}") 348 | 349 | # 注意:不重新抛出异常,避免"response already started"错误 350 | # 错误信息已经通过response_processor发送给客户端 351 | 352 | return StreamingResponse( 353 | stream_generator(), 354 | media_type=HeaderConstants.TEXT_EVENT_STREAM, 355 | headers={ 356 | HeaderConstants.CACHE_CONTROL: HeaderConstants.NO_CACHE, 357 | HeaderConstants.CONNECTION: HeaderConstants.KEEP_ALIVE, 358 | HeaderConstants.X_ACCEL_BUFFERING: HeaderConstants.NO_BUFFERING 359 | } 360 | ) 361 | except (UpstreamError, Exception) as e: 362 | # 这里只处理流式响应启动前的异常(主要是连接错误) 363 | # 401等上游服务错误现在在流式响应内部处理,不会到达这里 364 | last_exception = e 365 | safe_log_warning(logger, f"流式请求启动失败 (第{attempt + 1}次): {e}") 366 | 367 | # 标记token失败 368 | token_failed = self.token_manager.mark_token_failure(token, str(e)) 369 | if token_failed: 370 | safe_log_error(logger, f"Token已被标记为失效") 371 | 372 | # 如果是最后一次尝试,抛出异常 373 | if attempt == max_retries - 1: 374 | break 375 | 376 | # 短暂延迟后重试 377 | await asyncio.sleep(0.5) 378 | 379 | # 所有重试都失败了 380 | safe_log_error(logger, "所有流式请求重试都失败了,最后错误", last_exception) 381 | raise HTTPException( 382 
| status_code=APIConstants.HTTP_INTERNAL_ERROR, 383 | detail={ 384 | "error": { 385 | "message": f"流式请求失败: {str(last_exception)}", 386 | "type": ErrorMessages.API_ERROR 387 | } 388 | } 389 | ) 390 | 391 | async def _handle_non_stream_response_with_retry( 392 | self, 393 | request: ChatCompletionRequest, 394 | k2think_payload: Dict, 395 | output_thinking: bool = True, 396 | enable_toolify: bool = False, 397 | max_retries: int = 3 398 | ) -> JSONResponse: 399 | """处理非流式响应(带重试机制)""" 400 | last_exception = None 401 | 402 | for attempt in range(max_retries): 403 | # 获取下一个可用token 404 | token = self.token_manager.get_next_token() 405 | if not token: 406 | # 根据是否启用自动更新提供不同的错误信息 407 | if Config.ENABLE_TOKEN_AUTO_UPDATE: 408 | error_message = "Token池暂时为空,可能正在自动更新中。请稍后重试或检查自动更新服务状态。" 409 | safe_log_warning(logger, "没有可用的token,可能正在自动更新中") 410 | else: 411 | error_message = "所有token都已失效,请检查token配置或重新加载token文件。" 412 | safe_log_error(logger, "没有可用的token") 413 | 414 | raise HTTPException( 415 | status_code=APIConstants.HTTP_SERVICE_UNAVAILABLE, 416 | detail={ 417 | "error": { 418 | "message": error_message, 419 | "type": ErrorMessages.API_ERROR 420 | } 421 | } 422 | ) 423 | 424 | # 构建请求头 425 | headers = self._build_request_headers(request, k2think_payload, token) 426 | 427 | try: 428 | safe_log_info(logger, f"尝试非流式请求 (第{attempt + 1}次)") 429 | 430 | # 处理响应 431 | full_content, token_info = await self.response_processor.process_non_stream_response( 432 | k2think_payload, headers, output_thinking 433 | ) 434 | 435 | # 标记token成功 436 | self.token_manager.mark_token_success(token) 437 | 438 | # 检查是否有工具调用 439 | tool_response = None 440 | if enable_toolify: 441 | from src.toolify_handler import parse_toolify_response 442 | tool_response = parse_toolify_response(full_content, request.model) 443 | 444 | if tool_response: 445 | # 返回包含tool_calls的响应 446 | openai_response = { 447 | "id": f"chatcmpl-{int(time.time())}", 448 | "object": ResponseConstants.CHAT_COMPLETION_OBJECT, 449 | "created": 
int(time.time()), 450 | "model": request.model, 451 | "choices": [{ 452 | "index": 0, 453 | "message": tool_response, 454 | "finish_reason": "tool_calls" 455 | }], 456 | "usage": token_info or { 457 | "prompt_tokens": 0, 458 | "completion_tokens": 0, 459 | "total_tokens": 0 460 | } 461 | } 462 | else: 463 | openai_response = self.response_processor.create_completion_response( 464 | full_content, token_info, request.model 465 | ) 466 | 467 | return JSONResponse(content=openai_response) 468 | 469 | except (UpstreamError, Exception) as e: 470 | last_exception = e 471 | 472 | # 特别处理401错误 473 | if "401" in str(e) or "unauthorized" in str(e).lower(): 474 | safe_log_warning(logger, f"🔒 非流式请求遇到401认证错误 (第{attempt + 1}次): {e}") 475 | 476 | # 对于401错误,如果是第一次尝试,返回友好消息而不重试 477 | if attempt == 0: 478 | # 标记token失败以触发自动刷新 479 | self.token_manager.mark_token_failure(token, str(e)) 480 | 481 | # 返回友好的刷新提示消息 482 | openai_response = self.response_processor.create_completion_response( 483 | content="🔄 tokens强制刷新已启动,请稍后再试", 484 | token_info={ 485 | "prompt_tokens": 0, 486 | "completion_tokens": 10, 487 | "total_tokens": 10 488 | }, 489 | model=request.model 490 | ) 491 | return JSONResponse(content=openai_response) 492 | else: 493 | safe_log_warning(logger, f"非流式请求失败 (第{attempt + 1}次): {e}") 494 | 495 | # 标记token失败 496 | token_failed = self.token_manager.mark_token_failure(token, str(e)) 497 | if token_failed: 498 | safe_log_error(logger, f"Token已被标记为失效") 499 | 500 | # 如果是最后一次尝试,抛出异常 501 | if attempt == max_retries - 1: 502 | break 503 | 504 | # 短暂延迟后重试 505 | await asyncio.sleep(0.5) 506 | 507 | # 所有重试都失败了 508 | safe_log_error(logger, "所有非流式请求重试都失败了,最后错误", last_exception) 509 | raise HTTPException( 510 | status_code=APIConstants.HTTP_INTERNAL_ERROR, 511 | detail={ 512 | "error": { 513 | "message": f"非流式请求失败: {str(last_exception)}", 514 | "type": ErrorMessages.API_ERROR 515 | } 516 | } 517 | ) --------------------------------------------------------------------------------