├── assets ├── logo.png ├── statusBar.png └── thinkingPartDemo.png ├── .prettierrc ├── .gitignore ├── .prettierignore ├── .vscode ├── settings.json ├── tasks.json └── launch.json ├── .vscodeignore ├── .vscode-test.mjs ├── tsconfig.json ├── LICENSE ├── CONTRIBUTING.md ├── eslint.config.mjs ├── .github ├── workflows │ └── release.yml └── copilot-instructions.md ├── src ├── ollama │ ├── ollamaTypes.ts │ └── ollamaApi.ts ├── openai │ ├── openaiTypes.ts │ └── openaiApi.ts ├── anthropic │ ├── anthropicTypes.ts │ └── anthropicApi.ts ├── provideToken.ts ├── vscode.proposed.chatProvider.d.ts ├── types.ts ├── statusBar.ts ├── extension.ts ├── vscode.proposed.languageModelThinkingPart.d.ts ├── vscode.proposed.languageModelDataPart.d.ts ├── provideModel.ts ├── utils.ts ├── commonApi.ts └── provider.ts ├── CHANGELOG.md ├── package.json └── README.md /assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JohnnyZ93/oai-compatible-copilot/HEAD/assets/logo.png -------------------------------------------------------------------------------- /assets/statusBar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JohnnyZ93/oai-compatible-copilot/HEAD/assets/statusBar.png -------------------------------------------------------------------------------- /assets/thinkingPartDemo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JohnnyZ93/oai-compatible-copilot/HEAD/assets/thinkingPartDemo.png -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "semi": true, 3 | "useTabs": true, 4 | "tabWidth": 2, 5 | "printWidth": 120, 6 | "trailingComma": "es5" 7 | } 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | out 2 | dist 3 | node_modules 4 | .vscode-test/ 5 | *.vsix 6 | .DS_Store 7 | CLAUDE.md 8 | .doc 9 | .clinerules 10 | AGENTS.md -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | dist/** 2 | .vscode/** 3 | .vscode-test/** 4 | .git-blame-ignore-revs 5 | **/*.md 6 | **/*.yml 7 | **/*.json 8 | **/*.mjs 9 | **/*vscode.d.ts 10 | .build/** -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "search.exclude": { 3 | "out": true 4 | }, 5 | "git.branchProtection": [ 6 | "main" 7 | ], 8 | "files.trimTrailingWhitespace": true 9 | } -------------------------------------------------------------------------------- /.vscodeignore: -------------------------------------------------------------------------------- 1 | .vscode/** 2 | .vscode-test/** 3 | src/** 4 | .gitignore 5 | .yarnrc 6 | webpack.config.js 7 | vsc-extension-quickstart.md 8 | **/tsconfig.json 9 | **/.eslintrc.json 10 | **/*.map 11 | **/*.ts -------------------------------------------------------------------------------- /.vscode-test.mjs: -------------------------------------------------------------------------------- 1 | import { defineConfig } from '@vscode/test-cli'; 2 | 3 | export default defineConfig({ 4 | files: 
'out/test/**/*.test.js', 5 | mocha: { 6 | ui: 'tdd', 7 | timeout: 20000, 8 | color: true 9 | } 10 | }); -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "module": "Node16", 4 | "target": "ES2024", 5 | "lib": ["ES2024", "dom"], 6 | "sourceMap": true, 7 | "rootDir": "src", 8 | "strict": true /* enable all strict type-checking options */, 9 | "outDir": "out", 10 | "skipLibCheck": true /* Skip type checking of declaration files */, 11 | "types": ["node", "mocha"] 12 | /* Additional Checks */ 13 | // "noImplicitReturns": true, /* Report error when not all code paths in function return a value. */ 14 | // "noFallthroughCasesInSwitch": true, /* Report errors for fallthrough cases in switch statement. */ 15 | // "noUnusedParameters": true, /* Report errors on unused parameters. */ 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | // See https://go.microsoft.com/fwlink/?LinkId=733558 2 | // for the documentation about the tasks.json format 3 | { 4 | "version": "2.0.0", 5 | "tasks": [ 6 | { 7 | "type": "npm", 8 | "script": "watch", 9 | "problemMatcher": "$tsc-watch", 10 | "isBackground": true, 11 | "presentation": { 12 | "reveal": "never", 13 | "group": "watchers" 14 | }, 15 | "group": { 16 | "kind": "build", 17 | "isDefault": true 18 | } 19 | }, 20 | { 21 | "type": "npm", 22 | "script": "watch-tests", 23 | "problemMatcher": "$tsc-watch", 24 | "isBackground": true, 25 | "presentation": { 26 | "reveal": "never", 27 | "group": "watchers" 28 | }, 29 | "group": "build" 30 | }, 31 | { 32 | "label": "tasks: watch-tests", 33 | "dependsOn": [ 34 | "npm: watch", 35 | "npm: watch-tests" 36 | ], 37 | "problemMatcher": [] 38 | } 39 | ] 40 | } 41 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | // A launch configuration that compiles the extension and then opens it inside a new window 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | { 6 | "version": "0.2.0", 7 | "configurations": [ 8 | { 9 | "name": "Run Extension", 10 | "type": "extensionHost", 11 | "request": "launch", 12 | "args": [ 13 | "--extensionDevelopmentPath=${workspaceFolder}" 14 | ], 15 | "outFiles": [ 16 | "${workspaceFolder}/out/**/*.js" 17 | ], 18 | "preLaunchTask": "${defaultBuildTask}" 19 | }, 20 | { 21 | "name": "Extension Tests", 22 | "type": "extensionHost", 23 | "request": "launch", 24 | "args": [ 25 | "--extensionDevelopmentPath=${workspaceFolder}", 26 | "--extensionTestsPath=${workspaceFolder}/out/test/suite/index" 27 | ], 28 | "outFiles": [ 29 | "${workspaceFolder}/out/**/*.js", 30 | "${workspaceFolder}/dist/**/*.js" 31 | ], 32 | "preLaunchTask": "tasks: watch-tests" 33 | } 34 | ] 35 | } 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Johnny Zhao 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guide 2 | 3 | Thanks for taking the time to contribute. This guide will help you get started with the project. 4 | 5 | The project also welcomes serious and committed maintainers. 6 | 7 | ## How to contribute? 8 | 9 | ### Creating an Issue 10 | 11 | For anything other than a typo or a bug fix, please raise an issue to discuss your proposal before submitting any code. 12 | 13 | ### License for contributions 14 | 15 | As the copyright owner, you agree to license your contributions under an irrevocable MIT license. 16 | 17 | ### For Developers: Creating a Pull Request 18 | 19 | **Requirements:** 20 | - VS Code 1.104.0 or higher. 21 | - Node.js 22. 22 | - Your OpenAI-compatible provider API key. 23 | 24 | ```bash 25 | git clone https://github.com/JohnnyZ93/oai-compatible-copilot 26 | cd oai-compatible-copilot 27 | npm install 28 | npm run compile 29 | ``` 30 | Press F5 to launch an Extension Development Host. 31 | 32 | **Common scripts:** 33 | - Build: `npm run compile` 34 | - Watch: `npm run watch` 35 | - Lint: `npm run lint` 36 | - Format: `npm run format` 37 | 38 | ### Tests 39 | 40 | Use your own OpenAI-compatible provider API key for testing.
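For a quick manual check in the Extension Development Host, a minimal entry in your user `settings.json` should be enough. The field names below come from the `HFModelItem` interface in `src/types.ts`; the model id, provider name, and base URL are placeholders to replace with your provider's real values:

```json
{
	"oaicopilot.models": [
		{
			"id": "my-test-model",
			"owned_by": "my-provider",
			"baseUrl": "https://api.example.com/v1",
			"context_length": 128000,
			"max_tokens": 4096
		}
	]
}
```

Then store the key for that provider with the `oaicopilot.setProviderApikey` command (or `oaicopilot.setApikey` for a global key) so requests from the development host can authenticate.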
-------------------------------------------------------------------------------- /eslint.config.mjs: -------------------------------------------------------------------------------- 1 | /** 2 | * ESLint configuration for the project. 3 | * 4 | * See https://eslint.style and https://typescript-eslint.io for additional linting options. 5 | */ 6 | // @ts-check 7 | import js from '@eslint/js'; 8 | import tseslint from 'typescript-eslint'; 9 | import stylistic from '@stylistic/eslint-plugin'; 10 | 11 | export default tseslint.config( 12 | { 13 | ignores: [ 14 | '.vscode-test', 15 | 'out', 16 | '**/*.d.ts' 17 | ] 18 | }, 19 | { 20 | files: ['**/*.{js,mjs,cjs,ts,jsx,tsx}'], 21 | }, 22 | js.configs.recommended, 23 | ...tseslint.configs.recommended, 24 | ...tseslint.configs.stylistic, 25 | { 26 | plugins: { 27 | '@stylistic': stylistic 28 | }, 29 | rules: { 30 | 'curly': 'warn', 31 | '@stylistic/semi': ['warn', 'always'], 32 | '@typescript-eslint/no-empty-function': 'off', 33 | '@typescript-eslint/array-type': 'off', 34 | '@typescript-eslint/naming-convention': [ 35 | 'warn', 36 | { 37 | 'selector': 'import', 38 | 'format': ['camelCase', 'PascalCase'] 39 | } 40 | ], 41 | '@typescript-eslint/no-unused-vars': [ 42 | 'error', 43 | { 44 | 'argsIgnorePattern': '^_' 45 | } 46 | ] 47 | } 48 | } 49 | ); -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: release 2 | on: 3 | workflow_dispatch: 4 | push: 5 | branches: 6 | - 'release/**' 7 | jobs: 8 | package: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v4 12 | 13 | - name: Verify version matches branch 14 | env: 15 | BRANCH: ${{ github.ref_name }} 16 | run: | 17 | TAG="${BRANCH##*/}" 18 | PKG_VERSION=$(node -p "require('./package.json').version") 19 | if [ "$TAG" != "$PKG_VERSION" ]; then 20 | echo "package.json version ($PKG_VERSION) does not match branch tag ($TAG)"; exit 1; 21 | fi 22 | 23 | - uses: actions/setup-node@v4 24 | with: 25 | node-version: 20 26 | 27 | - run: npm ci 28 | - run: npm run compile 29 | 30 | - name: Package VSIX 31 | run: npx @vscode/vsce package -o extension.vsix 32 | 33 | - name: Upload artifact 34 | uses: actions/upload-artifact@v4 35 | with: 36 | name: vsix 37 | path: extension.vsix 38 | 39 | publish: 40 | runs-on: ubuntu-latest 41 | needs: [package] 42 | steps: 43 | - uses: actions/checkout@v4 44 | 45 | - uses: actions/setup-node@v4 46 | with: 47 | node-version: 20 48 | 49 | - uses: actions/download-artifact@v4 50 | with: 51 | name: vsix 52 | path: . 
53 | 54 | - name: Publish to VS Code Marketplace 55 | run: npx @vscode/vsce publish --allow-all-proposed-apis --pat ${{ secrets.MARKETPLACE_TOKEN }} --packagePath ./extension.vsix 56 | -------------------------------------------------------------------------------- /src/ollama/ollamaTypes.ts: -------------------------------------------------------------------------------- 1 | import { OpenAIFunctionToolDef } from "../openai/openaiTypes"; 2 | 3 | /** 4 | * Ollama native API message format 5 | * @see https://docs.ollama.com/api#generate-a-chat-message 6 | */ 7 | export interface OllamaMessage { 8 | role: "system" | "user" | "assistant" | "tool"; 9 | content: string; 10 | images?: string[]; 11 | thinking?: string; 12 | tool_calls?: OllamaToolCall[]; 13 | tool_name?: string; // For tool role messages 14 | } 15 | 16 | /** 17 | * Ollama native API request body 18 | * @see https://docs.ollama.com/api#generate-a-chat-message 19 | */ 20 | export interface OllamaRequestBody { 21 | model: string; 22 | messages: OllamaMessage[]; 23 | stream?: boolean; 24 | think?: boolean | string; 25 | options?: OllamaModelOptions; 26 | tools?: OpenAIFunctionToolDef[]; 27 | } 28 | 29 | /** 30 | * Ollama model options for controlling text generation 31 | * @see https://docs.ollama.com/api#generate-a-chat-message 32 | */ 33 | export interface OllamaModelOptions { 34 | seed?: number; 35 | temperature?: number; 36 | top_k?: number; 37 | top_p?: number; 38 | min_p?: number; 39 | stop?: string | string[]; 40 | num_ctx?: number; 41 | num_predict?: number; 42 | } 43 | 44 | /** 45 | * Ollama tool call format 46 | * @see https://docs.ollama.com/api#tool-calling 47 | */ 48 | export interface OllamaToolCall { 49 | function: { 50 | name: string; 51 | arguments: Record<string, unknown>; 52 | }; 53 | } 54 | 55 | /** 56 | * Ollama native API streaming response chunk 57 | */ 58 | export interface OllamaStreamChunk { 59 | model: string; 60 | created_at: string; 61 | message: { 62 | role: string; 63 | content: string; 64 | thinking?: string; 65 | tool_calls?: OllamaToolCall[]; 66 | }; 67 | done: boolean; 68 | done_reason?: string; 69 | } 70 | -------------------------------------------------------------------------------- /src/openai/openaiTypes.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * OpenAI function-call entry emitted by assistant messages. 3 | */ 4 | export interface OpenAIToolCall { 5 | id: string; 6 | type: "function"; 7 | function: { name: string; arguments: string }; 8 | } 9 | 10 | /** 11 | * OpenAI function tool definition used to advertise tools. 12 | */ 13 | export interface OpenAIFunctionToolDef { 14 | type: "function"; 15 | function: { 16 | name: string; 17 | description?: string; 18 | parameters?: object; 19 | }; 20 | } 21 | 22 | /** 23 | * OpenAI-style chat message used for router requests. 24 | */ 25 | export interface OpenAIChatMessage { 26 | role: OpenAIChatRole; 27 | content?: string | ChatMessageContent[]; 28 | name?: string; 29 | tool_calls?: OpenAIToolCall[]; 30 | tool_call_id?: string; 31 | reasoning_content?: string; 32 | } 33 | 34 | /** 35 | * Chat message content (supports multimodal text and image parts) 36 | */ 37 | export interface ChatMessageContent { 38 | type: "text" | "image_url"; 39 | text?: string; 40 | image_url?: { 41 | url: string; 42 | }; 43 | } 44 | 45 | /** OpenAI-style chat roles.
*/ 46 | export type OpenAIChatRole = "system" | "user" | "assistant" | "tool"; 47 | 48 | export interface ReasoningDetailCommon { 49 | id: string | null; 50 | format: string; // e.g., "anthropic-claude-v1", "openai-responses-v1" 51 | index?: number; 52 | } 53 | 54 | export interface ReasoningSummaryDetail extends ReasoningDetailCommon { 55 | type: "reasoning.summary"; 56 | summary: string; 57 | } 58 | 59 | export interface ReasoningEncryptedDetail extends ReasoningDetailCommon { 60 | type: "reasoning.encrypted"; 61 | data: string; // Base64 encoded 62 | } 63 | 64 | export interface ReasoningTextDetail extends ReasoningDetailCommon { 65 | type: "reasoning.text"; 66 | text: string; 67 | signature?: string | null; 68 | } 69 | 70 | export type ReasoningDetail = ReasoningSummaryDetail | ReasoningEncryptedDetail | ReasoningTextDetail; 71 | -------------------------------------------------------------------------------- /src/anthropic/anthropicTypes.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Anthropic API message format 3 | * @see https://docs.anthropic.com/en/api/messages 4 | */ 5 | 6 | export type AnthropicRole = "user" | "assistant"; 7 | 8 | export interface AnthropicTextBlock { 9 | type: "text"; 10 | text: string; 11 | } 12 | 13 | export interface AnthropicImageBlock { 14 | type: "image"; 15 | source: { 16 | type: "base64"; 17 | media_type: string; 18 | data: string; 19 | }; 20 | } 21 | 22 | export interface AnthropicThinkingBlock { 23 | type: "thinking"; 24 | thinking: string; 25 | signature?: string; 26 | } 27 | 28 | export interface AnthropicToolUseBlock { 29 | type: "tool_use"; 30 | id: string; 31 | name: string; 32 | input: Record; 33 | } 34 | 35 | export interface AnthropicToolResultBlock { 36 | type: "tool_result"; 37 | tool_use_id: string; 38 | content: string | AnthropicTextBlock[]; 39 | is_error?: boolean; 40 | } 41 | 42 | export type AnthropicContentBlock = 43 | | AnthropicTextBlock 44 | | AnthropicImageBlock 45 | | AnthropicThinkingBlock 46 | | AnthropicToolUseBlock 47 | | AnthropicToolResultBlock; 48 | 49 | export interface AnthropicMessage { 50 | role: AnthropicRole; 51 | content: string | AnthropicContentBlock[]; 52 | } 53 | 54 | export interface AnthropicRequestBody { 55 | model: string; 56 | messages: AnthropicMessage[]; 57 | max_tokens?: number; 58 | system?: string | AnthropicTextBlock[]; 59 | stream?: boolean; 60 | temperature?: number; 61 | top_p?: number; 62 | top_k?: number; 63 | stop_sequences?: string[]; 64 | metadata?: { 65 | user_id?: string; 66 | }; 67 | service_tier?: "auto" | "standard_only"; 68 | thinking?: { 69 | type: "enabled"; 70 | budget_tokens: number; 71 | }; 72 | tools?: AnthropicToolDefinition[]; 73 | tool_choice?: AnthropicToolChoice; 74 | } 75 | 76 | export interface AnthropicToolDefinition { 77 | name: string; 78 | description?: string; 79 | input_schema?: object; 80 | } 81 | 82 | export type AnthropicToolChoice = 83 | | { type: "auto" } 84 | | { type: "any" } 85 | | { type: "tool"; name: string } 86 | | { type: "none" }; 87 | 88 | export interface AnthropicStreamChunk { 89 | type: 90 | | "message_start" 91 | | "content_block_start" 92 | | "content_block_delta" 93 | | "content_block_stop" 94 | | "message_delta" 95 | | "message_stop" 96 | | "ping" 97 | | "error"; 98 | index?: number; 99 | message?: { 100 | id: string; 101 | type: "message"; 102 | role: "assistant"; 103 | content: AnthropicContentBlock[]; 104 | model: string; 105 | stop_reason?: string; 106 | stop_sequence?: string; 107 | }; 108 | 
content_block?: { 109 | type: "text" | "thinking" | "tool_use"; 110 | text?: string; 111 | thinking?: string; 112 | id?: string; 113 | name?: string; 114 | input?: Record<string, unknown>; 115 | }; 116 | delta?: { 117 | type: "text_delta" | "thinking_delta" | "input_json_delta" | "signature_delta"; 118 | text?: string; 119 | thinking?: string; 120 | partial_json?: string; 121 | signature?: string; 122 | }; 123 | usage?: { 124 | input_tokens: number; 125 | output_tokens: number; 126 | }; 127 | error?: { 128 | type: string; 129 | message: string; 130 | }; 131 | } 132 | -------------------------------------------------------------------------------- /src/provideToken.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from "vscode"; 2 | import { CancellationToken, LanguageModelChatInformation, LanguageModelChatRequestMessage } from "vscode"; 3 | 4 | /** 5 | * Returns the number of tokens for a given text using the model-specific tokenizer logic 6 | * @param model The language model to use 7 | * @param text The text to count tokens for 8 | * @param token A cancellation token for the request 9 | * @returns A promise that resolves to the number of tokens 10 | */ 11 | export async function prepareTokenCount( 12 | model: LanguageModelChatInformation, 13 | text: string | LanguageModelChatRequestMessage, 14 | _token: CancellationToken 15 | ): Promise<number> { 16 | if (typeof text === "string") { 17 | // Estimate tokens directly for plain text 18 | return estimateTextTokens(text); 19 | } else { 20 | // For complex messages, calculate tokens for each part separately 21 | let totalTokens = 0; 22 | 23 | for (const part of text.content) { 24 | if (part instanceof vscode.LanguageModelTextPart) { 25 | // Estimate tokens directly for plain text 26 | totalTokens += estimateTextTokens(part.value); 27 | } else if (part instanceof vscode.LanguageModelDataPart) { 28 | // Estimate tokens for image or data parts based on type 29 | if (part.mimeType.startsWith("image/")) { 30 | // Images are approximately 170 tokens 31 | totalTokens += 170; 32 | } else { 33 | // For other binary data, use a more conservative estimate 34 | totalTokens += Math.ceil(part.data.length / 4); 35 | } 36 | } else if (part instanceof vscode.LanguageModelToolCallPart) { 37 | // Tool call token calculation 38 | const toolCallText = `${part.name}(${JSON.stringify(part.input)})`; 39 | totalTokens += estimateTextTokens(toolCallText); 40 | } else if (part instanceof vscode.LanguageModelToolResultPart) { 41 | // Tool result token calculation 42 | const resultText = typeof part.content === "string" ? part.content : JSON.stringify(part.content); 43 | totalTokens += estimateTextTokens(resultText); 44 | } else if (part instanceof vscode.LanguageModelThinkingPart) { 45 | // Thinking part token calculation 46 | const thinkingText = Array.isArray(part.value) ?
part.value.join("") : part.value; 47 | totalTokens += estimateTextTokens(thinkingText); 48 | } 49 | } 50 | 51 | // Add fixed overhead for roles and structure 52 | totalTokens += 4; 53 | 54 | return totalTokens; 55 | } 56 | } 57 | 58 | /** Roughly estimate tokens for VS Code chat messages (text only) */ 59 | export function estimateMessagesTokens(msgs: readonly vscode.LanguageModelChatRequestMessage[]): number { 60 | let total = 0; 61 | for (const m of msgs) { 62 | for (const part of m.content) { 63 | if (part instanceof vscode.LanguageModelTextPart) { 64 | total += estimateTextTokens(part.value); 65 | } 66 | } 67 | } 68 | return total; 69 | } 70 | 71 | /** Rough token estimate tuned for different content types */ 72 | export function estimateTextTokens(text: string): number { 73 | const chineseChars = (text.match(/[\u4e00-\u9fff]/g) || []).length; 74 | const englishWords = (text.match(/\b[a-zA-Z]+\b/g) || []).length; 75 | const symbols = text.length - chineseChars - englishWords; 76 | 77 | // Chinese characters count as ~1.5 tokens, English words as ~1 token, symbols as ~0.5 tokens 78 | return Math.ceil(chineseChars * 1.5 + englishWords + symbols * 0.5); 79 | } 80 | 81 | /** Rough token estimate for tool definitions by JSON size */ 82 | export function estimateToolTokens( 83 | tools: { type: string; function: { name: string; description?: string; parameters?: object } }[] | undefined 84 | ): number { 85 | if (!tools || tools.length === 0) { 86 | return 0; 87 | } 88 | try { 89 | const json = JSON.stringify(tools); 90 | return Math.ceil(json.length / 4); 91 | } catch { 92 | return 0; 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/vscode.proposed.chatProvider.d.ts: -------------------------------------------------------------------------------- 1 | /*--------------------------------------------------------------------------------------------- 2 | * Copyright (c) Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See License.txt in the project root for license information. 4 | *--------------------------------------------------------------------------------------------*/ 5 | 6 | // version: 4 7 | 8 | declare module "vscode" { 9 | /** 10 | * The provider version of {@linkcode LanguageModelChatRequestOptions} 11 | */ 12 | export interface ProvideLanguageModelChatResponseOptions { 13 | /** 14 | * What extension initiated the request to the language model 15 | */ 16 | readonly requestInitiator: string; 17 | } 18 | 19 | /** 20 | * All the information representing a single language model contributed by a {@linkcode LanguageModelChatProvider}. 21 | */ 22 | export interface LanguageModelChatInformation { 23 | /** 24 | * When present, this gates the use of `requestLanguageModelAccess` behind an authorization flow where 25 | * the user must approve of another extension accessing the models contributed by this extension. 26 | * Additionally, the extension can provide a label that will be shown in the UI. 27 | * A common example of a label is an account name that is signed in. 28 | * 29 | */ 30 | requiresAuthorization?: true | { label: string }; 31 | 32 | /** 33 | * Whether or not this will be selected by default in the model picker 34 | * NOT BEING FINALIZED 35 | */ 36 | readonly isDefault?: boolean; 37 | 38 | /** 39 | * Whether or not the model will show up in the model picker immediately upon being made known via {@linkcode LanguageModelChatProvider.provideLanguageModelChatInformation}.
40 | * NOT BEING FINALIZED 41 | */ 42 | readonly isUserSelectable?: boolean; 43 | 44 | /** 45 | * Optional category to group models by in the model picker. 46 | * The lower the order, the higher the category appears in the list. 47 | * Has no effect if `isUserSelectable` is `false`. 48 | * 49 | * WONT BE FINALIZED 50 | */ 51 | readonly category?: { label: string; order: number }; 52 | 53 | readonly statusIcon?: ThemeIcon; 54 | } 55 | 56 | export interface LanguageModelChatCapabilities { 57 | /** 58 | * The tools the model prefers for making file edits. If not provided or if none of the tools, 59 | * are recognized, the editor will try multiple edit tools and pick the best one. The available 60 | * edit tools WILL change over time and this capability only serves as a hint to the editor. 61 | * 62 | * Edit tools currently recognized include: 63 | * - 'find-replace': Find and replace text in a document. 64 | * - 'multi-find-replace': Find and replace multiple text snippets across documents. 65 | * - 'apply-patch': A file-oriented diff format used by some OpenAI models 66 | * - 'code-rewrite': A general but slower editing tool that allows the model 67 | * to rewrite and code snippet and provide only the replacement to the editor. 68 | * 69 | * The order of edit tools in this array has no significance; all of the recognized edit 70 | * tools will be made available to the model. 71 | */ 72 | readonly editTools?: string[]; 73 | } 74 | 75 | export type LanguageModelResponsePart2 = 76 | | LanguageModelResponsePart 77 | | LanguageModelDataPart 78 | | LanguageModelThinkingPart; 79 | 80 | export interface LanguageModelChatProvider { 81 | provideLanguageModelChatResponse( 82 | model: T, 83 | messages: readonly LanguageModelChatRequestMessage[], 84 | options: ProvideLanguageModelChatResponseOptions, 85 | progress: Progress, 86 | token: CancellationToken 87 | ): Thenable; 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/types.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * A single underlying provider (e.g., together, groq) for a model. 3 | */ 4 | export interface HFProvider { 5 | provider: string; 6 | status: string; 7 | supports_tools?: boolean; 8 | supports_structured_output?: boolean; 9 | context_length?: number; 10 | } 11 | 12 | /** 13 | * A model entry returned by the Hugging Face router models endpoint. 
14 | */ 15 | export interface HFArchitecture { 16 | input_modalities?: string[]; 17 | output_modalities?: string[]; 18 | } 19 | 20 | export interface HFModelItem { 21 | id: string; 22 | object?: string; 23 | created?: number; 24 | owned_by: string; 25 | configId?: string; 26 | displayName?: string; 27 | baseUrl?: string; 28 | providers?: HFProvider[]; 29 | architecture?: HFArchitecture; 30 | context_length?: number; 31 | vision?: boolean; 32 | max_tokens?: number; 33 | // OpenAI new standard parameter 34 | max_completion_tokens?: number; 35 | reasoning_effort?: string; 36 | enable_thinking?: boolean; 37 | thinking_budget?: number; 38 | // New thinking configuration for Zai provider 39 | thinking?: ThinkingConfig; 40 | // Allow null so user can explicitly disable sending this parameter (fall back to provider default) 41 | temperature?: number | null; 42 | // Allow null so user can explicitly disable sending this parameter (fall back to provider default) 43 | top_p?: number | null; 44 | top_k?: number; 45 | min_p?: number; 46 | frequency_penalty?: number; 47 | presence_penalty?: number; 48 | repetition_penalty?: number; 49 | reasoning?: ReasoningConfig; 50 | /** 51 | * Optional family specification for the model. This allows users to specify 52 | * the model family (e.g., "gpt-4", "claude-3", "gemini") to enable family-specific 53 | * optimizations and behaviors in the Copilot extension. If not specified, 54 | * defaults to "oai-compatible". 55 | */ 56 | family?: string; 57 | 58 | /** 59 | * Extra configuration parameters that can be used for custom functionality. 60 | * This allows users to add any additional parameters they might need 61 | * without modifying the core interface. 62 | */ 63 | extra?: Record; 64 | 65 | /** 66 | * Custom HTTP headers to be sent with every request to this model's provider. 67 | * These headers will be merged with the default headers (Authorization, Content-Type, User-Agent). 68 | * Example: { "X-API-Version": "v1", "X-Custom-Header": "value" } 69 | */ 70 | headers?: Record; 71 | 72 | /** 73 | * Whether to include reasoning_content in assistant messages sent to the API. 74 | * Support deepseek-v3.2 or others. 75 | */ 76 | include_reasoning_in_request?: boolean; 77 | 78 | /** 79 | * API mode: "openai" for OpenAI-compatible API, "ollama" for Ollama native API. 80 | * Default is "openai". 81 | */ 82 | apiMode?: "openai" | "ollama" | "anthropic"; 83 | } 84 | 85 | /** 86 | * OpenRouter reasoning configuration 87 | */ 88 | export interface ReasoningConfig { 89 | effort?: string; 90 | exclude?: boolean; 91 | max_tokens?: number; 92 | enabled?: boolean; 93 | } 94 | 95 | /** 96 | * Supplemental model info from the Hugging Face hub API. 97 | */ 98 | // Deprecated: extra model info was previously fetched from the hub API 99 | export interface HFExtraModelInfo { 100 | id: string; 101 | pipeline_tag?: string; 102 | } 103 | 104 | /** 105 | * Response envelope for the router models listing. 
106 | */ 107 | export interface HFModelsResponse { 108 | object: string; 109 | data: HFModelItem[]; 110 | } 111 | 112 | /** 113 | * Thinking configuration for Zai provider 114 | */ 115 | export interface ThinkingConfig { 116 | type?: string; 117 | } 118 | 119 | /** 120 | * Retry configuration for rate limiting 121 | */ 122 | export interface RetryConfig { 123 | enabled?: boolean; 124 | max_attempts?: number; 125 | interval_ms?: number; 126 | status_codes?: number[]; 127 | } 128 | -------------------------------------------------------------------------------- /src/statusBar.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from "vscode"; 2 | import { LanguageModelChatInformation, LanguageModelChatRequestMessage, CancellationTokenSource } from "vscode"; 3 | import { prepareTokenCount } from "./provideToken"; 4 | 5 | export function initStatusBar(context: vscode.ExtensionContext): vscode.StatusBarItem { 6 | // Create status bar item for token count display 7 | const tokenCountStatusBarItem = vscode.window.createStatusBarItem(vscode.StatusBarAlignment.Right, 100); 8 | tokenCountStatusBarItem.name = "Token Count"; 9 | tokenCountStatusBarItem.text = "$(symbol-numeric) Ready"; 10 | tokenCountStatusBarItem.tooltip = "Current model token usage - Click to Manage ApiKeys"; 11 | tokenCountStatusBarItem.command = "oaicopilot.setProviderApikey"; 12 | context.subscriptions.push(tokenCountStatusBarItem); 13 | // Show the status bar item initially 14 | tokenCountStatusBarItem.show(); 15 | return tokenCountStatusBarItem; 16 | } 17 | 18 | /** 19 | * Format number to thousands (K, M, B) format 20 | * @param value The number to format 21 | * @returns Formatted string (e.g., "2.3K", "168.0K") 22 | */ 23 | export function formatTokenCount(value: number): string { 24 | if (value >= 1_000_000_000) { 25 | return (value / 1_000_000_000).toFixed(1) + "B"; 26 | } else if (value >= 1_000_000) { 27 | return (value / 1_000_000).toFixed(1) + "M"; 28 | } else if (value >= 1_000) { 29 | return (value / 1_000).toFixed(1) + "K"; 30 | } 31 | return value.toLocaleString(); 32 | } 33 | 34 | /** 35 | * Create a visual progress bar showing token usage 36 | * @param usedTokens Tokens used 37 | * @param maxTokens Maximum tokens available 38 | * @returns Progress bar string (e.g., "▆ 75%") 39 | */ 40 | export function createProgressBar(usedTokens: number, maxTokens: number): string { 41 | const blocks = ["▁", "▂", "▃", "▄", "▅", "▆", "▇", "█"]; 42 | const usagePercentage = Math.min((usedTokens / maxTokens) * 100, 100); 43 | const blockIndex = Math.min(Math.floor((usagePercentage / 100) * blocks.length), blocks.length - 1); 44 | 45 | return `${blocks[blockIndex]} ${Math.round(usagePercentage)}%`; 46 | } 47 | 48 | /** 49 | * Update the status bar with token usage information 50 | * @param messages The chat messages to count tokens for 51 | * @param model The language model information 52 | * @param statusBarItem The status bar item to update 53 | * @param provideTokenCount Callback function to count tokens for a message 54 | */ 55 | export async function updateContextStatusBar( 56 | messages: readonly LanguageModelChatRequestMessage[], 57 | model: LanguageModelChatInformation, 58 | statusBarItem: vscode.StatusBarItem 59 | ): Promise { 60 | // Create a single CancellationTokenSource for all token count operations 61 | const cancellationTokenSource = new CancellationTokenSource(); 62 | 63 | // Calculate tokens for all messages in parallel 64 | const tokenCountPromises = 
messages.map((message) => 65 | prepareTokenCount(model, message, cancellationTokenSource.token) 66 | ); 67 | 68 | const tokenCounts = await Promise.all(tokenCountPromises); 69 | const totalTokenCount = tokenCounts.reduce((sum, count) => sum + count, 0); 70 | 71 | // Update status bar with token count and model context window 72 | const maxTokens = model.maxInputTokens + model.maxOutputTokens; 73 | 74 | // Create visual progress bar with single progressive block 75 | const progressBar = createProgressBar(totalTokenCount, maxTokens); 76 | const displayText = `$(symbol-parameter) ${progressBar}`; 77 | statusBarItem.text = displayText; 78 | statusBarItem.tooltip = `Token Usage: ${formatTokenCount(totalTokenCount)} / ${formatTokenCount(maxTokens)}\n\n${progressBar}\n\nClick to Manage ApiKeys`; 79 | 80 | // Add color coding based on token usage 81 | const usagePercentage = (totalTokenCount / maxTokens) * 100; 82 | if (usagePercentage >= 90) { 83 | statusBarItem.backgroundColor = new vscode.ThemeColor("statusBarItem.errorBackground"); 84 | } else if (usagePercentage >= 70) { 85 | statusBarItem.backgroundColor = new vscode.ThemeColor("statusBarItem.warningBackground"); 86 | } else { 87 | statusBarItem.backgroundColor = undefined; 88 | } 89 | 90 | statusBarItem.show(); 91 | } 92 | -------------------------------------------------------------------------------- /src/extension.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from "vscode"; 2 | import { HuggingFaceChatModelProvider } from "./provider"; 3 | import type { HFModelItem } from "./types"; 4 | import { initStatusBar } from "./statusBar"; 5 | 6 | export function activate(context: vscode.ExtensionContext) { 7 | // Build a descriptive User-Agent to help quantify API usage 8 | const ext = vscode.extensions.getExtension("johnny-zhao.oai-compatible-copilot"); 9 | const extVersion = ext?.packageJSON?.version ?? "unknown"; 10 | const vscodeVersion = vscode.version; 11 | // Keep UA minimal: only extension version and VS Code version 12 | const ua = `oai-compatible-copilot/${extVersion} VSCode/${vscodeVersion}`; 13 | 14 | const tokenCountStatusBarItem: vscode.StatusBarItem = initStatusBar(context); 15 | const provider = new HuggingFaceChatModelProvider(context.secrets, ua, tokenCountStatusBarItem); 16 | // Register the Hugging Face provider under the vendor id used in package.json 17 | vscode.lm.registerLanguageModelChatProvider("oaicopilot", provider); 18 | 19 | // Management command to configure API key 20 | context.subscriptions.push( 21 | vscode.commands.registerCommand("oaicopilot.setApikey", async () => { 22 | const existing = await context.secrets.get("oaicopilot.apiKey"); 23 | const apiKey = await vscode.window.showInputBox({ 24 | title: "OAI Compatible Provider API Key", 25 | prompt: existing ? "Update your OAI Compatible API key" : "Enter your OAI Compatible API key", 26 | ignoreFocusOut: true, 27 | password: true, 28 | value: existing ?? 
"", 29 | }); 30 | if (apiKey === undefined) { 31 | return; // user canceled 32 | } 33 | if (!apiKey.trim()) { 34 | await context.secrets.delete("oaicopilot.apiKey"); 35 | vscode.window.showInformationMessage("OAI Compatible API key cleared."); 36 | return; 37 | } 38 | await context.secrets.store("oaicopilot.apiKey", apiKey.trim()); 39 | vscode.window.showInformationMessage("OAI Compatible API key saved."); 40 | }) 41 | ); 42 | 43 | // Management command to configure provider-specific API keys 44 | context.subscriptions.push( 45 | vscode.commands.registerCommand("oaicopilot.setProviderApikey", async () => { 46 | // Get provider list from configuration 47 | const config = vscode.workspace.getConfiguration(); 48 | const userModels = config.get("oaicopilot.models", []); 49 | 50 | // Extract unique providers (case-insensitive) 51 | const providers = Array.from( 52 | new Set(userModels.map((m) => m.owned_by.toLowerCase()).filter((p) => p && p.trim() !== "")) 53 | ).sort(); 54 | 55 | if (providers.length === 0) { 56 | vscode.window.showErrorMessage( 57 | "No providers found in oaicopilot.models configuration. Please configure models first." 58 | ); 59 | return; 60 | } 61 | 62 | // Let user select provider 63 | const selectedProvider = await vscode.window.showQuickPick(providers, { 64 | title: "Select Provider", 65 | placeHolder: "Select a provider to configure API key", 66 | }); 67 | 68 | if (!selectedProvider) { 69 | return; // user canceled 70 | } 71 | 72 | // Get existing API key for selected provider 73 | const providerKey = `oaicopilot.apiKey.${selectedProvider}`; 74 | const existing = await context.secrets.get(providerKey); 75 | 76 | // Prompt for API key 77 | const apiKey = await vscode.window.showInputBox({ 78 | title: `OAI Compatible API Key for ${selectedProvider}`, 79 | prompt: existing ? `Update API key for ${selectedProvider}` : `Enter API key for ${selectedProvider}`, 80 | ignoreFocusOut: true, 81 | password: true, 82 | value: existing ?? "", 83 | }); 84 | 85 | if (apiKey === undefined) { 86 | return; // user canceled 87 | } 88 | 89 | if (!apiKey.trim()) { 90 | await context.secrets.delete(providerKey); 91 | vscode.window.showInformationMessage(`API key for ${selectedProvider} cleared.`); 92 | return; 93 | } 94 | 95 | await context.secrets.store(providerKey, apiKey.trim()); 96 | vscode.window.showInformationMessage(`API key for ${selectedProvider} saved.`); 97 | }) 98 | ); 99 | } 100 | 101 | export function deactivate() {} 102 | -------------------------------------------------------------------------------- /src/vscode.proposed.languageModelThinkingPart.d.ts: -------------------------------------------------------------------------------- 1 | /*--------------------------------------------------------------------------------------------- 2 | * Copyright (c) Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See License.txt in the project root for license information. 4 | *--------------------------------------------------------------------------------------------*/ 5 | 6 | // version: 1 7 | 8 | declare module "vscode" { 9 | /** 10 | * A language model response part containing thinking/reasoning content. 11 | * Thinking tokens represent the model's internal reasoning process that 12 | * typically streams before the final response. 13 | */ 14 | export class LanguageModelThinkingPart { 15 | /** 16 | * The thinking/reasoning text content. 17 | */ 18 | value: string | string[]; 19 | 20 | /** 21 | * Optional unique identifier for this thinking sequence. 
22 | * This ID is typically provided at the end of the thinking stream 23 | * and can be used for retrieval or reference purposes. 24 | */ 25 | id?: string; 26 | 27 | /** 28 | * Optional metadata associated with this thinking sequence. 29 | */ 30 | metadata?: { readonly [key: string]: any }; 31 | 32 | /** 33 | * Construct a thinking part with the given content. 34 | * @param value The thinking text content. 35 | * @param id Optional unique identifier for this thinking sequence. 36 | * @param metadata Optional metadata associated with this thinking sequence. 37 | */ 38 | constructor(value: string | string[], id?: string, metadata?: { readonly [key: string]: any }); 39 | } 40 | 41 | export interface LanguageModelChatResponse { 42 | /** 43 | * An async iterable that is a stream of text, thinking, and tool-call parts forming the overall response. 44 | * This includes {@link LanguageModelThinkingPart} which represents the model's internal reasoning process. 45 | */ 46 | stream: AsyncIterable; 47 | } 48 | 49 | export interface LanguageModelChat { 50 | sendRequest( 51 | messages: Array, 52 | options?: LanguageModelChatRequestOptions, 53 | token?: CancellationToken 54 | ): Thenable; 55 | countTokens( 56 | text: string | LanguageModelChatMessage | LanguageModelChatMessage2, 57 | token?: CancellationToken 58 | ): Thenable; 59 | } 60 | 61 | /** 62 | * Represents a message in a chat. Can assume different roles, like user or assistant. 63 | */ 64 | export class LanguageModelChatMessage2 { 65 | /** 66 | * Utility to create a new user message. 67 | * 68 | * @param content The content of the message. 69 | * @param name The optional name of a user for the message. 70 | */ 71 | static User( 72 | content: string | Array, 73 | name?: string 74 | ): LanguageModelChatMessage2; 75 | 76 | /** 77 | * Utility to create a new assistant message. 78 | * 79 | * @param content The content of the message. 80 | * @param name The optional name of a user for the message. 81 | */ 82 | static Assistant( 83 | content: string | Array, 84 | name?: string 85 | ): LanguageModelChatMessage2; 86 | 87 | /** 88 | * The role of this message. 89 | */ 90 | role: LanguageModelChatMessageRole; 91 | 92 | /** 93 | * A string or heterogeneous array of things that a message can contain as content. Some parts may be message-type 94 | * specific for some models. 95 | */ 96 | content: Array< 97 | | LanguageModelTextPart 98 | | LanguageModelToolResultPart 99 | | LanguageModelToolCallPart 100 | | LanguageModelDataPart 101 | | LanguageModelThinkingPart 102 | >; 103 | 104 | /** 105 | * The optional name of a user for this message. 106 | */ 107 | name: string | undefined; 108 | 109 | /** 110 | * Create a new user message. 111 | * 112 | * @param role The role of the message. 113 | * @param content The content of the message. 114 | * @param name The optional name of a user for the message. 115 | */ 116 | constructor( 117 | role: LanguageModelChatMessageRole, 118 | content: 119 | | string 120 | | Array< 121 | | LanguageModelTextPart 122 | | LanguageModelToolResultPart 123 | | LanguageModelToolCallPart 124 | | LanguageModelDataPart 125 | | LanguageModelThinkingPart 126 | >, 127 | name?: string 128 | ); 129 | } 130 | 131 | /** 132 | * Temporary alias for LanguageModelToolResultPart to avoid breaking changes in chat. 133 | */ 134 | export class LanguageModelToolResultPart2 extends LanguageModelToolResultPart {} 135 | 136 | /** 137 | * Temporary alias for LanguageModelToolResult to avoid breaking changes in chat. 
138 | */ 139 | export class LanguageModelToolResult2 extends LanguageModelToolResult {} 140 | } 141 | -------------------------------------------------------------------------------- /.github/copilot-instructions.md: -------------------------------------------------------------------------------- 1 | # OAI Compatible Copilot - AI Agent Guidelines 2 | 3 | ## Project Overview 4 | This is a VS Code extension that integrates OpenAI-compatible inference providers into GitHub Copilot Chat. It enables users to use frontier LLMs (Qwen3 Coder, Kimi K2, DeepSeek V3.2, GLM 4.6, etc.) through any OpenAI-compatible API provider. 5 | 6 | ## Architecture Patterns 7 | 8 | ### Core Components 9 | 1. **Provider System** (`src/provider.ts`): Main entry point implementing `LanguageModelChatProvider` 10 | 2. **API Abstraction Layer** (`src/commonApi.ts`): Base class for all API implementations 11 | 3. **Specific API Implementations**: 12 | - `src/openai/openaiApi.ts` - OpenAI-compatible API 13 | - `src/ollama/ollamaApi.ts` - Ollama local API 14 | - `src/anthropic/anthropicApi.ts` - Anthropic Claude API 15 | 4. **Type System** (`src/types.ts`): Centralized type definitions for model configurations 16 | 5. **Utility Functions** (`src/utils.ts`): Shared helpers for retry logic, tool conversion, etc. 17 | 18 | ### Key Design Decisions 19 | - **Multi-provider support**: Users can configure models from multiple providers simultaneously 20 | - **Configuration IDs**: Model IDs can include `::configId` suffix for different configurations of the same model 21 | - **Retry mechanism**: Automatic retry for HTTP errors (429, 500, 502, 503, 504) with exponential backoff 22 | - **Thinking support**: Integration with VS Code's `languageModelThinkingPart` proposal for reasoning content 23 | 24 | ## Development Workflows 25 | 26 | ### Build Commands 27 | ```bash 28 | npm run compile # TypeScript compilation 29 | npm run lint # ESLint checking 30 | npm run format # Prettier formatting 31 | ``` 32 | 33 | ### Testing & Debugging 34 | - **Run Extension**: Use VS Code's "Run Extension" launch configuration 35 | - **Extension Tests**: Use "Extension Tests" launch configuration (requires `tasks: watch-tests`) 36 | - **Watch Tasks**: Two background tasks run automatically: 37 | - `npm: watch` - TypeScript compilation 38 | - `npm: watch-tests` - Test compilation 39 | 40 | ### VS Code Integration 41 | - **API Proposals**: Uses `chatProvider` and `languageModelThinkingPart` proposals 42 | - **Secret Storage**: API keys stored via `vscode.SecretStorage` 43 | - **Status Bar**: Token usage displayed in status bar (`src/statusBar.ts`) 44 | 45 | ## Code Conventions 46 | 47 | ### TypeScript Patterns 48 | - **Strict mode**: Enabled in `tsconfig.json` 49 | - **ES2024 target**: Modern JavaScript features 50 | - **Module resolution**: `Node16` module system 51 | - **Type imports**: Use `import type` for type-only imports 52 | - write code comments in English. 
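A minimal sketch of these conventions; the `HFModelItem` import mirrors `src/types.ts`, while `resolveBaseUrl` itself is a hypothetical helper shown only to illustrate the style:

```typescript
// Type-only import: erased at compile time, so it adds no runtime dependency.
import type { HFModelItem } from "./types";

// Strict mode forces optional fields to be handled explicitly. A model-specific
// baseUrl falls back to the global oaicopilot.baseUrl setting (see Model Configuration below).
export function resolveBaseUrl(model: HFModelItem, globalBaseUrl: string): string {
	return model.baseUrl ?? globalBaseUrl;
}
```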
53 | 54 | ### Error Handling 55 | - **Retry logic**: Implement retry with `createRetryConfig()` and `executeWithRetry()` from `utils.ts` 56 | - **HTTP errors**: Retry on specific status codes (429, 500, 502, 503, 504) 57 | - **User feedback**: Show appropriate messages via `vscode.window.showInformationMessage()` 58 | 59 | ### Model Configuration 60 | - **Model items**: Defined in `HFModelItem` interface (`src/types.ts`) 61 | - **Provider-specific keys**: Support for multiple API keys via `oaicopilot.setProviderApikey` command 62 | - **Configuration inheritance**: Model-specific `baseUrl` falls back to global `oaicopilot.baseUrl` 63 | 64 | ### Message Conversion 65 | - **Role mapping**: Convert VS Code chat roles to provider-specific roles in API implementations 66 | - **Content handling**: Support for text, images (via data URLs), and tool calls 67 | - **Thinking parts**: Parse and emit `LanguageModelThinkingPart` for reasoning models 68 | 69 | ## File Organization 70 | 71 | ### Source Structure 72 | ``` 73 | src/ 74 | ├── extension.ts # Extension activation 75 | ├── provider.ts # Main provider implementation 76 | ├── commonApi.ts # Base API class 77 | ├── types.ts # Type definitions 78 | ├── utils.ts # Utility functions 79 | ├── statusBar.ts # Status bar integration 80 | ├── provideModel.ts # Model information provider 81 | ├── provideToken.ts # Token counting 82 | ├── openai/ # OpenAI-compatible API 83 | ├── ollama/ # Ollama API 84 | └── anthropic/ # Anthropic API 85 | ``` 86 | 87 | ### Configuration Files 88 | - `package.json` - Extension metadata and dependencies 89 | - `tsconfig.json` - TypeScript configuration 90 | - `eslint.config.mjs` - ESLint configuration (ES modules) 91 | - `.prettierrc` - Code formatting rules 92 | 93 | ## Integration Points 94 | 95 | ### VS Code APIs 96 | - `vscode.lm.registerLanguageModelChatProvider()` - Register chat provider 97 | - `vscode.SecretStorage` - Secure API key storage 98 | - `vscode.StatusBarItem` - Display token usage 99 | - `vscode.commands.registerCommand()` - Extension commands 100 | 101 | ### External Dependencies 102 | - **No runtime dependencies** - Extension uses VS Code APIs only 103 | - **Dev dependencies**: TypeScript, ESLint, Prettier, VS Code test utilities 104 | - **API Proposals**: Experimental VS Code APIs enabled via `enabledApiProposals` 105 | 106 | ## Common Tasks 107 | 108 | ### Adding New API Provider 109 | 1. Create new directory under `src/` (e.g., `src/newprovider/`) 110 | 2. Create API class extending `CommonApi` 111 | 3. Implement `convertMessages()` and `sendRequest()` methods 112 | 4. Add to provider instantiation logic in `provider.ts` 113 | 5. Update type definitions if needed 114 | 115 | ### Modifying Model Configuration 116 | 1. Update `HFModelItem` interface in `src/types.ts` 117 | 2. Update configuration parsing in `src/provider.ts` 118 | 3. Update API implementations to handle new fields 119 | 4. Update documentation in `README.md` 120 | 121 | ### Testing Changes 122 | 1. Run `npm run watch` in background 123 | 2. Use "Run Extension" launch configuration 124 | 3. Test in Extension Development Host window 125 | 4. 
Check status bar updates and error handling 126 | 127 | ## Important Notes 128 | - **API Key Management**: Users can set global or provider-specific API keys 129 | - **Model Families**: `family` field enables model-specific optimizations 130 | - **Vision Support**: Enabled via `vision: true` in model configuration 131 | - **Tool Support**: Convert VS Code tools to OpenAI function definitions 132 | - **Streaming**: Support for streaming responses with tool call buffering 133 | 134 | ## Troubleshooting 135 | - **Compilation errors**: Check TypeScript strict mode requirements 136 | - **API errors**: Verify retry logic in `utils.ts` 137 | - **Missing models**: Check `provideLanguageModelChatInformation()` in `provider.ts` 138 | - **Thinking not working**: Ensure `languageModelThinkingPart` proposal is enabled -------------------------------------------------------------------------------- /src/vscode.proposed.languageModelDataPart.d.ts: -------------------------------------------------------------------------------- 1 | /*--------------------------------------------------------------------------------------------- 2 | * Copyright (c) Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See License.txt in the project root for license information. 4 | *--------------------------------------------------------------------------------------------*/ 5 | 6 | // version: 3 7 | 8 | declare module "vscode" { 9 | export interface LanguageModelChat { 10 | sendRequest( 11 | messages: Array, 12 | options?: LanguageModelChatRequestOptions, 13 | token?: CancellationToken 14 | ): Thenable; 15 | countTokens( 16 | text: string | LanguageModelChatMessage | LanguageModelChatMessage2, 17 | token?: CancellationToken 18 | ): Thenable; 19 | } 20 | 21 | /** 22 | * Represents a message in a chat. Can assume different roles, like user or assistant. 23 | */ 24 | export class LanguageModelChatMessage2 { 25 | /** 26 | * Utility to create a new user message. 27 | * 28 | * @param content The content of the message. 29 | * @param name The optional name of a user for the message. 30 | */ 31 | static User( 32 | content: string | Array, 33 | name?: string 34 | ): LanguageModelChatMessage2; 35 | 36 | /** 37 | * Utility to create a new assistant message. 38 | * 39 | * @param content The content of the message. 40 | * @param name The optional name of a user for the message. 41 | */ 42 | static Assistant( 43 | content: string | Array, 44 | name?: string 45 | ): LanguageModelChatMessage2; 46 | 47 | /** 48 | * The role of this message. 49 | */ 50 | role: LanguageModelChatMessageRole; 51 | 52 | /** 53 | * A string or heterogeneous array of things that a message can contain as content. Some parts may be message-type 54 | * specific for some models. 55 | */ 56 | content: Array< 57 | | LanguageModelTextPart 58 | | LanguageModelToolResultPart2 59 | | LanguageModelToolCallPart 60 | | LanguageModelDataPart 61 | | LanguageModelThinkingPart 62 | >; 63 | 64 | /** 65 | * The optional name of a user for this message. 66 | */ 67 | name: string | undefined; 68 | 69 | /** 70 | * Create a new user message. 71 | * 72 | * @param role The role of the message. 73 | * @param content The content of the message. 74 | * @param name The optional name of a user for the message. 
75 | */ 76 | constructor( 77 | role: LanguageModelChatMessageRole, 78 | content: 79 | | string 80 | | Array< 81 | | LanguageModelTextPart 82 | | LanguageModelToolResultPart2 83 | | LanguageModelToolCallPart 84 | | LanguageModelDataPart 85 | | LanguageModelThinkingPart 86 | >, 87 | name?: string 88 | ); 89 | } 90 | 91 | /** 92 | * A language model response part containing arbitrary data, returned from a {@link LanguageModelChatResponse}. 93 | */ 94 | export class LanguageModelDataPart { 95 | /** 96 | * Factory function to create a `LanguageModelDataPart` for an image. 97 | * @param data Binary image data 98 | * @param mimeType The MIME type of the image 99 | */ 100 | static image(data: Uint8Array, mimeType: ChatImageMimeType): LanguageModelDataPart; 101 | 102 | static json(value: object): LanguageModelDataPart; 103 | 104 | static text(value: string): LanguageModelDataPart; 105 | 106 | /** 107 | * The mime type which determines how the data property is interpreted. 108 | */ 109 | mimeType: string; 110 | 111 | /** 112 | * The data of the part. 113 | */ 114 | data: Uint8Array; 115 | 116 | /** 117 | * Construct a generic data part with the given content. 118 | * @param value The data of the part. 119 | */ 120 | constructor(data: Uint8Array, mimeType: string); 121 | } 122 | 123 | /** 124 | * Enum for supported image MIME types. 125 | */ 126 | export enum ChatImageMimeType { 127 | PNG = "image/png", 128 | JPEG = "image/jpeg", 129 | GIF = "image/gif", 130 | WEBP = "image/webp", 131 | BMP = "image/bmp", 132 | } 133 | 134 | /** 135 | * The result of a tool call. This is the counterpart of a {@link LanguageModelToolCallPart tool call} and 136 | * it can only be included in the content of a User message 137 | */ 138 | export class LanguageModelToolResultPart2 { 139 | /** 140 | * The ID of the tool call. 141 | * 142 | * *Note* that this should match the {@link LanguageModelToolCallPart.callId callId} of a tool call part. 143 | */ 144 | callId: string; 145 | 146 | /** 147 | * The value of the tool result. 148 | */ 149 | content: Array; 150 | 151 | /** 152 | * @param callId The ID of the tool call. 153 | * @param content The content of the tool result. 154 | */ 155 | constructor( 156 | callId: string, 157 | content: Array 158 | ); 159 | } 160 | 161 | /** 162 | * A tool that can be invoked by a call to a {@link LanguageModelChat}. 163 | */ 164 | export interface LanguageModelTool { 165 | /** 166 | * Invoke the tool with the given input and return a result. 167 | * 168 | * The provided {@link LanguageModelToolInvocationOptions.input} has been validated against the declared schema. 169 | */ 170 | invoke( 171 | options: LanguageModelToolInvocationOptions, 172 | token: CancellationToken 173 | ): ProviderResult; 174 | } 175 | 176 | /** 177 | * A result returned from a tool invocation. If using `@vscode/prompt-tsx`, this result may be rendered using a `ToolResult`. 178 | */ 179 | export class LanguageModelToolResult2 { 180 | /** 181 | * A list of tool result content parts. Includes `unknown` becauses this list may be extended with new content types in 182 | * the future. 183 | * @see {@link lm.invokeTool}. 
184 | */ 185 | content: Array; 186 | 187 | /** 188 | * Create a LanguageModelToolResult 189 | * @param content A list of tool result content parts 190 | */ 191 | constructor(content: Array); 192 | } 193 | 194 | export namespace lm { 195 | export function invokeTool( 196 | name: string, 197 | options: LanguageModelToolInvocationOptions, 198 | token?: CancellationToken 199 | ): Thenable; 200 | } 201 | } 202 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | ## 0.1.8 (2025-12-17) 4 | 5 | - Feat: [Add Ollama /api/chat](https://github.com/JohnnyZ93/oai-compatible-copilot/pull/65) 6 | - Feat: [Add Anthropic /v1/messages](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/60) 7 | - Enhanced `oaicopilot.models` configuration including: 8 | - `apiMode`: API mode: 'openai' (Default) for API (/v1/chat/completions), 'ollama' for API (/api/chat), 'anthropic' for API (/v1/messages). 9 | 10 | ## 0.1.7 (2025-12-10) 11 | 12 | - Feat: [Expand oaicopilot.retry to handle other type of errors](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/62) 13 | - Fix: [Add buffer for think content](https://github.com/JohnnyZ93/oai-compatible-copilot/pull/61) 14 | - Add `oaicopilot.retry` configuration including: 15 | > Retry configuration for handling api errors like [429, 500, 502, 503, 504]. 16 | - `status_codes`: Additional HTTP status codes that will be merged. Default is [429, 500, 502, 503, 504]. 17 | 18 | ## 0.1.6 (2025-12-08) 19 | 20 | - Feat: [Сontext window state in statusBar](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/59) 21 | 22 | ## 0.1.5 (2025-12-05) 23 | 24 | - Fix: [Deepseek v3.2 reasoning tool call failed](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/54) 25 | - Enhanced `oaicopilot.models` configuration including: 26 | - `include_reasoning_in_request`: Whether to include reasoning_content in assistant messages sent to the API. Support deepseek-v3.2 or others. 27 | 28 | ## 0.1.4 (2025-11-03) 29 | 30 | - Feat: [Add headers support](https://github.com/JohnnyZ93/oai-compatible-copilot/pull/31) 31 | - Feat: [Add displayName option for models in Copilot interface](https://github.com/JohnnyZ93/oai-compatible-copilot/pull/32) 32 | - Enhanced `oaicopilot.models` configuration including: 33 | - `displayName`: Display name for the model that will be shown in the Copilot interface. 34 | - `headers`: Custom HTTP headers to be sent with every request to this model's provider (e.g., `{"X-API-Version": "v1", "X-Custom-Header": "value"}`). 35 | 36 | ## 0.1.3 (2025-10-31) 37 | 38 | - Fix: [Forces a prompt to set the default API key every time VS Code starts](https://github.com/JohnnyZ93/oai-compatible-copilot/pull/30) 39 | 40 | ## 0.1.2 (2025-10-29) 41 | 42 | - Feat: [add support for extra configuration parameters](https://github.com/JohnnyZ93/oai-compatible-copilot/pull/28) 43 | - Enhanced `oaicopilot.models` configuration including: 44 | - `extra`: Extra request parameters that will be used in /chat/completions. 45 | 46 | ## 0.1.1 (2025-10-28) 47 | 48 | - Fix: Cannot change apiKey when the `oaicopilot.models` have no baseUrl. 
49 | 50 | ## 0.1.0 (2025-10-28) 51 | 52 | - Feat: [Add request delay to prevent 429 Errors](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/24) 53 | - Fix: [Not Asking for Key when add new provider](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/26) 54 | - Add `oaicopilot.delay` configuration: Fixed delay in milliseconds between consecutive requests. Default is 0 (no delay). 55 | 56 | ## 0.0.9 (2025-10-27) 57 | 58 | - Feat: [Add Retry Mechanism for Model 429 Errors](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/24) 59 | - Fix: [Thinking block not end and show in new chat](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/25) 60 | - Add `oaicopilot.retry` configuration including: 61 | > Retry configuration for handling api errors like [429, 500, 502, 503, 504]. 62 | - `enabled`: Enable retry mechanism for api errors. Default is true. 63 | - `max_attempts`: Maximum number of retry attempts. Default is 3. 64 | - `interval_ms`: Interval between retry attempts in milliseconds. Default is 1000 (1 seconds). 65 | 66 | ## 0.0.8 (2025-10-21) 67 | 68 | - Fix: [LLM output missing `<`](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/19) 69 | - Remove inline tool call response processing, significantly accelerating model response speed. 70 | 71 | ## 0.0.7 (2025-10-15) 72 | 73 | - Feat: [`` block is not detected properly for Perplexity Sonar models](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/21) 74 | - Update VS Code proposed api version. 75 | 76 | ## 0.0.6 (2025-10-10) 77 | 78 | - Feat: [OpenAI use `max_completion_tokens` instead of `max_tokens` for GPT-5](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/19) 79 | - Enhanced `oaicopilot.models` configuration including: 80 | - `max_completion_tokens`: Maximum number of tokens to generate (OpenAI new standard parameter) 81 | - `reasoning_effort`: Reasoning effort level (OpenAI reasoning configuration) 82 | 83 | 84 | ## 0.0.5 (2025-10-09) 85 | 86 | - Feat: [GLM 4.6 - no thinking tags](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/15) 87 | - Feat: [Multi-config for the same model](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/18) 88 | - Enhanced `oaicopilot.models` configuration including: 89 | - `configId`: Configuration ID for this model. Allows defining the same model with different settings (e.g. 'glm-4.6::thinking', 'glm-4.6::no-thinking') 90 | - `thinking`: Thinking configuration for Zai provider 91 | - `type`: Set to 'enabled' to enable thinking, 'disabled' to disable thinking 92 | 93 | ## 0.0.4 (2025-09-23) 94 | 95 | - Fix: [Base url should be model specific](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/4) 96 | - Fix: [Set the effort variable of the reasoning model](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/5) 97 | - Fix: [Allow setting a custom model 'family'](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/8) 98 | 99 | ## 0.0.3 (2025-09-18) 100 | 101 | - Now you can see the model reasoning content in chat interface. 
102 | > ![thinkingPartDemo](./assets/thinkingPartDemo.png) 103 | - Fix: [Thinking Budget #2](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/2) 104 | - Fix: [iflow api key no response was returned #1](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/1) 105 | 106 | ## 0.0.2 (2025-09-18) 107 | 108 | - Deleted settings including: 109 | - `oaicopilot.enableThinking` 110 | - `oaicopilot.maxTokens` 111 | - `oaicopilot.temperature` 112 | - `oaicopilot.topP` 113 | - Enhanced `oaicopilot.models` configuration with support for per-model settings including: 114 | - `max_tokens`: Maximum number of tokens to generate 115 | - `enable_thinking`: Switches between thinking and non-thinking modes 116 | - `temperature`: Sampling temperature (range: [0, 2]) 117 | - `top_p`: Top-p sampling value (range: (0, 1]) 118 | - `top_k`: Top-k sampling value 119 | - `min_p`: Minimum probability threshold 120 | - `frequency_penalty`: Frequency penalty (range: [-2, 2]) 121 | - `presence_penalty`: Presence penalty (range: [-2, 2]) 122 | - `repetition_penalty`: Repetition penalty (range: (0, 2]) 123 | - Improved token estimation algorithm with better support for Chinese characters 124 | - Enhanced multi-modal message handling for image and text content 125 | 126 | ## 0.0.1 (2025-09-16) 127 | 128 | - Initial release -------------------------------------------------------------------------------- /src/provideModel.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from "vscode"; 2 | import { CancellationToken, LanguageModelChatInformation } from "vscode"; 3 | 4 | import type { HFModelItem, HFModelsResponse } from "./types"; 5 | 6 | const DEFAULT_CONTEXT_LENGTH = 128000; 7 | const DEFAULT_MAX_TOKENS = 4096; 8 | 9 | /** 10 | * Get the list of available language models contributed by this provider 11 | * @param options Options which specify the calling context of this function 12 | * @param token A cancellation token which signals if the user cancelled the request or not 13 | * @returns A promise that resolves to the list of available language models 14 | */ 15 | export async function prepareLanguageModelChatInformation( 16 | options: { silent: boolean }, 17 | _token: CancellationToken, 18 | secrets: vscode.SecretStorage, 19 | userAgent: string 20 | ): Promise { 21 | // Check for user-configured models first 22 | const config = vscode.workspace.getConfiguration(); 23 | const userModels = config.get("oaicopilot.models", []); 24 | 25 | let infos: LanguageModelChatInformation[]; 26 | if (userModels && userModels.length > 0) { 27 | // Return user-provided models directly 28 | infos = userModels.map((m) => { 29 | const contextLen = m?.context_length ?? DEFAULT_CONTEXT_LENGTH; 30 | const maxOutput = m?.max_completion_tokens ?? m?.max_tokens ?? DEFAULT_MAX_TOKENS; 31 | const maxInput = Math.max(1, contextLen - maxOutput); 32 | 33 | // 使用配置ID(如果存在)来生成唯一的模型ID 34 | const modelId = m.configId ? `${m.id}::${m.configId}` : m.id; 35 | const modelName = 36 | m.displayName || (m.configId ? `${m.id}::${m.configId} via ${m.owned_by}` : `${m.id} via ${m.owned_by}`); 37 | 38 | return { 39 | id: modelId, 40 | name: modelName, 41 | tooltip: m.configId 42 | ? `OAI Compatible ${m.id} (config: ${m.configId}) via ${m.owned_by}` 43 | : `OAI Compatible via ${m.owned_by}`, 44 | family: m.family ?? "oai-compatible", 45 | version: "1.0.0", 46 | maxInputTokens: maxInput, 47 | maxOutputTokens: maxOutput, 48 | capabilities: { 49 | toolCalling: true, 50 | imageInput: m?.vision ?? 
false, 51 | }, 52 | } satisfies LanguageModelChatInformation; 53 | }); 54 | } else { 55 | // Fallback: Fetch models from API 56 | const apiKey = await ensureApiKey(options.silent, secrets); 57 | if (!apiKey) { 58 | if (options.silent) { 59 | return []; 60 | } else { 61 | throw new Error("OAI Compatible API key not found"); 62 | } 63 | } 64 | const { models } = await fetchModels(apiKey, userAgent); 65 | 66 | infos = models.flatMap((m) => { 67 | const providers = m?.providers ?? []; 68 | const modalities = m.architecture?.input_modalities ?? []; 69 | const vision = Array.isArray(modalities) && modalities.includes("image"); 70 | 71 | // Build entries for all providers that support tool calling 72 | const toolProviders = providers.filter((p) => p.supports_tools === true); 73 | const entries: LanguageModelChatInformation[] = []; 74 | 75 | for (const p of toolProviders) { 76 | const contextLen = p?.context_length ?? DEFAULT_CONTEXT_LENGTH; 77 | const maxOutput = DEFAULT_MAX_TOKENS; 78 | const maxInput = Math.max(1, contextLen - maxOutput); 79 | entries.push({ 80 | id: `${m.id}:${p.provider}`, 81 | name: `${m.id} via ${p.provider}`, 82 | tooltip: `OAI Compatible via ${p.provider}`, 83 | family: m.family ?? "oai-compatible", 84 | version: "1.0.0", 85 | maxInputTokens: maxInput, 86 | maxOutputTokens: maxOutput, 87 | capabilities: { 88 | toolCalling: true, 89 | imageInput: vision, 90 | }, 91 | } satisfies LanguageModelChatInformation); 92 | } 93 | 94 | if (entries.length === 0) { 95 | const base = providers.length > 0 ? providers[0] : null; 96 | const contextLen = base?.context_length ?? DEFAULT_CONTEXT_LENGTH; 97 | const maxOutput = DEFAULT_MAX_TOKENS; 98 | const maxInput = Math.max(1, contextLen - maxOutput); 99 | entries.push({ 100 | id: `${m.id}`, 101 | name: `${m.id} via OAI Compatible`, 102 | tooltip: "OAI Compatible", 103 | family: m.family ?? "oai-compatible", 104 | version: "1.0.0", 105 | maxInputTokens: maxInput, 106 | maxOutputTokens: maxOutput, 107 | capabilities: { 108 | toolCalling: true, 109 | imageInput: true, 110 | }, 111 | } satisfies LanguageModelChatInformation); 112 | } 113 | 114 | return entries; 115 | }); 116 | } 117 | 118 | // console.debug("[OAI Compatible Model Provider] Loaded models:", infos); 119 | return infos; 120 | } 121 | 122 | /** 123 | * Fetch the list of models and supplementary metadata from Hugging Face. 124 | * @param apiKey The HF API key used to authenticate. 125 | */ 126 | async function fetchModels(apiKey: string, userAgent: string): Promise<{ models: HFModelItem[] }> { 127 | const config = vscode.workspace.getConfiguration(); 128 | const BASE_URL = config.get("oaicopilot.baseUrl", ""); 129 | if (!BASE_URL || !BASE_URL.startsWith("http")) { 130 | throw new Error(`Invalid base URL configuration.`); 131 | } 132 | const modelsList = (async () => { 133 | const resp = await fetch(`${BASE_URL.replace(/\/+$/, "")}/models`, { 134 | method: "GET", 135 | headers: { Authorization: `Bearer ${apiKey}`, "User-Agent": userAgent }, 136 | }); 137 | if (!resp.ok) { 138 | let text = ""; 139 | try { 140 | text = await resp.text(); 141 | } catch (error) { 142 | console.error("[OAI Compatible Model Provider] Failed to read response text", error); 143 | } 144 | const err = new Error( 145 | `Failed to fetch OAI Compatible models: ${resp.status} ${resp.statusText}${text ? 
`\n${text}` : ""}` 146 | ); 147 | console.error("[OAI Compatible Model Provider] Failed to fetch OAI Compatible models", err); 148 | throw err; 149 | } 150 | const parsed = (await resp.json()) as HFModelsResponse; 151 | return parsed.data ?? []; 152 | })(); 153 | 154 | try { 155 | const models = await modelsList; 156 | return { models }; 157 | } catch (err) { 158 | console.error("[OAI Compatible Model Provider] Failed to fetch OAI Compatible models", err); 159 | throw err; 160 | } 161 | } 162 | 163 | /** 164 | * Ensure an API key exists in SecretStorage, optionally prompting the user when not silent. 165 | * @param silent If true, do not prompt the user. 166 | * @param secrets vscode.SecretStorage 167 | */ 168 | async function ensureApiKey(silent: boolean, secrets: vscode.SecretStorage): Promise { 169 | // Fall back to generic API key 170 | let apiKey = await secrets.get("oaicopilot.apiKey"); 171 | 172 | if (!apiKey && !silent) { 173 | const entered = await vscode.window.showInputBox({ 174 | title: "OAI Compatible API Key", 175 | prompt: "Enter your OAI Compatible API key", 176 | ignoreFocusOut: true, 177 | password: true, 178 | }); 179 | if (entered && entered.trim()) { 180 | apiKey = entered.trim(); 181 | await secrets.store("oaicopilot.apiKey", apiKey); 182 | } 183 | } 184 | return apiKey; 185 | } 186 | -------------------------------------------------------------------------------- /src/utils.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from "vscode"; 2 | import type { RetryConfig } from "./types"; 3 | import { OpenAIFunctionToolDef } from "./openai/openaiTypes"; 4 | 5 | const RETRY_MAX_ATTEMPTS = 3; 6 | const RETRY_INTERVAL_MS = 1000; 7 | 8 | // HTTP status codes that should trigger a retry 9 | const RETRYABLE_STATUS_CODES = [429, 500, 502, 503, 504]; 10 | 11 | // Model ID parsing helper 12 | export interface ParsedModelId { 13 | baseId: string; 14 | configId?: string; 15 | } 16 | 17 | /** 18 | * Parse a model ID that may contain a configuration ID separator. 19 | * Format: "baseId::configId" or just "baseId" 20 | */ 21 | export function parseModelId(modelId: string): ParsedModelId { 22 | const parts = modelId.split("::"); 23 | if (parts.length >= 2) { 24 | return { 25 | baseId: parts[0], 26 | configId: parts.slice(1).join("::"), // In case configId itself contains '::' 27 | }; 28 | } 29 | return { 30 | baseId: modelId, 31 | }; 32 | } 33 | 34 | /** 35 | * Map VS Code message role to OpenAI message role string. 36 | * @param message The message whose role is mapped. 37 | */ 38 | export function mapRole(message: vscode.LanguageModelChatRequestMessage): "user" | "assistant" | "system" { 39 | const USER = vscode.LanguageModelChatMessageRole.User as unknown as number; 40 | const ASSISTANT = vscode.LanguageModelChatMessageRole.Assistant as unknown as number; 41 | const r = message.role as unknown as number; 42 | if (r === USER) { 43 | return "user"; 44 | } 45 | if (r === ASSISTANT) { 46 | return "assistant"; 47 | } 48 | return "system"; 49 | } 50 | 51 | /** 52 | * Convert VS Code tool definitions to OpenAI function tool definitions. 53 | * @param options Request options containing tools and toolMode. 54 | */ 55 | export function convertToolsToOpenAI(options: vscode.ProvideLanguageModelChatResponseOptions): { 56 | tools?: OpenAIFunctionToolDef[]; 57 | tool_choice?: "auto" | { type: "function"; function: { name: string } }; 58 | } { 59 | const tools = options.tools ?? 
[]; 60 | if (!tools || tools.length === 0) { 61 | return {}; 62 | } 63 | 64 | const toolDefs: OpenAIFunctionToolDef[] = tools 65 | .filter((t) => t && typeof t === "object") 66 | .map((t) => { 67 | const name = t.name; 68 | const description = typeof t.description === "string" ? t.description : ""; 69 | const params = t.inputSchema ?? { type: "object", properties: {} }; 70 | return { 71 | type: "function" as const, 72 | function: { 73 | name, 74 | description, 75 | parameters: params, 76 | }, 77 | } satisfies OpenAIFunctionToolDef; 78 | }); 79 | 80 | let tool_choice: "auto" | { type: "function"; function: { name: string } } = "auto"; 81 | if (options.toolMode === vscode.LanguageModelChatToolMode.Required) { 82 | if (tools.length !== 1) { 83 | console.error("[OAI Compatible Model Provider] ToolMode.Required but multiple tools:", tools.length); 84 | throw new Error("LanguageModelChatToolMode.Required is not supported with more than one tool"); 85 | } 86 | tool_choice = { type: "function", function: { name: tools[0].name } }; 87 | } 88 | 89 | return { tools: toolDefs, tool_choice }; 90 | } 91 | 92 | /** 93 | * 检查是否为图片MIME类型 94 | */ 95 | export function isImageMimeType(mimeType: string): boolean { 96 | return mimeType.startsWith("image/") && ["image/jpeg", "image/png", "image/gif", "image/webp"].includes(mimeType); 97 | } 98 | 99 | /** 100 | * 创建图片的data URL 101 | */ 102 | export function createDataUrl(dataPart: vscode.LanguageModelDataPart): string { 103 | const base64Data = Buffer.from(dataPart.data).toString("base64"); 104 | return `data:${dataPart.mimeType};base64,${base64Data}`; 105 | } 106 | 107 | /** 108 | * Type guard for LanguageModelToolResultPart-like values. 109 | * @param value Unknown value to test. 110 | */ 111 | export function isToolResultPart(value: unknown): value is { callId: string; content?: ReadonlyArray } { 112 | if (!value || typeof value !== "object") { 113 | return false; 114 | } 115 | const obj = value as Record; 116 | const hasCallId = typeof obj.callId === "string"; 117 | const hasContent = "content" in obj; 118 | return hasCallId && hasContent; 119 | } 120 | 121 | /** 122 | * Concatenate tool result content into a single text string. 123 | * @param pr Tool result-like object with content array. 124 | */ 125 | export function collectToolResultText(pr: { content?: ReadonlyArray }): string { 126 | let text = ""; 127 | for (const c of pr.content ?? []) { 128 | if (c instanceof vscode.LanguageModelTextPart) { 129 | text += c.value; 130 | } else if (typeof c === "string") { 131 | text += c; 132 | } else if (c instanceof vscode.LanguageModelDataPart && c.mimeType === "cache_control") { 133 | /* ignore */ 134 | } else { 135 | try { 136 | text += JSON.stringify(c); 137 | } catch { 138 | /* ignore */ 139 | } 140 | } 141 | } 142 | return text; 143 | } 144 | 145 | /** 146 | * Try to parse a JSON object from a string. 147 | * @param text The input string. 148 | * @returns Parsed object or ok:false. 149 | */ 150 | export function tryParseJSONObject(text: string): { ok: true; value: Record } | { ok: false } { 151 | try { 152 | if (!text || !/[{]/.test(text)) { 153 | return { ok: false }; 154 | } 155 | const value = JSON.parse(text); 156 | if (value && typeof value === "object" && !Array.isArray(value)) { 157 | return { ok: true, value }; 158 | } 159 | return { ok: false }; 160 | } catch { 161 | return { ok: false }; 162 | } 163 | } 164 | 165 | /** 166 | * Create retry configuration from VS Code workspace settings. 167 | * @returns Retry configuration with default values. 
168 | */ 169 | export function createRetryConfig(): RetryConfig { 170 | const config = vscode.workspace.getConfiguration(); 171 | const retryConfig = config.get("oaicopilot.retry", { 172 | enabled: true, 173 | max_attempts: RETRY_MAX_ATTEMPTS, 174 | interval_ms: RETRY_INTERVAL_MS, 175 | }); 176 | 177 | return { 178 | enabled: retryConfig.enabled ?? true, 179 | max_attempts: retryConfig.max_attempts ?? RETRY_MAX_ATTEMPTS, 180 | interval_ms: retryConfig.interval_ms ?? RETRY_INTERVAL_MS, 181 | status_codes: retryConfig.status_codes, 182 | }; 183 | } 184 | 185 | /** 186 | * Execute a function with retry logic for rate limiting. 187 | * @param fn The async function to execute 188 | * @param retryConfig Retry configuration 189 | * @param token Cancellation token 190 | * @returns Result of the function execution 191 | */ 192 | export async function executeWithRetry(fn: () => Promise, retryConfig: RetryConfig): Promise { 193 | if (!retryConfig.enabled) { 194 | return await fn(); 195 | } 196 | 197 | const maxAttempts = retryConfig.max_attempts ?? RETRY_MAX_ATTEMPTS; 198 | const intervalMs = retryConfig.interval_ms ?? RETRY_INTERVAL_MS; 199 | // Merge user-configured status codes with default ones, removing duplicates 200 | const retryableStatusCodes = retryConfig.status_codes 201 | ? [...new Set([...RETRYABLE_STATUS_CODES, ...retryConfig.status_codes])] 202 | : RETRYABLE_STATUS_CODES; 203 | let lastError: Error | undefined; 204 | 205 | for (let attempt = 0; attempt <= maxAttempts; attempt++) { 206 | try { 207 | return await fn(); 208 | } catch (error) { 209 | lastError = error instanceof Error ? error : new Error(String(error)); 210 | 211 | // Check if error is retryable based on status codes 212 | const isRetryableError = retryableStatusCodes.some((code) => lastError?.message.includes(`[${code}]`)); 213 | 214 | if (!isRetryableError || attempt === maxAttempts) { 215 | throw lastError; 216 | } 217 | 218 | console.error( 219 | `[OAI Compatible Model Provider] Retryable error detected, retrying in ${intervalMs}ms (attempt ${attempt + 1}/${maxAttempts})` 220 | ); 221 | 222 | // Wait for the specified interval before retrying 223 | await new Promise((resolve) => setTimeout(resolve, intervalMs)); 224 | } 225 | } 226 | 227 | // This should never be reached, but TypeScript needs it 228 | throw lastError || new Error("Retry failed"); 229 | } 230 | -------------------------------------------------------------------------------- /src/ollama/ollamaApi.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from "vscode"; 2 | import { 3 | CancellationToken, 4 | LanguageModelChatRequestMessage, 5 | ProvideLanguageModelChatResponseOptions, 6 | LanguageModelResponsePart2, 7 | Progress, 8 | } from "vscode"; 9 | 10 | import type { HFModelItem } from "../types"; 11 | 12 | import type { OllamaMessage, OllamaRequestBody, OllamaStreamChunk, OllamaToolCall } from "./ollamaTypes"; 13 | 14 | import { isToolResultPart, collectToolResultText, convertToolsToOpenAI, mapRole } from "../utils"; 15 | 16 | import { CommonApi } from "../commonApi"; 17 | 18 | export class OllamaApi extends CommonApi { 19 | constructor() { 20 | super(); 21 | } 22 | 23 | /** 24 | * Convert VS Code chat messages to Ollama native message format. 25 | * @param messages The VS Code chat messages to convert. 26 | * @returns Ollama-compatible messages array. 
27 | */ 28 | convertMessages( 29 | messages: readonly LanguageModelChatRequestMessage[], 30 | _modelConfig: { includeReasoningInRequest: boolean } 31 | ): OllamaMessage[] { 32 | const out: OllamaMessage[] = []; 33 | 34 | for (const m of messages) { 35 | const role = mapRole(m); 36 | const textParts: string[] = []; 37 | const imageParts: string[] = []; 38 | let thinkingContent = ""; 39 | const toolCalls: OllamaToolCall[] = []; 40 | const toolResults: { toolName: string; content: string }[] = []; 41 | 42 | for (const part of m.content ?? []) { 43 | if (part instanceof vscode.LanguageModelTextPart) { 44 | textParts.push(part.value); 45 | } else if (part instanceof vscode.LanguageModelDataPart) { 46 | // Convert image data to base64 for Ollama 47 | if (part.mimeType.startsWith("image/")) { 48 | const base64Data = Buffer.from(part.data).toString("base64"); 49 | imageParts.push(base64Data); 50 | } 51 | } else if (part instanceof vscode.LanguageModelThinkingPart) { 52 | // Capture thinking content 53 | const content = Array.isArray(part.value) ? part.value.join("") : part.value; 54 | thinkingContent += content; 55 | } else if (part instanceof vscode.LanguageModelToolCallPart) { 56 | // Capture tool calls from assistant 57 | toolCalls.push({ 58 | function: { 59 | name: part.name, 60 | arguments: (part.input as Record) ?? {}, 61 | }, 62 | }); 63 | } else if (isToolResultPart(part)) { 64 | // Capture tool results 65 | const content = collectToolResultText(part); 66 | const toolName = (part as { toolName?: string }).toolName ?? "unknown"; 67 | toolResults.push({ toolName, content }); 68 | } 69 | } 70 | 71 | // Handle tool results as separate "tool" role messages 72 | for (const tr of toolResults) { 73 | out.push({ 74 | role: "tool", 75 | content: tr.content, 76 | tool_name: tr.toolName, 77 | }); 78 | } 79 | 80 | // Handle regular messages 81 | if (textParts.length > 0 || imageParts.length > 0 || toolCalls.length > 0) { 82 | const content = textParts.join("\n"); 83 | 84 | const ollamaMessage: OllamaMessage = { 85 | role, 86 | content, 87 | }; 88 | 89 | if (imageParts.length > 0) { 90 | ollamaMessage.images = imageParts; 91 | } 92 | 93 | if (thinkingContent && role === "assistant") { 94 | ollamaMessage.thinking = thinkingContent; 95 | } 96 | 97 | if (toolCalls.length > 0 && role === "assistant") { 98 | ollamaMessage.tool_calls = toolCalls; 99 | } 100 | 101 | out.push(ollamaMessage); 102 | } 103 | } 104 | 105 | return out; 106 | } 107 | 108 | prepareRequestBody( 109 | rb: OllamaRequestBody, 110 | um: HFModelItem | undefined, 111 | options: ProvideLanguageModelChatResponseOptions 112 | ): OllamaRequestBody { 113 | // Add model options if configured 114 | if ( 115 | um?.temperature !== undefined || 116 | um?.top_p !== undefined || 117 | um?.top_k !== undefined || 118 | um?.max_tokens !== undefined 119 | ) { 120 | rb.options = {}; 121 | if (um.temperature !== undefined && um.temperature !== null) { 122 | rb.options.temperature = um.temperature; 123 | } 124 | if (um.top_p !== undefined && um.top_p !== null) { 125 | rb.options.top_p = um.top_p; 126 | } 127 | if (um.top_k !== undefined) { 128 | rb.options.top_k = um.top_k; 129 | } 130 | if (um.max_tokens !== undefined) { 131 | rb.options.num_predict = um.max_tokens; 132 | } 133 | } 134 | 135 | // Add tools if provided 136 | const toolConfig = convertToolsToOpenAI(options); 137 | if (toolConfig.tools) { 138 | rb.tools = toolConfig.tools; 139 | } 140 | 141 | // Process extra configuration parameters 142 | if (um?.extra && typeof um.extra === "object") { 143 | 
// Add all extra parameters directly to the request body 144 | for (const [key, value] of Object.entries(um.extra)) { 145 | if (value !== undefined) { 146 | (rb as unknown as Record)[key] = value; 147 | } 148 | } 149 | } 150 | 151 | return rb; 152 | } 153 | 154 | /** 155 | * Process Ollama native API streaming response (JSON lines format). 156 | * @param responseBody The readable stream body. 157 | * @param progress Progress reporter for streamed parts. 158 | * @param token Cancellation token. 159 | */ 160 | async processStreamingResponse( 161 | responseBody: ReadableStream, 162 | progress: Progress, 163 | token: CancellationToken 164 | ): Promise { 165 | const reader = responseBody.getReader(); 166 | const decoder = new TextDecoder(); 167 | let buffer = ""; 168 | 169 | try { 170 | while (true) { 171 | if (token.isCancellationRequested) { 172 | break; 173 | } 174 | 175 | const { done, value } = await reader.read(); 176 | if (done) { 177 | break; 178 | } 179 | 180 | buffer += decoder.decode(value, { stream: true }); 181 | const lines = buffer.split("\n"); 182 | buffer = lines.pop() || ""; 183 | 184 | for (const line of lines) { 185 | if (!line.trim()) { 186 | continue; 187 | } 188 | 189 | try { 190 | const chunk: OllamaStreamChunk = JSON.parse(line); 191 | // console.debug("[OAI Compatible Model Provider] data:", JSON.stringify(chunk)); 192 | 193 | await this.processOllamaDelta(chunk, progress); 194 | 195 | // Check if this is the final chunk 196 | if (chunk.done) { 197 | // End any active thinking sequence 198 | this.reportEndThinking(progress); 199 | } 200 | } catch { 201 | // Silently ignore malformed JSON lines 202 | } 203 | } 204 | } 205 | } finally { 206 | reader.releaseLock(); 207 | // End any active thinking sequence 208 | this.reportEndThinking(progress); 209 | } 210 | } 211 | 212 | /** 213 | * Process a single Ollama streaming chunk. 214 | * @param chunk Parsed Ollama stream chunk. 215 | * @param progress Progress reporter for parts. 
216 | */ 217 | private async processOllamaDelta( 218 | chunk: OllamaStreamChunk, 219 | progress: Progress 220 | ): Promise { 221 | const message = chunk.message; 222 | if (!message) { 223 | return; 224 | } 225 | 226 | // Process thinking content first 227 | if (message.thinking) { 228 | // Buffer and emit thinking content 229 | this.bufferThinkingContent(message.thinking, progress); 230 | } 231 | 232 | // Process tool calls 233 | if (message.tool_calls && message.tool_calls.length > 0) { 234 | // End thinking if active 235 | this.reportEndThinking(progress); 236 | 237 | for (const tc of message.tool_calls) { 238 | const id = `ollama_tc_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`; 239 | progress.report(new vscode.LanguageModelToolCallPart(id, tc.function.name, tc.function.arguments)); 240 | } 241 | } 242 | 243 | // Process regular content 244 | if (message.content) { 245 | // If we have thinking content and now receiving regular content, end thinking first 246 | this.reportEndThinking(progress); 247 | 248 | // Emit text content 249 | progress.report(new vscode.LanguageModelTextPart(message.content)); 250 | } 251 | } 252 | } 253 | -------------------------------------------------------------------------------- /src/commonApi.ts: -------------------------------------------------------------------------------- 1 | import { 2 | ProvideLanguageModelChatResponseOptions, 3 | LanguageModelChatRequestMessage, 4 | LanguageModelToolCallPart, 5 | LanguageModelResponsePart2, 6 | LanguageModelThinkingPart, 7 | Progress, 8 | CancellationToken, 9 | } from "vscode"; 10 | 11 | import type { OllamaMessage, OllamaRequestBody } from "./ollama/ollamaTypes"; 12 | 13 | import type { OpenAIChatMessage } from "./openai/openaiTypes"; 14 | import type { AnthropicMessage, AnthropicRequestBody } from "./anthropic/anthropicTypes"; 15 | import { HFModelItem } from "./types"; 16 | import { tryParseJSONObject } from "./utils"; 17 | 18 | export abstract class CommonApi { 19 | /** Buffer for assembling streamed tool calls by index. */ 20 | protected _toolCallBuffers: Map = new Map< 21 | number, 22 | { id?: string; name?: string; args: string } 23 | >(); 24 | 25 | /** Indices for which a tool call has been fully emitted. */ 26 | protected _completedToolCallIndices = new Set(); 27 | 28 | /** Track if we emitted any assistant text before seeing tool calls (SSE-like begin-tool-calls hint). */ 29 | protected _hasEmittedAssistantText = false; 30 | 31 | /** Track if we emitted the begin-tool-calls whitespace flush. */ 32 | protected _emittedBeginToolCallsHint = false; 33 | 34 | // XML think block parsing state 35 | protected _xmlThinkActive = false; 36 | protected _xmlThinkDetectionAttempted = false; 37 | 38 | // Thinking content state management 39 | protected _currentThinkingId: string | null = null; 40 | 41 | /** Buffer for accumulating thinking content before emitting. */ 42 | protected _thinkingBuffer = ""; 43 | 44 | /** Timer for delayed flushing of thinking buffer. */ 45 | protected _thinkingFlushTimer: NodeJS.Timeout | null = null; 46 | 47 | constructor() {} 48 | 49 | /** 50 | * Convert VS Code chat messages to specific api message format. 51 | * @param messages The VS Code chat messages to convert. 52 | * @param modelConfig Config for special model. 53 | * @returns Specific api messages array. 
54 | */ 55 | abstract convertMessages( 56 | messages: readonly LanguageModelChatRequestMessage[], 57 | modelConfig: { includeReasoningInRequest: boolean } 58 | ): Array; 59 | 60 | /** 61 | * Construct request body for Specific api 62 | * @param rb Specific api Request body 63 | * @param um Current Model Info 64 | * @param options From VS Code 65 | */ 66 | abstract prepareRequestBody( 67 | rb: Record | OllamaRequestBody | AnthropicRequestBody, 68 | um: HFModelItem | undefined, 69 | options: ProvideLanguageModelChatResponseOptions 70 | ): Record | OllamaRequestBody | AnthropicRequestBody; 71 | 72 | /** 73 | * Process specific api streaming response (JSON lines format). 74 | * @param responseBody The readable stream body. 75 | * @param progress Progress reporter for streamed parts. 76 | * @param token Cancellation token. 77 | */ 78 | abstract processStreamingResponse( 79 | responseBody: ReadableStream, 80 | progress: Progress, 81 | token: CancellationToken 82 | ): Promise; 83 | 84 | /** 85 | * Try to emit a buffered tool call when a valid name and JSON arguments are available. 86 | * @param index The tool call index from the stream. 87 | * @param progress Progress reporter for parts. 88 | */ 89 | protected async tryEmitBufferedToolCall( 90 | index: number, 91 | progress: Progress 92 | ): Promise { 93 | const buf = this._toolCallBuffers.get(index); 94 | if (!buf) { 95 | return; 96 | } 97 | if (!buf.name) { 98 | return; 99 | } 100 | const canParse = tryParseJSONObject(buf.args); 101 | if (!canParse.ok) { 102 | return; 103 | } 104 | const id = buf.id ?? `call_${Math.random().toString(36).slice(2, 10)}`; 105 | const parameters = canParse.value; 106 | progress.report(new LanguageModelToolCallPart(id, buf.name, parameters)); 107 | this._toolCallBuffers.delete(index); 108 | this._completedToolCallIndices.add(index); 109 | } 110 | 111 | /** 112 | * Flush all buffered tool calls, optionally throwing if arguments are not valid JSON. 113 | * @param progress Progress reporter for parts. 114 | * @param throwOnInvalid If true, throw when a tool call has invalid JSON args. 115 | */ 116 | protected async flushToolCallBuffers( 117 | progress: Progress, 118 | throwOnInvalid: boolean 119 | ): Promise { 120 | if (this._toolCallBuffers.size === 0) { 121 | return; 122 | } 123 | for (const [idx, buf] of Array.from(this._toolCallBuffers.entries())) { 124 | const parsed = tryParseJSONObject(buf.args); 125 | if (!parsed.ok) { 126 | if (throwOnInvalid) { 127 | console.error("[OAI Compatible Model Provider] Invalid JSON for tool call", { 128 | idx, 129 | snippet: (buf.args || "").slice(0, 200), 130 | }); 131 | throw new Error("Invalid JSON for tool call"); 132 | } 133 | // When not throwing (e.g. on [DONE]), drop silently to reduce noise 134 | continue; 135 | } 136 | const id = buf.id ?? `call_${Math.random().toString(36).slice(2, 10)}`; 137 | const name = buf.name ?? 
"unknown_tool"; 138 | progress.report(new LanguageModelToolCallPart(id, name, parsed.value)); 139 | this._toolCallBuffers.delete(idx); 140 | this._completedToolCallIndices.add(idx); 141 | } 142 | } 143 | 144 | /** 145 | * Report to VS Code for ending thinking 146 | * @param progress Progress reporter for parts 147 | */ 148 | protected reportEndThinking(progress: Progress) { 149 | if (!this._currentThinkingId) { 150 | return; 151 | } 152 | // Always clean up state after attempting to end the thinking sequence 153 | try { 154 | this.flushThinkingBuffer(progress); 155 | // End the current thinking sequence with empty content and same ID 156 | progress.report(new LanguageModelThinkingPart("", this._currentThinkingId)); 157 | } catch (e) { 158 | console.error("[OAI Compatible Model Provider] Failed to end thinking sequence:", e); 159 | } 160 | this._currentThinkingId = null; 161 | // Clear thinking buffer and timer since sequence ended 162 | this._thinkingBuffer = ""; 163 | if (this._thinkingFlushTimer) { 164 | clearTimeout(this._thinkingFlushTimer); 165 | this._thinkingFlushTimer = null; 166 | } 167 | } 168 | 169 | /** 170 | * Generate a unique thinking ID based on request start time and random suffix 171 | */ 172 | protected generateThinkingId(): string { 173 | return `thinking_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`; 174 | } 175 | 176 | /** 177 | * Buffer and schedule a flush for thinking content. 178 | * @param text The thinking text to buffer 179 | * @param progress Progress reporter for parts 180 | */ 181 | protected bufferThinkingContent(text: string, progress: Progress): void { 182 | // Generate thinking ID if not provided by the model 183 | if (!this._currentThinkingId) { 184 | this._currentThinkingId = this.generateThinkingId(); 185 | } 186 | 187 | // Append to thinking buffer 188 | this._thinkingBuffer += text; 189 | 190 | // Schedule flush with 100ms delay 191 | if (!this._thinkingFlushTimer) { 192 | this._thinkingFlushTimer = setTimeout(() => { 193 | this.flushThinkingBuffer(progress); 194 | }, 100); 195 | } 196 | } 197 | 198 | /** 199 | * Flush the thinking buffer to the progress reporter. 200 | * @param progress Progress reporter for parts. 
201 | */ 202 | protected flushThinkingBuffer(progress: Progress): void { 203 | // Always clear existing timer first 204 | if (this._thinkingFlushTimer) { 205 | clearTimeout(this._thinkingFlushTimer); 206 | this._thinkingFlushTimer = null; 207 | } 208 | 209 | // Flush current buffer if we have content 210 | if (this._thinkingBuffer && this._currentThinkingId) { 211 | const text = this._thinkingBuffer; 212 | this._thinkingBuffer = ""; 213 | progress.report(new LanguageModelThinkingPart(text, this._currentThinkingId)); 214 | } 215 | } 216 | } 217 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "oai-compatible-copilot", 3 | "publisher": "johnny-zhao", 4 | "displayName": "OAI Compatible Provider for Copilot", 5 | "description": "An extension that integrates OpenAI/Ollama/Anthropic API Providers into GitHub Copilot Chat", 6 | "icon": "assets/logo.png", 7 | "keywords": [ 8 | "ai", 9 | "chat", 10 | "copilot", 11 | "github-copilot", 12 | "language-model", 13 | "openai", 14 | "ollama", 15 | "anthropic", 16 | "claude" 17 | ], 18 | "repository": { 19 | "type": "git", 20 | "url": "https://github.com/JohnnyZ93/oai-compatible-copilot" 21 | }, 22 | "version": "0.1.8", 23 | "engines": { 24 | "vscode": "^1.104.0" 25 | }, 26 | "extensionDependencies": [ 27 | "github.copilot-chat" 28 | ], 29 | "categories": [ 30 | "AI", 31 | "Chat" 32 | ], 33 | "badges": [ 34 | { 35 | "url": "https://img.shields.io/github/stars/JohnnyZ93/oai-compatible-copilot?style=social", 36 | "description": "Star oai-compatible-copilot on Github", 37 | "href": "https://github.com/JohnnyZ93/oai-compatible-copilot" 38 | } 39 | ], 40 | "bugs": { 41 | "url": "https://github.com/JohnnyZ93/oai-compatible-copilot/issues" 42 | }, 43 | "license": "MIT", 44 | "enabledApiProposals": [ 45 | "chatProvider", 46 | "languageModelThinkingPart" 47 | ], 48 | "contributes": { 49 | "languageModelChatProviders": [ 50 | { 51 | "vendor": "oaicopilot", 52 | "displayName": "OAI Compatible", 53 | "managementCommand": "oaicopilot.setApikey" 54 | } 55 | ], 56 | "commands": [ 57 | { 58 | "command": "oaicopilot.setApikey", 59 | "title": "OAICopilot: Set OAI Compatible Apikey" 60 | }, 61 | { 62 | "command": "oaicopilot.setProviderApikey", 63 | "title": "OAICopilot: Set OAI Compatible Multi-Provider Apikey" 64 | } 65 | ], 66 | "configuration": { 67 | "title": "OAI Compatible Copilot", 68 | "properties": { 69 | "oaicopilot.baseUrl": { 70 | "type": "string", 71 | "default": "https://router.huggingface.co/v1", 72 | "description": "The base URL for the Openai Compatible Inference API. Default value is Hugging Face." 73 | }, 74 | "oaicopilot.models": { 75 | "type": "array", 76 | "default": [], 77 | "items": { 78 | "type": "object", 79 | "properties": { 80 | "id": { 81 | "type": "string", 82 | "description": "Model ID (e.g., 'glm-4.6')." 83 | }, 84 | "displayName": { 85 | "type": "string", 86 | "description": "(Optional) Display name for the model that will be shown in the Copilot interface. If not provided, will be generated automatically." 87 | }, 88 | "configId": { 89 | "type": "string", 90 | "description": "(Optional) Configuration ID for this model. Allows defining the same model with different settings (e.g. 'glm-4.6::thinking', 'glm-4.6::no-thinking')." 91 | }, 92 | "owned_by": { 93 | "type": "string", 94 | "description": "Model provider (e.g., 'zai', 'openai')." 
95 | }, 96 | "family": { 97 | "type": "string", 98 | "description": "Model family (e.g., 'gpt-4', 'claude-3', 'gemini'). Enables model-specific optimizations and behaviors. Defaults to 'oai-compatible' if not specified." 99 | }, 100 | "baseUrl": { 101 | "type": "string", 102 | "description": "Base URL for the model provider. If not provided, the global oaicopilot.baseUrl will be used." 103 | }, 104 | "context_length": { 105 | "type": "number", 106 | "default": 128000, 107 | "minimum": 1000, 108 | "maximum": 10000000, 109 | "description": "Model support context length. Default is 128000." 110 | }, 111 | "vision": { 112 | "type": "boolean", 113 | "default": false, 114 | "description": "Model support vision. Default is false." 115 | }, 116 | "max_tokens": { 117 | "type": "number", 118 | "default": 4096, 119 | "minimum": 1, 120 | "maximum": 10000000, 121 | "description": "Maximum number of tokens to generate (range: [1, context_length)). Default is 4096." 122 | }, 123 | "max_completion_tokens": { 124 | "type": "number", 125 | "default": 4096, 126 | "minimum": 1, 127 | "maximum": 10000000, 128 | "description": "Maximum number of tokens to generate (OpenAI new standard parameter)." 129 | }, 130 | "reasoning_effort": { 131 | "type": "string", 132 | "default": "medium", 133 | "enum": [ 134 | "high", 135 | "medium", 136 | "low", 137 | "minimal" 138 | ], 139 | "description": "Reasoning effort level (OpenAI reasoning configuration)" 140 | }, 141 | "thinking": { 142 | "type": "object", 143 | "description": "Thinking configuration for Zai provider", 144 | "properties": { 145 | "type": { 146 | "type": "string", 147 | "enum": [ 148 | "enabled", 149 | "disabled" 150 | ], 151 | "description": "Set to 'enabled' to enable thinking, 'disabled' to disable thinking" 152 | } 153 | } 154 | }, 155 | "enable_thinking": { 156 | "type": "boolean", 157 | "default": false, 158 | "description": "Switches between thinking and non-thinking modes. Not required." 159 | }, 160 | "thinking_budget": { 161 | "type": "number", 162 | "default": 128, 163 | "minimum": 128, 164 | "maximum": 10000000, 165 | "description": "Maximum number of tokens for chain-of-thought output. Not required." 166 | }, 167 | "temperature": { 168 | "type": "number", 169 | "default": 0, 170 | "minimum": 0, 171 | "maximum": 2, 172 | "description": "Sampling temperature (range: [0, 2]). Lower values make output more deterministic, higher values make it more creative. Default is 0." 173 | }, 174 | "top_p": { 175 | "type": "number", 176 | "default": 1, 177 | "minimum": 0, 178 | "maximum": 1, 179 | "description": "Top-p sampling value (range: (0, 1]). Not required." 180 | }, 181 | "top_k": { 182 | "type": "number", 183 | "default": 50, 184 | "minimum": 1, 185 | "description": "Top-k sampling value (range: [1, Infinity)). Not required." 186 | }, 187 | "min_p": { 188 | "type": "number", 189 | "default": 0, 190 | "minimum": 0, 191 | "maximum": 1, 192 | "description": "Minimum probability threshold (range: [0, 1]). Not required." 193 | }, 194 | "frequency_penalty": { 195 | "type": "number", 196 | "default": 0, 197 | "minimum": -2, 198 | "maximum": 2, 199 | "description": "Frequency penalty (range: [-2, 2]). Not required." 200 | }, 201 | "presence_penalty": { 202 | "type": "number", 203 | "default": 0, 204 | "minimum": -2, 205 | "maximum": 2, 206 | "description": "Presence penalty (range: [-2, 2]). Not required." 
207 | }, 208 | "repetition_penalty": { 209 | "type": "number", 210 | "default": 0, 211 | "minimum": 0, 212 | "maximum": 2, 213 | "description": "Repetition penalty (range: (0, 2]). Not required." 214 | }, 215 | "reasoning": { 216 | "type": "object", 217 | "default": { 218 | "effort": "medium" 219 | }, 220 | "properties": { 221 | "effort": { 222 | "type": "string", 223 | "default": "medium", 224 | "enum": [ 225 | "high", 226 | "medium", 227 | "low", 228 | "minimal", 229 | "auto" 230 | ], 231 | "description": "Reasoning effort level for OpenRouter/xAI (high, medium, low, minimal, auto)" 232 | }, 233 | "exclude": { 234 | "type": "boolean", 235 | "default": false, 236 | "description": "Exclude reasoning tokens from the final response" 237 | }, 238 | "max_tokens": { 239 | "type": "number", 240 | "default": 2000, 241 | "minimum": 1, 242 | "description": "Specific token limit for reasoning (Anthropic-style, alternative to effort)" 243 | }, 244 | "enabled": { 245 | "type": "boolean", 246 | "default": true, 247 | "description": "Enable reasoning (inferred from effort or max_tokens if not specified)" 248 | } 249 | }, 250 | "description": "Reasoning configuration for OpenRouter-compatible providers" 251 | }, 252 | "extra": { 253 | "type": "object", 254 | "description": "Extra request body parameters." 255 | }, 256 | "headers": { 257 | "type": "object", 258 | "additionalProperties": { 259 | "type": "string" 260 | }, 261 | "description": "Custom HTTP headers to be sent with every request to this model's provider. These headers will be merged with the default headers (Authorization, Content-Type, User-Agent)." 262 | }, 263 | "include_reasoning_in_request": { 264 | "type": "boolean", 265 | "default": false, 266 | "description": "Whether to include reasoning_content in assistant messages sent to the API. Support deepseek-v3.2 or others." 267 | }, 268 | "apiMode": { 269 | "type": "string", 270 | "enum": [ 271 | "openai", 272 | "ollama", 273 | "anthropic" 274 | ], 275 | "default": "openai", 276 | "description": "API mode: 'openai' (Default) for API (/v1/chat/completions), 'ollama' for API (/api/chat), 'anthropic' for API (/v1/messages)." 277 | } 278 | }, 279 | "required": [ 280 | "id", 281 | "owned_by" 282 | ] 283 | }, 284 | "description": "A list of preferred models to use. If provided, these models will be used directly instead of fetching from the API." 285 | }, 286 | "oaicopilot.retry": { 287 | "type": "object", 288 | "default": { 289 | "enabled": true, 290 | "max_attempts": 3, 291 | "interval_ms": 1000 292 | }, 293 | "properties": { 294 | "enabled": { 295 | "type": "boolean", 296 | "default": true, 297 | "description": "Enable retry mechanism for api errors. Default is true." 298 | }, 299 | "max_attempts": { 300 | "type": "number", 301 | "default": 3, 302 | "minimum": 1, 303 | "description": "Maximum number of retry attempts. Default is 3." 304 | }, 305 | "interval_ms": { 306 | "type": "number", 307 | "default": 1000, 308 | "minimum": 1, 309 | "description": "Interval between retry attempts in milliseconds. Default is 1000 (1 seconds)." 310 | }, 311 | "status_codes": { 312 | "type": "array", 313 | "items": { 314 | "type": "number" 315 | }, 316 | "description": "Additional HTTP status codes that will be merged. Default is [429, 500, 502, 503, 504]." 317 | } 318 | }, 319 | "description": "Retry configuration for handling api errors like [429, 500, 502, 503, 504]." 
320 | }, 321 | "oaicopilot.delay": { 322 | "type": "number", 323 | "default": 0, 324 | "minimum": 0, 325 | "description": "Fixed delay in milliseconds between consecutive requests. Default is 0 (no delay)." 326 | } 327 | } 328 | } 329 | }, 330 | "main": "./out/extension.js", 331 | "scripts": { 332 | "vscode:prepublish": "npm run compile", 333 | "download-api": "dts dev && mv vscode.proposed.*.ts src", 334 | "compile": "tsc -p ./", 335 | "lint": "eslint", 336 | "format": "prettier --write .", 337 | "watch": "tsc -watch -p ./", 338 | "test": "npm run compile && vscode-test", 339 | "build": "npx @vscode/vsce package -o extension.vsix" 340 | }, 341 | "dependencies": {}, 342 | "devDependencies": { 343 | "@eslint/js": "^9.13.0", 344 | "@stylistic/eslint-plugin": "^2.9.0", 345 | "@types/node": "^22", 346 | "@types/mocha": "^10.0.6", 347 | "@vscode/dts": "^0.4.1", 348 | "@types/vscode": "^1.104.0", 349 | "@vscode/test-cli": "^0.0.11", 350 | "@vscode/test-electron": "^2.5.2", 351 | "eslint": "^9.13.0", 352 | "prettier": "^3.1.0", 353 | "typescript": "^5.9.2", 354 | "typescript-eslint": "^8.39.0" 355 | } 356 | } 357 | -------------------------------------------------------------------------------- /src/anthropic/anthropicApi.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from "vscode"; 2 | import { 3 | CancellationToken, 4 | LanguageModelChatRequestMessage, 5 | ProvideLanguageModelChatResponseOptions, 6 | LanguageModelResponsePart2, 7 | Progress, 8 | } from "vscode"; 9 | 10 | import type { HFModelItem } from "../types"; 11 | 12 | import type { 13 | AnthropicMessage, 14 | AnthropicRequestBody, 15 | AnthropicContentBlock, 16 | AnthropicToolUseBlock, 17 | AnthropicToolResultBlock, 18 | AnthropicStreamChunk, 19 | } from "./anthropicTypes"; 20 | 21 | import { isImageMimeType, isToolResultPart, collectToolResultText, convertToolsToOpenAI, mapRole } from "../utils"; 22 | 23 | import { CommonApi } from "../commonApi"; 24 | 25 | export class AnthropicApi extends CommonApi { 26 | private _systemContent: string | undefined; 27 | 28 | constructor() { 29 | super(); 30 | } 31 | 32 | /** 33 | * Convert VS Code chat messages to Anthropic message format. 34 | * @param messages The VS Code chat messages to convert. 35 | * @param modelConfig model configuration that may affect message conversion. 36 | * @returns Anthropic-compatible messages array. 37 | */ 38 | convertMessages( 39 | messages: readonly LanguageModelChatRequestMessage[], 40 | modelConfig: { includeReasoningInRequest: boolean } 41 | ): AnthropicMessage[] { 42 | const out: AnthropicMessage[] = []; 43 | 44 | for (const m of messages) { 45 | const role = mapRole(m); 46 | const textParts: string[] = []; 47 | const imageParts: vscode.LanguageModelDataPart[] = []; 48 | const toolCalls: AnthropicToolUseBlock[] = []; 49 | const toolResults: AnthropicToolResultBlock[] = []; 50 | const thinkingParts: string[] = []; 51 | 52 | for (const part of m.content ?? []) { 53 | if (part instanceof vscode.LanguageModelTextPart) { 54 | textParts.push(part.value); 55 | } else if (part instanceof vscode.LanguageModelDataPart && isImageMimeType(part.mimeType)) { 56 | imageParts.push(part); 57 | } else if (part instanceof vscode.LanguageModelToolCallPart) { 58 | const id = part.callId || `toolu_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`; 59 | toolCalls.push({ 60 | type: "tool_use", 61 | id, 62 | name: part.name, 63 | input: (part.input as Record) ?? 
{}, 64 | }); 65 | } else if (isToolResultPart(part)) { 66 | const callId = (part as { callId?: string }).callId ?? ""; 67 | const content = collectToolResultText(part as { content?: ReadonlyArray }); 68 | toolResults.push({ 69 | type: "tool_result", 70 | tool_use_id: callId, 71 | content, 72 | }); 73 | } else if (part instanceof vscode.LanguageModelThinkingPart) { 74 | const content = Array.isArray(part.value) ? part.value.join("") : part.value; 75 | thinkingParts.push(content); 76 | } 77 | } 78 | 79 | // Handle system messages separately (Anthropic uses top-level system field) 80 | if (role === "system") { 81 | if (textParts.length > 0) { 82 | this._systemContent = textParts.join("\n"); 83 | } 84 | continue; 85 | } 86 | 87 | // Build content blocks for user/assistant messages 88 | const contentBlocks: AnthropicContentBlock[] = []; 89 | 90 | // Add text content 91 | if (textParts.length > 0) { 92 | contentBlocks.push({ 93 | type: "text", 94 | text: textParts.join("\n"), 95 | }); 96 | } 97 | 98 | // Add image content 99 | for (const imagePart of imageParts) { 100 | const base64Data = Buffer.from(imagePart.data).toString("base64"); 101 | contentBlocks.push({ 102 | type: "image", 103 | source: { 104 | type: "base64", 105 | media_type: imagePart.mimeType, 106 | data: base64Data, 107 | }, 108 | }); 109 | } 110 | 111 | // Add thinking content for assistant messages 112 | if (role === "assistant" && thinkingParts.length > 0 && modelConfig.includeReasoningInRequest) { 113 | contentBlocks.push({ 114 | type: "thinking", 115 | thinking: thinkingParts.join("\n"), 116 | }); 117 | } 118 | 119 | // Add tool calls for assistant messages 120 | for (const toolCall of toolCalls) { 121 | contentBlocks.push(toolCall); 122 | } 123 | 124 | // For tool results, they should be added to user messages 125 | // We'll add them to the current message if it's a user message 126 | if (role === "user" && toolResults.length > 0) { 127 | for (const toolResult of toolResults) { 128 | contentBlocks.push(toolResult); 129 | } 130 | } else if (toolResults.length > 0) { 131 | // If tool results appear in non-user messages, log warning 132 | console.warn("[Anthropic Provider] Tool results found in non-user message, ignoring"); 133 | } 134 | 135 | // Only add message if we have content blocks 136 | if (contentBlocks.length > 0) { 137 | out.push({ 138 | role, 139 | content: contentBlocks, 140 | }); 141 | } 142 | } 143 | 144 | return out; 145 | } 146 | 147 | prepareRequestBody( 148 | rb: AnthropicRequestBody, 149 | um: HFModelItem | undefined, 150 | options: ProvideLanguageModelChatResponseOptions 151 | ): AnthropicRequestBody { 152 | // Set max_tokens (required for Anthropic) 153 | if (um?.max_tokens !== undefined) { 154 | rb.max_tokens = um.max_tokens; 155 | } 156 | 157 | // Add system content if we extracted it 158 | if (this._systemContent) { 159 | rb.system = this._systemContent; 160 | } 161 | 162 | // Add temperature 163 | const oTemperature = options.modelOptions?.temperature ?? 0; 164 | const temperature = um?.temperature ?? 
oTemperature; 165 | rb.temperature = temperature; 166 | if (um && um.temperature === null) { 167 | delete rb.temperature; 168 | } 169 | 170 | // Add top_p if configured 171 | if (um?.top_p !== undefined && um.top_p !== null) { 172 | rb.top_p = um.top_p; 173 | } 174 | 175 | // Add top_k if configured 176 | if (um?.top_k !== undefined) { 177 | rb.top_k = um.top_k; 178 | } 179 | 180 | // Add tools configuration 181 | const toolConfig = convertToolsToOpenAI(options); 182 | if (toolConfig.tools) { 183 | // Convert OpenAI tool definitions to Anthropic format 184 | rb.tools = toolConfig.tools.map((tool) => ({ 185 | name: tool.function.name, 186 | description: tool.function.description, 187 | input_schema: tool.function.parameters, 188 | })); 189 | } 190 | 191 | // Add tool_choice 192 | if (toolConfig.tool_choice) { 193 | if (toolConfig.tool_choice === "auto") { 194 | rb.tool_choice = { type: "auto" }; 195 | } else if (typeof toolConfig.tool_choice === "object" && toolConfig.tool_choice.type === "function") { 196 | rb.tool_choice = { type: "tool", name: toolConfig.tool_choice.function.name }; 197 | } 198 | } 199 | 200 | // Process extra configuration parameters 201 | if (um?.extra && typeof um.extra === "object") { 202 | // Add all extra parameters directly to the request body 203 | for (const [key, value] of Object.entries(um.extra)) { 204 | if (value !== undefined) { 205 | (rb as unknown as Record)[key] = value; 206 | } 207 | } 208 | } 209 | 210 | return rb; 211 | } 212 | 213 | /** 214 | * Process Anthropic streaming response (SSE format). 215 | * @param responseBody The readable stream body. 216 | * @param progress Progress reporter for streamed parts. 217 | * @param token Cancellation token. 218 | */ 219 | async processStreamingResponse( 220 | responseBody: ReadableStream, 221 | progress: Progress, 222 | token: CancellationToken 223 | ): Promise { 224 | const reader = responseBody.getReader(); 225 | const decoder = new TextDecoder(); 226 | let buffer = ""; 227 | 228 | try { 229 | while (true) { 230 | if (token.isCancellationRequested) { 231 | break; 232 | } 233 | 234 | const { done, value } = await reader.read(); 235 | if (done) { 236 | break; 237 | } 238 | 239 | buffer += decoder.decode(value, { stream: true }); 240 | const lines = buffer.split("\n"); 241 | buffer = lines.pop() || ""; 242 | 243 | for (const line of lines) { 244 | if (line.trim() === "") { 245 | continue; 246 | } 247 | if (!line.startsWith("data: ")) { 248 | continue; 249 | } 250 | 251 | const data = line.slice(6); 252 | if (data === "[DONE]") { 253 | // Do not throw on [DONE]; any incomplete/empty buffers are ignored. 254 | await this.flushToolCallBuffers(progress, /*throwOnInvalid*/ false); 255 | continue; 256 | } 257 | 258 | try { 259 | const chunk: AnthropicStreamChunk = JSON.parse(data); 260 | // console.debug("[OAI Compatible Model Provider] data:", JSON.stringify(chunk)); 261 | 262 | await this.processAnthropicChunk(chunk, progress); 263 | } catch (e) { 264 | console.error("[Anthropic Provider] Failed to parse SSE chunk:", e, "data:", data); 265 | } 266 | } 267 | } 268 | } finally { 269 | reader.releaseLock(); 270 | // If there's an active thinking sequence, end it first 271 | this.reportEndThinking(progress); 272 | } 273 | } 274 | 275 | /** 276 | * Process a single Anthropic streaming chunk. 277 | * @param chunk Parsed Anthropic stream chunk. 278 | * @param progress Progress reporter for parts. 
279 | */ 280 | private async processAnthropicChunk( 281 | chunk: AnthropicStreamChunk, 282 | progress: Progress 283 | ): Promise { 284 | // Handle ping events (ignore) 285 | if (chunk.type === "ping") { 286 | return; 287 | } 288 | 289 | // Handle error events 290 | if (chunk.type === "error") { 291 | const errorType = chunk.error?.type || "unknown_error"; 292 | const errorMessage = chunk.error?.message || "Anthropic API streaming error"; 293 | console.error(`[Anthropic Provider] Streaming error: ${errorType} - ${errorMessage}`); 294 | // We could throw here, but for now just log and continue 295 | return; 296 | } 297 | 298 | if (chunk.type === "message_start" && chunk.message) { 299 | // Extract message metadata (id, model, etc.) 300 | // Could store for later use, but not required for basic streaming 301 | return; 302 | } 303 | 304 | if (chunk.type === "message_delta" && chunk.delta) { 305 | // Extract stop_reason and usage information 306 | // We're not processing usage per user request, but could log if needed 307 | return; 308 | } 309 | 310 | if (chunk.type === "content_block_start" && chunk.content_block) { 311 | // Start of a content block 312 | if (chunk.content_block.type === "thinking") { 313 | // Start thinking block 314 | if (chunk.content_block.thinking) { 315 | this.bufferThinkingContent(chunk.content_block.thinking, progress); 316 | } 317 | } else if (chunk.content_block.type === "tool_use") { 318 | // Start tool call block 319 | // SSEProcessor-like: if first tool call appears after text, emit a whitespace 320 | // to ensure any UI buffers/linkifiers are flushed without adding visible noise. 321 | if (!this._emittedBeginToolCallsHint && this._hasEmittedAssistantText) { 322 | progress.report(new vscode.LanguageModelTextPart(" ")); 323 | this._emittedBeginToolCallsHint = true; 324 | } 325 | const idx = (chunk.index as number) ?? 0; 326 | this._toolCallBuffers.set(idx, { 327 | id: chunk.content_block.id, 328 | name: chunk.content_block.name, 329 | args: "", 330 | }); 331 | } else if (chunk.content_block.type === "text") { 332 | // Text block start - nothing special to do 333 | // The text content will come via content_block_delta events 334 | } 335 | } else if (chunk.type === "content_block_delta" && chunk.delta) { 336 | if (chunk.delta.type === "text_delta" && chunk.delta.text) { 337 | // Emit text content 338 | progress.report(new vscode.LanguageModelTextPart(chunk.delta.text)); 339 | this._hasEmittedAssistantText = true; 340 | } else if (chunk.delta.type === "thinking_delta" && chunk.delta.thinking) { 341 | // Buffer thinking content 342 | this.bufferThinkingContent(chunk.delta.thinking, progress); 343 | } else if (chunk.delta.type === "input_json_delta" && chunk.delta.partial_json) { 344 | // Handle tool call argument streaming 345 | // Find the latest tool call buffer and append partial JSON 346 | const idx = (chunk.index as number) ?? 
0; 347 | const buf = this._toolCallBuffers.get(idx); 348 | if (buf) { 349 | buf.args += chunk.delta.partial_json; 350 | this._toolCallBuffers.set(idx, buf); 351 | // Try to emit if we have valid JSON 352 | await this.tryEmitBufferedToolCall(idx, progress); 353 | } 354 | } else if (chunk.delta.type === "signature_delta" && chunk.delta.signature) { 355 | // Signature for thinking block - ignore for now 356 | // Could store for verification if needed later 357 | } 358 | } else if (chunk.type === "content_block_stop" || chunk.type === "message_stop") { 359 | // End of message - ensure thinking is ended and flush all tool calls 360 | await this.flushToolCallBuffers(progress, false); 361 | this.reportEndThinking(progress); 362 | } 363 | } 364 | } 365 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🤗 OAI Compatible Provider for Copilot 2 | 3 | [![CI](https://github.com/JohnnyZ93/oai-compatible-copilot/actions/workflows/release.yml/badge.svg)](https://github.com/JohnnyZ93/oai-compatible-copilot/actions) 4 | [![License](https://img.shields.io/github/license/JohnnyZ93/oai-compatible-copilot?color=orange&label=License)](https://github.com/JohnnyZ93/oai-compatible-copilot/blob/main/LICENSE) 5 | 6 | Use frontier open LLMs like Qwen3 Coder, Kimi K2, DeepSeek V3.2, GLM 4.6, and more in VS Code with GitHub Copilot Chat, powered by any OpenAI-compatible provider 🔥 7 | 8 | ## ✨ Features 9 | - Supports almost all OpenAI-compatible providers, such as ModelScope, SiliconFlow, DeepSeek... 10 | - Supports vision models. 11 | - Offers additional configuration options for chat requests. 12 | - Supports controlling whether model thinking and reasoning content is shown in the chat interface. 13 | > ![thinkingPartDemo](./assets/thinkingPartDemo.png) 14 | - Supports configuring models from multiple providers simultaneously, automatically managing API keys so you don't have to switch them repeatedly. 15 | - Supports defining multiple configurations for the same model ID with different settings (e.g. thinking enabled/disabled for GLM-4.6). 16 | - Supports an automatic retry mechanism for handling API errors such as 429, 500, 502, 503, and 504. 17 | - Supports token usage counting and setting provider API keys from the status bar. 18 | > ![statusBar](./assets/statusBar.png) 19 | --- 20 | 21 | ## Requirements 22 | - VS Code 1.104.0 or higher. 23 | - OpenAI-compatible provider API key. 24 | --- 25 | 26 | ## ⚡ Quick Start 27 | 1. Install the OAI Compatible Provider for Copilot extension [here](https://marketplace.visualstudio.com/items?itemName=johnny-zhao.oai-compatible-copilot). 28 | 2. Open VS Code Settings and configure `oaicopilot.baseUrl` and `oaicopilot.models`. 29 | 3. Open the GitHub Copilot Chat interface. 30 | 4. Click the model picker and select "Manage Models...". 31 | 5. Choose the "OAI Compatible" provider. 32 | 6. Enter your API key — it will be saved locally. 33 | 7. Select the models you want to add to the model picker. 34 | 35 | ### Settings Example 36 | 37 | ```json 38 | "oaicopilot.baseUrl": "https://api-inference.modelscope.cn/v1", 39 | "oaicopilot.models": [ 40 | { 41 | "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", 42 | "owned_by": "modelscope", 43 | "context_length": 256000, 44 | "max_tokens": 8192, 45 | "temperature": 0, 46 | "top_p": 1 47 | } 48 | ] 49 | ``` 50 | --- 51 | 52 | ## * Multi API Mode 53 | 54 | The extension supports three different API protocols to work with various model providers.
You can specify which API mode to use for each model via the `apiMode` parameter. 55 | 56 | ### Supported API Modes 57 | 58 | 1. **`openai`** (default) - OpenAI-compatible API 59 | - Endpoint: `/chat/completions` 60 | - Header: `Authorization: Bearer ` 61 | - Use for: Most OpenAI-compatible providers (ModelScope, SiliconFlow, etc.) 62 | 63 | 2. **`ollama`** - Ollama native API 64 | - Endpoint: `/api/chat` 65 | - Header: `Authorization: Bearer ` (or no header for local Ollama) 66 | - Use for: Local Ollama instances 67 | 68 | 3. **`anthropic`** - Anthropic Claude API 69 | - Endpoint: `/v1/messages` 70 | - Header: `x-api-key: ` 71 | - Use for: Anthropic Claude models 72 | 73 | ### Configuration Examples 74 | Mixed configuration with multiple API modes: 75 | 76 | ```json 77 | "oaicopilot.models": [ 78 | { 79 | "id": "GLM-4.6", 80 | "owned_by": "modelscope", 81 | }, 82 | { 83 | "id": "llama3.2", 84 | "owned_by": "ollama", 85 | "baseUrl": "http://localhost:11434", 86 | "apiMode": "ollama" 87 | }, 88 | { 89 | "id": "claude-3-5-sonnet-20241022", 90 | "owned_by": "anthropic", 91 | "baseUrl": "https://api.anthropic.com", 92 | "apiMode": "anthropic" 93 | } 94 | ] 95 | ``` 96 | 97 | ### Important Notes 98 | - The `apiMode` parameter defaults to `"openai"` if not specified. 99 | - When using `ollama` mode, you can omit the API key (`ollama` by default) or set it to any string. 100 | - Each API mode uses different message conversion logic internally to match provider-specific formats (tools, images, thinking). 101 | --- 102 | 103 | ## * Multi-Provider Guide 104 | 105 | > `owned_by` in model config is used for group apiKey. The storage key is `oaicopilot.apiKey.${owned_by}`. 106 | 107 | 1. Open VS Code Settings and configure `oaicopilot.models`. 108 | 2. Open command center ( Ctrl + Shift + P ), and search "OAICopilot: Set OAI Compatible Multi-Provider Apikey" to configure provider-specific API keys. 109 | 3. Open Github Copilot Chat interface. 110 | 4. Click the model picker and select "Manage Models...". 111 | 5. Choose "OAI Compatible" provider. 112 | 6. Select the models you want to add to the model picker. 113 | 114 | ### Settings Example 115 | 116 | ```json 117 | "oaicopilot.baseUrl": "https://api-inference.modelscope.cn/v1", 118 | "oaicopilot.models": [ 119 | { 120 | "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", 121 | "owned_by": "modelscope", 122 | "context_length": 256000, 123 | "max_tokens": 8192, 124 | "temperature": 0, 125 | "top_p": 1 126 | }, 127 | { 128 | "id": "qwen3-coder", 129 | "owned_by": "iflow", 130 | "baseUrl": "https://apis.iflow.cn/v1", 131 | "context_length": 256000, 132 | "max_tokens": 8192, 133 | "temperature": 0, 134 | "top_p": 1 135 | } 136 | ] 137 | ``` 138 | 139 | --- 140 | 141 | ## * Multi-config for the same model 142 | 143 | You can define multiple configurations for the same model ID by using the `configId` field. This allows you to have the same base model with different settings for different use cases. 144 | 145 | To use this feature: 146 | 147 | 1. Add the `configId` field to your model configuration 148 | 2. Each configuration with the same `id` must have a unique `configId` 149 | 3. 
The model will appear as separate entries in the VS Code model picker 150 | 151 | ### Settings Example 152 | 153 | ```json 154 | "oaicopilot.models": [ 155 | { 156 | "id": "glm-4.6", 157 | "configId": "thinking", 158 | "owned_by": "zai", 159 | "temperature": 0.7, 160 | "top_p": 1, 161 | "thinking": { 162 | "type": "enabled" 163 | } 164 | }, 165 | { 166 | "id": "glm-4.6", 167 | "configId": "no-thinking", 168 | "owned_by": "zai", 169 | "temperature": 0, 170 | "top_p": 1, 171 | "thinking": { 172 | "type": "disabled" 173 | } 174 | } 175 | ] 176 | ``` 177 | 178 | In this example, you'll have two different configurations of the glm-4.6 model available in VS Code: 179 | - `glm-4.6::thinking` - use GLM-4.6 with thinking 180 | - `glm-4.6::no-thinking` - use GLM-4.6 without thinking 181 | 182 | --- 183 | 184 | ## * Custom Headers 185 | 186 | You can specify custom HTTP headers that will be sent with every request to a specific model's provider. This is useful for: 187 | 188 | - API versioning headers 189 | - Custom authentication headers (in addition to the standard Authorization header) 190 | - Provider-specific headers required by certain APIs 191 | - Request tracking or debugging headers 192 | 193 | ### Custom Headers Example 194 | 195 | ```json 196 | "oaicopilot.models": [ 197 | { 198 | "id": "custom-model", 199 | "owned_by": "provider", 200 | "baseUrl": "https://api.example.com/v1", 201 | "headers": { 202 | "X-API-Version": "2024-01", 203 | "X-Request-Source": "vscode-copilot", 204 | "Custom-Auth-Token": "additional-token-if-needed" 205 | } 206 | } 207 | ] 208 | ``` 209 | 210 | **Important Notes:** 211 | - Custom headers are merged with the default headers (Authorization, Content-Type, User-Agent) 212 | - If a custom header conflicts with a default header, the custom header takes precedence 213 | - Headers are applied on a per-model basis, allowing different headers for different providers 214 | - Header values must be strings 215 | --- 216 | 217 | ## * Custom Request Body Parameters 218 | 219 | The `extra` field allows you to add arbitrary parameters to the API request body. This is useful for provider-specific features that aren't covered by the standard parameters.
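Conceptually, the merge is a plain key-by-key assignment performed after the standard fields have been set, which is why `extra` values win when names collide. The sketch below illustrates that behavior; it is a simplified stand-in for the extension's internal `prepareRequestBody` logic (the helper name `applyExtra` is made up for illustration), not the exact code:

```ts
// Illustrative sketch of how `extra` parameters end up in the request body.
// `body` already holds the standard fields (model, messages, temperature, ...).
function applyExtra(
	body: Record<string, unknown>,
	extra?: Record<string, unknown>
): Record<string, unknown> {
	if (!extra) {
		return body;
	}
	for (const [key, value] of Object.entries(extra)) {
		if (value !== undefined) {
			// A later assignment overwrites any standard field of the same name.
			body[key] = value;
		}
	}
	return body;
}

// Example: `seed` is added, and the `extra` value overrides the standard temperature.
applyExtra({ model: "custom-model", temperature: 0 }, { seed: 42, temperature: 0.7 });
```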
220 | 221 | ### How it works 222 | - Parameters in `extra` are merged directly into the request body 223 | - Works with all API modes (`openai`, `ollama`, `anthropic`) 224 | - Values can be any valid JSON type (string, number, boolean, object, array) 225 | 226 | ### Common use cases 227 | - **OpenAI-specific parameters**: `seed`, `logprobs`, `top_logprobs`, `suffix`, `presence_penalty` (if not using standard parameter) 228 | - **Provider-specific features**: Custom sampling methods, debugging flags 229 | - **Experimental parameters**: Beta features from API providers 230 | 231 | ### Configuration Example 232 | 233 | ```json 234 | "oaicopilot.models": [ 235 | { 236 | "id": "custom-model", 237 | "owned_by": "openai", 238 | "extra": { 239 | "seed": 42, 240 | "logprobs": true, 241 | "top_logprobs": 5, 242 | "suffix": "###", 243 | "presence_penalty": 0.1 244 | } 245 | }, 246 | { 247 | "id": "local-model", 248 | "owned_by": "ollama", 249 | "baseUrl": "http://localhost:11434", 250 | "apiMode": "ollama", 251 | "extra": { 252 | "keep_alive": "5m", 253 | "raw": true 254 | } 255 | }, 256 | { 257 | "id": "claude-model", 258 | "owned_by": "anthropic", 259 | "baseUrl": "https://api.anthropic.com", 260 | "apiMode": "anthropic", 261 | "extra": { 262 | "service_tier": "standard_only" 263 | } 264 | } 265 | ] 266 | ``` 267 | 268 | ### Important Notes 269 | - Parameters in `extra` are added after standard parameters 270 | - If an `extra` parameter conflicts with a standard parameter, the `extra` value takes precedence 271 | - Use this for provider-specific features only 272 | - Standard parameters (temperature, top_p, etc.) should use their dedicated fields when possible 273 | - API provider must support the parameters you specify 274 | 275 | --- 276 | 277 | ## Model Parameters 278 | All parameters support individual configuration for different models, providing highly flexible model tuning capabilities. 279 | 280 | - `id` (required): Model identifier 281 | - `owned_by` (required): Model provider 282 | - `displayName`: Display name for the model that will be shown in the Copilot interface. 283 | - `configId`: Configuration ID for this model. Allows defining the same model with different settings (e.g. 'glm-4.6::thinking', 'glm-4.6::no-thinking') 284 | - `family`: Model family (e.g., 'gpt-4', 'claude-3', 'gemini'). Enables model-specific optimizations and behaviors. Defaults to 'oai-compatible' if not specified. 285 | - `baseUrl`: Model-specific base URL. If not provided, the global `oaicopilot.baseUrl` will be used 286 | - `context_length`: The context length supported by the model. Default value is 128000 287 | - `max_tokens`: Maximum number of tokens to generate (range: [1, context_length]). Default value is 4096 288 | - `max_completion_tokens`: Maximum number of tokens to generate (OpenAI new standard parameter) 289 | - `vision`: Whether the model supports vision capabilities. Defaults to false 290 | - `temperature`: Sampling temperature (range: [0, 2]). Lower values make the output more deterministic, higher values more creative. Default value is 0 291 | - `top_p`: Top-p sampling value (range: (0, 1]). Optional parameter 292 | - `top_k`: Top-k sampling value (range: [1, ∞)). Optional parameter 293 | - `min_p`: Minimum probability threshold (range: [0, 1]). Optional parameter 294 | - `frequency_penalty`: Frequency penalty (range: [-2, 2]). Optional parameter 295 | - `presence_penalty`: Presence penalty (range: [-2, 2]). Optional parameter 296 | - `repetition_penalty`: Repetition penalty (range: (0, 2]). 
Optional parameter 297 | - `enable_thinking`: Enable model thinking and reasoning content display (for non-OpenRouter providers) 298 | - `thinking_budget`: Maximum token count for thinking chain output. Optional parameter 299 | - `reasoning`: OpenRouter reasoning configuration, includes the following options: 300 | - `enabled`: Enable reasoning functionality (if not specified, will be inferred from effort or max_tokens) 301 | - `effort`: Reasoning effort level (high, medium, low, minimal, auto) 302 | - `exclude`: Exclude reasoning tokens from the final response 303 | - `max_tokens`: Specific token limit for reasoning (Anthropic style, as an alternative to effort) 304 | - `thinking`: Thinking configuration for Zai provider 305 | - `type`: Set to 'enabled' to enable thinking, 'disabled' to disable thinking 306 | - `reasoning_effort`: Reasoning effort level (OpenAI reasoning configuration) 307 | - `headers`: Custom HTTP headers to be sent with every request to this model's provider (e.g., `{"X-API-Version": "v1", "X-Custom-Header": "value"}`). These headers will be merged with the default headers (Authorization, Content-Type, User-Agent) 308 | - `extra`: Extra request body parameters. 309 | - `include_reasoning_in_request`: Whether to include reasoning_content in assistant messages sent to the API. Support deepseek-v3.2 or others. 310 | - `apiMode`: API mode: 'openai' (Default) for API (/v1/chat/completions), 'ollama' for API (/api/chat), 'anthropic' for API (/v1/messages). 311 | --- 312 | 313 | ## Thanks to 314 | 315 | Thanks to all the people who contribute. 316 | 317 | - [Contributors](https://github.com/JohnnyZ93/oai-compatible-copilot/graphs/contributors) 318 | - [Hugging Face Chat Extension](https://github.com/huggingface/huggingface-vscode-chat) 319 | - [VS Code Chat Provider API](https://code.visualstudio.com/api/extension-guides/ai/language-model-chat-provider) 320 | 321 | --- 322 | 323 | ## Support & License 324 | - Open issues: https://github.com/JohnnyZ93/oai-compatible-copilot/issues 325 | - License: MIT License Copyright (c) 2025 Johnny Zhao 326 | -------------------------------------------------------------------------------- /src/provider.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from "vscode"; 2 | import { 3 | CancellationToken, 4 | LanguageModelChatInformation, 5 | LanguageModelChatProvider, 6 | LanguageModelChatRequestMessage, 7 | ProvideLanguageModelChatResponseOptions, 8 | LanguageModelResponsePart2, 9 | Progress, 10 | } from "vscode"; 11 | 12 | import type { HFModelItem } from "./types"; 13 | 14 | import type { OllamaRequestBody } from "./ollama/ollamaTypes"; 15 | 16 | import { parseModelId, createRetryConfig, executeWithRetry } from "./utils"; 17 | 18 | import { prepareLanguageModelChatInformation } from "./provideModel"; 19 | import { prepareTokenCount } from "./provideToken"; 20 | import { updateContextStatusBar } from "./statusBar"; 21 | import { OllamaApi } from "./ollama/ollamaApi"; 22 | import { OpenaiApi } from "./openai/openaiApi"; 23 | import { AnthropicApi } from "./anthropic/anthropicApi"; 24 | import { AnthropicRequestBody } from "./anthropic/anthropicTypes"; 25 | 26 | /** 27 | * VS Code Chat provider backed by Hugging Face Inference Providers. 28 | */ 29 | export class HuggingFaceChatModelProvider implements LanguageModelChatProvider { 30 | /** Track last request completion time for delay calculation. 
*/ 31 | private _lastRequestTime: number | null = null; 32 | 33 | /** 34 | * Create a provider using the given secret storage for the API key. 35 | * @param secrets VS Code secret storage. 36 | */ 37 | constructor( 38 | private readonly secrets: vscode.SecretStorage, 39 | private readonly userAgent: string, 40 | private readonly statusBarItem: vscode.StatusBarItem 41 | ) {} 42 | 43 | /** 44 | * Get the list of available language models contributed by this provider 45 | * @param options Options which specify the calling context of this function 46 | * @param token A cancellation token which signals if the user cancelled the request or not 47 | * @returns A promise that resolves to the list of available language models 48 | */ 49 | async provideLanguageModelChatInformation( 50 | options: { silent: boolean }, 51 | _token: CancellationToken 52 | ): Promise { 53 | return prepareLanguageModelChatInformation( 54 | { silent: options.silent ?? false }, 55 | _token, 56 | this.secrets, 57 | this.userAgent 58 | ); 59 | } 60 | 61 | /** 62 | * Returns the number of tokens for a given text using the model specific tokenizer logic 63 | * @param model The language model to use 64 | * @param text The text to count tokens for 65 | * @param token A cancellation token for the request 66 | * @returns A promise that resolves to the number of tokens 67 | */ 68 | async provideTokenCount( 69 | model: LanguageModelChatInformation, 70 | text: string | LanguageModelChatRequestMessage, 71 | _token: CancellationToken 72 | ): Promise { 73 | return prepareTokenCount(model, text, _token); 74 | } 75 | 76 | /** 77 | * Returns the response for a chat request, passing the results to the progress callback. 78 | * The {@linkcode LanguageModelChatProvider} must emit the response parts to the progress callback as they are received from the language model. 79 | * @param model The language model to use 80 | * @param messages The messages to include in the request 81 | * @param options Options for the request 82 | * @param progress The progress to emit the streamed response chunks to 83 | * @param token A cancellation token for the request 84 | * @returns A promise that resolves when the response is complete. Results are actually passed to the progress callback. 85 | */ 86 | async provideLanguageModelChatResponse( 87 | model: LanguageModelChatInformation, 88 | messages: readonly LanguageModelChatRequestMessage[], 89 | options: ProvideLanguageModelChatResponseOptions, 90 | progress: Progress, 91 | token: CancellationToken 92 | ): Promise { 93 | // Update Token Usage 94 | updateContextStatusBar(messages, model, this.statusBarItem); 95 | 96 | // Apply delay between consecutive requests 97 | const config = vscode.workspace.getConfiguration(); 98 | const delayMs = config.get("oaicopilot.delay", 0); 99 | 100 | if (delayMs > 0 && this._lastRequestTime !== null) { 101 | const elapsed = Date.now() - this._lastRequestTime; 102 | if (elapsed < delayMs) { 103 | const remainingDelay = delayMs - elapsed; 104 | await new Promise((resolve) => { 105 | const timeout = setTimeout(() => { 106 | clearTimeout(timeout); 107 | resolve(); 108 | }, remainingDelay); 109 | }); 110 | } 111 | } 112 | 113 | const trackingProgress: Progress = { 114 | report: (part) => { 115 | try { 116 | progress.report(part); 117 | } catch (e) { 118 | console.error("[OAI Compatible Model Provider] Progress.report failed", { 119 | modelId: model.id, 120 | error: e instanceof Error ? 
{ name: e.name, message: e.message } : String(e), 121 | }); 122 | } 123 | }, 124 | }; 125 | try { 126 | // get model config from user settings 127 | const config = vscode.workspace.getConfiguration(); 128 | const userModels = config.get("oaicopilot.models", []); 129 | 130 | // Parse the model ID to resolve an optional config ID 131 | const parsedModelId = parseModelId(model.id); 132 | 133 | // Find the matching user model configuration 134 | // Prefer a model that matches both the base ID and the config ID 135 | // If there is no config ID, match a model with the same base ID 136 | let um: HFModelItem | undefined = userModels.find( 137 | (um) => 138 | um.id === parsedModelId.baseId && 139 | ((parsedModelId.configId && um.configId === parsedModelId.configId) || 140 | (!parsedModelId.configId && !um.configId)) 141 | ); 142 | 143 | // If still no model is found, fall back to any model matching the base ID (loosest match, for backward compatibility) 144 | if (!um) { 145 | um = userModels.find((um) => um.id === parsedModelId.baseId); 146 | } 147 | 148 | // Prepare model configuration for message conversion 149 | const modelConfig = { 150 | includeReasoningInRequest: um?.include_reasoning_in_request ?? false, 151 | }; 152 | 153 | // Get API key for the model's provider 154 | const provider = um?.owned_by; 155 | const useGenericKey = !um?.baseUrl; 156 | const modelApiKey = await this.ensureApiKey(useGenericKey, provider); 157 | if (!modelApiKey) { 158 | throw new Error("OAI Compatible API key not found"); 159 | } 160 | 161 | // send chat request 162 | const BASE_URL = um?.baseUrl || config.get("oaicopilot.baseUrl", ""); 163 | if (!BASE_URL || !BASE_URL.startsWith("http")) { 164 | throw new Error(`Invalid base URL configuration.`); 165 | } 166 | 167 | // get retry config 168 | const retryConfig = createRetryConfig(); 169 | 170 | // Check if using Ollama native API mode 171 | const apiMode = um?.apiMode ?? "openai"; 172 | 173 | // prepare headers with custom headers if specified 174 | const requestHeaders = this.prepareHeaders(modelApiKey, apiMode, um?.headers); 175 | 176 | // console.debug("[OAI Compatible Model Provider] messages:", JSON.stringify(messages)); 177 | if (apiMode === "ollama") { 178 | // Ollama native API mode 179 | const ollamaApi = new OllamaApi(); 180 | const ollamaMessages = ollamaApi.convertMessages(messages, modelConfig); 181 | 182 | let ollamaRequestBody: OllamaRequestBody = { 183 | model: parsedModelId.baseId, 184 | messages: ollamaMessages, 185 | stream: true, 186 | }; 187 | ollamaRequestBody = ollamaApi.prepareRequestBody(ollamaRequestBody, um, options); 188 | // console.debug("[OAI Compatible Model Provider] RequestBody:", JSON.stringify(ollamaRequestBody)); 189 | 190 | // send Ollama chat request with retry 191 | const response = await executeWithRetry(async () => { 192 | const res = await fetch(`${BASE_URL.replace(/\/+$/, "")}/api/chat`, { 193 | method: "POST", 194 | headers: requestHeaders, 195 | body: JSON.stringify(ollamaRequestBody), 196 | }); 197 | 198 | if (!res.ok) { 199 | const errorText = await res.text(); 200 | console.error("[Ollama Provider] Ollama API error response", errorText); 201 | throw new Error(`Ollama API error: [${res.status}] ${res.statusText}${errorText ?
`\n${errorText}` : ""}`); 202 | } 203 | 204 | return res; 205 | }, retryConfig); 206 | 207 | if (!response.body) { 208 | throw new Error("No response body from Ollama API"); 209 | } 210 | await ollamaApi.processStreamingResponse(response.body, trackingProgress, token); 211 | } else if (apiMode === "anthropic") { 212 | // Anthropic API mode 213 | const anthropicApi = new AnthropicApi(); 214 | const anthropicMessages = anthropicApi.convertMessages(messages, modelConfig); 215 | 216 | // requestBody 217 | let requestBody: AnthropicRequestBody = { 218 | model: parsedModelId.baseId, 219 | messages: anthropicMessages, 220 | stream: true, 221 | }; 222 | requestBody = anthropicApi.prepareRequestBody(requestBody, um, options); 223 | // console.debug("[OAI Compatible Model Provider] RequestBody:", JSON.stringify(requestBody)); 224 | 225 | // send Anthropic chat request with retry 226 | const response = await executeWithRetry(async () => { 227 | const res = await fetch(`${BASE_URL.replace(/\/+$/, "")}/v1/messages`, { 228 | method: "POST", 229 | headers: requestHeaders, 230 | body: JSON.stringify(requestBody), 231 | }); 232 | 233 | if (!res.ok) { 234 | const errorText = await res.text(); 235 | console.error("[Anthropic Provider] Anthropic API error response", errorText); 236 | throw new Error( 237 | `Anthropic API error: [${res.status}] ${res.statusText}${errorText ? `\n${errorText}` : ""}` 238 | ); 239 | } 240 | 241 | return res; 242 | }, retryConfig); 243 | 244 | if (!response.body) { 245 | throw new Error("No response body from Anthropic API"); 246 | } 247 | await anthropicApi.processStreamingResponse(response.body, trackingProgress, token); 248 | } else { 249 | // OpenAI compatible API mode (default) 250 | const openaiApi = new OpenaiApi(); 251 | const openaiMessages = openaiApi.convertMessages(messages, modelConfig); 252 | 253 | // requestBody 254 | let requestBody: Record = { 255 | model: parsedModelId.baseId, 256 | messages: openaiMessages, 257 | stream: true, 258 | stream_options: { include_usage: true }, 259 | }; 260 | requestBody = openaiApi.prepareRequestBody(requestBody, um, options); 261 | // console.debug("[OAI Compatible Model Provider] RequestBody:", JSON.stringify(requestBody)); 262 | 263 | // send chat request with retry 264 | const response = await executeWithRetry(async () => { 265 | const res = await fetch(`${BASE_URL.replace(/\/+$/, "")}/chat/completions`, { 266 | method: "POST", 267 | headers: requestHeaders, 268 | body: JSON.stringify(requestBody), 269 | }); 270 | 271 | if (!res.ok) { 272 | const errorText = await res.text(); 273 | console.error("[OAI Compatible Model Provider] OAI Compatible API error response", errorText); 274 | throw new Error( 275 | `OAI Compatible API error: [${res.status}] ${res.statusText}${errorText ? `\n${errorText}` : ""}` 276 | ); 277 | } 278 | 279 | return res; 280 | }, retryConfig); 281 | 282 | if (!response.body) { 283 | throw new Error("No response body from OAI Compatible API"); 284 | } 285 | await openaiApi.processStreamingResponse(response.body, trackingProgress, token); 286 | } 287 | } catch (err) { 288 | console.error("[OAI Compatible Model Provider] Chat request failed", { 289 | modelId: model.id, 290 | messageCount: messages.length, 291 | error: err instanceof Error ? { name: err.name, message: err.message } : String(err), 292 | }); 293 | throw err; 294 | } finally { 295 | // Update last request time after successful completion 296 | this._lastRequestTime = Date.now(); 297 | } 298 | } 299 | 300 | /** 301 | * Prepare headers for API request. 
302 | * @param apiKey The API key to use. 303 | * @param apiMode The apiMode (affects header format). 304 | * @param customHeaders Optional custom headers from model config. 305 | * @returns Headers object. 306 | */ 307 | private prepareHeaders( 308 | apiKey: string, 309 | apiMode: string, 310 | customHeaders?: Record 311 | ): Record { 312 | const headers: Record = { 313 | "Content-Type": "application/json", 314 | "User-Agent": this.userAgent, 315 | }; 316 | 317 | // Provider-specific header formats 318 | if (apiMode === "anthropic") { 319 | headers["x-api-key"] = apiKey; 320 | } else if (apiMode === "ollama" && apiKey !== "ollama") { 321 | headers["Authorization"] = `Bearer ${apiKey}`; 322 | } else { 323 | headers["Authorization"] = `Bearer ${apiKey}`; 324 | } 325 | 326 | // Merge custom headers 327 | if (customHeaders) { 328 | return { ...headers, ...customHeaders }; 329 | } 330 | 331 | return headers; 332 | } 333 | 334 | /** 335 | * Ensure an API key exists in SecretStorage, optionally prompting the user when not silent. 336 | * @param useGenericKey If true, use generic API key. 337 | * @param provider Optional provider name to get provider-specific API key. 338 | */ 339 | private async ensureApiKey(useGenericKey: boolean, provider?: string): Promise { 340 | // Try to get provider-specific API key first 341 | let apiKey: string | undefined; 342 | if (provider && provider.trim() !== "") { 343 | const normalizedProvider = provider.toLowerCase(); 344 | const providerKey = `oaicopilot.apiKey.${normalizedProvider}`; 345 | apiKey = await this.secrets.get(providerKey); 346 | 347 | if (!apiKey && !useGenericKey) { 348 | const entered = await vscode.window.showInputBox({ 349 | title: `OAI Compatible API Key for ${normalizedProvider}`, 350 | prompt: `Enter your OAI Compatible API key for ${normalizedProvider}`, 351 | ignoreFocusOut: true, 352 | password: true, 353 | }); 354 | if (entered && entered.trim()) { 355 | apiKey = entered.trim(); 356 | await this.secrets.store(providerKey, apiKey); 357 | } 358 | } 359 | } 360 | 361 | // Fall back to generic API key 362 | if (!apiKey) { 363 | apiKey = await this.secrets.get("oaicopilot.apiKey"); 364 | } 365 | 366 | if (!apiKey && useGenericKey) { 367 | const entered = await vscode.window.showInputBox({ 368 | title: "OAI Compatible API Key", 369 | prompt: "Enter your OAI Compatible API key", 370 | ignoreFocusOut: true, 371 | password: true, 372 | }); 373 | if (entered && entered.trim()) { 374 | apiKey = entered.trim(); 375 | await this.secrets.store("oaicopilot.apiKey", apiKey); 376 | } 377 | } 378 | return apiKey; 379 | } 380 | } 381 | -------------------------------------------------------------------------------- /src/openai/openaiApi.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from "vscode"; 2 | import { 3 | CancellationToken, 4 | LanguageModelChatRequestMessage, 5 | ProvideLanguageModelChatResponseOptions, 6 | LanguageModelResponsePart2, 7 | Progress, 8 | } from "vscode"; 9 | 10 | import type { HFModelItem, ReasoningConfig } from "../types"; 11 | 12 | import type { 13 | OpenAIChatMessage, 14 | OpenAIToolCall, 15 | ChatMessageContent, 16 | ReasoningDetail, 17 | ReasoningSummaryDetail, 18 | ReasoningTextDetail, 19 | } from "./openaiTypes"; 20 | 21 | import { 22 | isImageMimeType, 23 | createDataUrl, 24 | isToolResultPart, 25 | collectToolResultText, 26 | convertToolsToOpenAI, 27 | mapRole, 28 | } from "../utils"; 29 | 30 | import { CommonApi } from "../commonApi"; 31 | 32 | export class 
OpenaiApi extends CommonApi { 33 | constructor() { 34 | super(); 35 | } 36 | 37 | /** 38 | * Convert VS Code chat request messages into OpenAI-compatible message objects. 39 | * @param messages The VS Code chat messages to convert. 40 | * @param modelConfig Model configuration that may affect message conversion. 41 | * @returns OpenAI-compatible messages array. 42 | */ 43 | convertMessages( 44 | messages: readonly LanguageModelChatRequestMessage[], 45 | modelConfig: { includeReasoningInRequest: boolean } 46 | ): OpenAIChatMessage[] { 47 | const out: OpenAIChatMessage[] = []; 48 | for (const m of messages) { 49 | const role = mapRole(m); 50 | const textParts: string[] = []; 51 | const imageParts: vscode.LanguageModelDataPart[] = []; 52 | const toolCalls: OpenAIToolCall[] = []; 53 | const toolResults: { callId: string; content: string }[] = []; 54 | const reasoningParts: string[] = []; 55 | 56 | for (const part of m.content ?? []) { 57 | if (part instanceof vscode.LanguageModelTextPart) { 58 | textParts.push(part.value); 59 | } else if (part instanceof vscode.LanguageModelDataPart && isImageMimeType(part.mimeType)) { 60 | imageParts.push(part); 61 | } else if (part instanceof vscode.LanguageModelToolCallPart) { 62 | const id = part.callId || `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`; 63 | let args = "{}"; 64 | try { 65 | args = JSON.stringify(part.input ?? {}); 66 | } catch { 67 | args = "{}"; 68 | } 69 | toolCalls.push({ id, type: "function", function: { name: part.name, arguments: args } }); 70 | } else if (isToolResultPart(part)) { 71 | const callId = (part as { callId?: string }).callId ?? ""; 72 | const content = collectToolResultText(part as { content?: ReadonlyArray }); 73 | toolResults.push({ callId, content }); 74 | } else if (part instanceof vscode.LanguageModelThinkingPart) { 75 | // Handle thinking content 76 | const content = Array.isArray(part.value) ?
part.value.join("") : part.value; 77 | reasoningParts.push(content); 78 | } 79 | } 80 | 81 | // Build the assistant message, including thinking content 82 | if (role === "assistant") { 83 | const assistantMessage: OpenAIChatMessage = { 84 | role: "assistant", 85 | content: textParts.join("\n") || undefined, 86 | }; 87 | 88 | // Add thinking content (whether it is included depends on configuration) 89 | if (modelConfig.includeReasoningInRequest && reasoningParts.length > 0) { 90 | assistantMessage.reasoning_content = reasoningParts.join("\n"); 91 | } 92 | 93 | // Add tool calls 94 | if (toolCalls.length > 0) { 95 | assistantMessage.tool_calls = toolCalls; 96 | } 97 | 98 | // Only add the message when it has content, thinking content, or tool calls 99 | if (assistantMessage.content || assistantMessage.reasoning_content || assistantMessage.tool_calls) { 100 | out.push(assistantMessage); 101 | } 102 | } 103 | 104 | // Handle tool results 105 | for (const tr of toolResults) { 106 | out.push({ role: "tool", tool_call_id: tr.callId, content: tr.content || "" }); 107 | } 108 | 109 | // Handle user and system messages 110 | if (textParts.length > 0 && role !== "assistant") { 111 | if (role === "user") { 112 | if (imageParts.length > 0) { 113 | // Multimodal message: contains images and text 114 | const contentArray: ChatMessageContent[] = []; 115 | contentArray.push({ 116 | type: "text", 117 | text: textParts.join("\n"), 118 | }); 119 | 120 | // Add image content 121 | for (const imagePart of imageParts) { 122 | const dataUrl = createDataUrl(imagePart); 123 | contentArray.push({ 124 | type: "image_url", 125 | image_url: { 126 | url: dataUrl, 127 | }, 128 | }); 129 | } 130 | out.push({ role, content: contentArray }); 131 | } else { 132 | // Plain text message 133 | out.push({ role, content: textParts.join("\n") }); 134 | } 135 | } else if (role === "system") { 136 | out.push({ role, content: textParts.join("\n") }); 137 | } 138 | } 139 | } 140 | return out; 141 | } 142 | 143 | prepareRequestBody( 144 | rb: Record, 145 | um: HFModelItem | undefined, 146 | options: ProvideLanguageModelChatResponseOptions 147 | ): Record { 148 | // temperature 149 | const oTemperature = options.modelOptions?.temperature ?? 0; 150 | const temperature = um?.temperature ??
oTemperature; 151 | rb.temperature = temperature; 152 | if (um && um.temperature === null) { 153 | delete rb.temperature; 154 | } 155 | 156 | // top_p 157 | if (um?.top_p !== undefined && um.top_p !== null) { 158 | rb.top_p = um.top_p; 159 | } 160 | 161 | // max_tokens 162 | if (um?.max_tokens !== undefined) { 163 | rb.max_tokens = um.max_tokens; 164 | } 165 | 166 | // max_completion_tokens (OpenAI new standard parameter) 167 | if (um?.max_completion_tokens !== undefined) { 168 | rb.max_completion_tokens = um.max_completion_tokens; 169 | } 170 | 171 | // OpenAI reasoning configuration 172 | if (um?.reasoning_effort !== undefined) { 173 | rb.reasoning_effort = um.reasoning_effort; 174 | } 175 | 176 | // enable_thinking (non-OpenRouter only) 177 | const enableThinking = um?.enable_thinking; 178 | if (enableThinking !== undefined) { 179 | rb.enable_thinking = enableThinking; 180 | 181 | if (um?.thinking_budget !== undefined) { 182 | rb.thinking_budget = um.thinking_budget; 183 | } 184 | } 185 | 186 | // thinking (Zai provider) 187 | if (um?.thinking?.type !== undefined) { 188 | rb.thinking = { 189 | type: um.thinking.type, 190 | }; 191 | } 192 | 193 | // OpenRouter reasoning configuration 194 | if (um?.reasoning !== undefined) { 195 | const reasoningConfig: ReasoningConfig = um.reasoning as ReasoningConfig; 196 | if (reasoningConfig.enabled !== false) { 197 | const reasoningObj: Record = {}; 198 | const effort = reasoningConfig.effort; 199 | const maxTokensReasoning = reasoningConfig.max_tokens || 2000; // Default 2000 as per docs 200 | if (effort && effort !== "auto") { 201 | reasoningObj.effort = effort; 202 | } else { 203 | // If auto or unspecified, use max_tokens (Anthropic-style fallback) 204 | reasoningObj.max_tokens = maxTokensReasoning; 205 | } 206 | if (reasoningConfig.exclude !== undefined) { 207 | reasoningObj.exclude = reasoningConfig.exclude; 208 | } 209 | rb.reasoning = reasoningObj; 210 | } 211 | } 212 | 213 | // stop 214 | if (options.modelOptions) { 215 | const mo = options.modelOptions as Record; 216 | if (typeof mo.stop === "string" || Array.isArray(mo.stop)) { 217 | rb.stop = mo.stop; 218 | } 219 | } 220 | 221 | // tools 222 | const toolConfig = convertToolsToOpenAI(options); 223 | if (toolConfig.tools) { 224 | rb.tools = toolConfig.tools; 225 | } 226 | if (toolConfig.tool_choice) { 227 | rb.tool_choice = toolConfig.tool_choice; 228 | } 229 | 230 | // Configure user-defined additional parameters 231 | if (um?.top_k !== undefined) { 232 | rb.top_k = um.top_k; 233 | } 234 | if (um?.min_p !== undefined) { 235 | rb.min_p = um.min_p; 236 | } 237 | if (um?.frequency_penalty !== undefined) { 238 | rb.frequency_penalty = um.frequency_penalty; 239 | } 240 | if (um?.presence_penalty !== undefined) { 241 | rb.presence_penalty = um.presence_penalty; 242 | } 243 | if (um?.repetition_penalty !== undefined) { 244 | rb.repetition_penalty = um.repetition_penalty; 245 | } 246 | 247 | // Process extra configuration parameters 248 | if (um?.extra && typeof um.extra === "object") { 249 | // Add all extra parameters directly to the request body 250 | for (const [key, value] of Object.entries(um.extra)) { 251 | if (value !== undefined) { 252 | rb[key] = value; 253 | } 254 | } 255 | } 256 | 257 | return rb; 258 | } 259 | 260 | /** 261 | * Read and parse the HF Router streaming (SSE-like) response and report parts. 262 | * @param responseBody The readable stream body. 263 | * @param progress Progress reporter for streamed parts. 264 | * @param token Cancellation token. 
265 | */ 266 | async processStreamingResponse( 267 | responseBody: ReadableStream, 268 | progress: Progress, 269 | token: CancellationToken 270 | ): Promise { 271 | const reader = responseBody.getReader(); 272 | const decoder = new TextDecoder(); 273 | let buffer = ""; 274 | 275 | try { 276 | while (true) { 277 | if (token.isCancellationRequested) { 278 | break; 279 | } 280 | 281 | const { done, value } = await reader.read(); 282 | if (done) { 283 | break; 284 | } 285 | 286 | buffer += decoder.decode(value, { stream: true }); 287 | const lines = buffer.split("\n"); 288 | buffer = lines.pop() || ""; 289 | 290 | for (const line of lines) { 291 | if (!line.startsWith("data:")) { 292 | continue; 293 | } 294 | const data = line.slice(5).trim(); 295 | if (data === "[DONE]") { 296 | // Do not throw on [DONE]; any incomplete/empty buffers are ignored. 297 | await this.flushToolCallBuffers(progress, /*throwOnInvalid*/ false); 298 | continue; 299 | } 300 | 301 | try { 302 | const parsed = JSON.parse(data); 303 | // console.debug("[OAI Compatible Model Provider] data:", JSON.stringify(parsed)); 304 | 305 | await this.processDelta(parsed, progress); 306 | } catch { 307 | // Silently ignore malformed SSE lines temporarily 308 | } 309 | } 310 | } 311 | } finally { 312 | reader.releaseLock(); 313 | // If there's an active thinking sequence, end it first 314 | this.reportEndThinking(progress); 315 | } 316 | } 317 | 318 | /** 319 | * Handle a single streamed delta chunk, emitting text and tool call parts. 320 | * @param delta Parsed SSE chunk from the Router. 321 | * @param progress Progress reporter for parts. 322 | */ 323 | private async processDelta( 324 | delta: Record, 325 | progress: Progress 326 | ): Promise { 327 | let emitted = false; 328 | const choice = (delta.choices as Record[] | undefined)?.[0]; 329 | if (!choice) { 330 | return false; 331 | } 332 | 333 | const deltaObj = choice.delta as Record | undefined; 334 | 335 | // Process thinking content first (before regular text content) 336 | try { 337 | let maybeThinking = 338 | (choice as Record | undefined)?.thinking ?? 339 | (deltaObj as Record | undefined)?.thinking ?? 340 | (deltaObj as Record | undefined)?.reasoning_content; 341 | 342 | // OpenRouter/Claude reasoning_details array handling (new) 343 | const maybeReasoningDetails = 344 | (deltaObj as Record)?.reasoning_details ?? 345 | (choice as Record)?.reasoning_details; 346 | if (maybeReasoningDetails && Array.isArray(maybeReasoningDetails) && maybeReasoningDetails.length > 0) { 347 | // Prioritize details array over simple reasoning 348 | const details: Array = maybeReasoningDetails as Array; 349 | // Sort by index to preserve order (in case out-of-order chunks) 350 | const sortedDetails = details.sort((a, b) => (a.index ?? 0) - (b.index ?? 
0)); 351 | 352 | for (const detail of sortedDetails) { 353 | let extractedText = ""; 354 | if (detail.type === "reasoning.summary") { 355 | extractedText = (detail as ReasoningSummaryDetail).summary; 356 | } else if (detail.type === "reasoning.text") { 357 | extractedText = (detail as ReasoningTextDetail).text; 358 | } else if (detail.type === "reasoning.encrypted") { 359 | extractedText = "[REDACTED]"; // As per docs 360 | } else { 361 | extractedText = JSON.stringify(detail); // Fallback for unknown 362 | } 363 | 364 | if (extractedText) { 365 | this.bufferThinkingContent(extractedText, progress); 366 | emitted = true; 367 | } 368 | } 369 | maybeThinking = null; // Skip simple thinking if details present 370 | } 371 | 372 | // Fallback to simple thinking if no details 373 | if (maybeThinking !== undefined && maybeThinking !== null) { 374 | let text = ""; 375 | // let metadata: Record | undefined; 376 | if (maybeThinking && typeof maybeThinking === "object") { 377 | const mt = maybeThinking as Record; 378 | text = typeof mt["text"] === "string" ? (mt["text"] as string) : JSON.stringify(mt); 379 | // metadata = mt["metadata"] ? (mt["metadata"] as Record) : undefined; 380 | } else if (typeof maybeThinking === "string") { 381 | text = maybeThinking; 382 | } 383 | if (text) { 384 | this.bufferThinkingContent(text, progress); 385 | emitted = true; 386 | } 387 | } 388 | } catch (e) { 389 | console.error("[OAI Compatible Model Provider] Failed to process thinking/reasoning_details:", e); 390 | } 391 | 392 | if (deltaObj?.content) { 393 | const content = String(deltaObj.content); 394 | 395 | // Process XML think blocks or text content (mutually exclusive) 396 | const xmlRes = this.processXmlThinkBlocks(content, progress); 397 | if (xmlRes.emittedAny) { 398 | emitted = true; 399 | } else { 400 | // If there's an active thinking sequence, end it first 401 | this.reportEndThinking(progress); 402 | 403 | // Only process text content if no XML think blocks were emitted 404 | const res = this.processTextContent(content, progress); 405 | if (res.emittedText) { 406 | this._hasEmittedAssistantText = true; 407 | } 408 | if (res.emittedAny) { 409 | emitted = true; 410 | } 411 | } 412 | } 413 | 414 | if (deltaObj?.tool_calls) { 415 | // If there's an active thinking sequence, end it first 416 | this.reportEndThinking(progress); 417 | 418 | const toolCalls = deltaObj.tool_calls as Array>; 419 | 420 | // SSEProcessor-like: if first tool call appears after text, emit a whitespace 421 | // to ensure any UI buffers/linkifiers are flushed without adding visible noise. 422 | if (!this._emittedBeginToolCallsHint && this._hasEmittedAssistantText && toolCalls.length > 0) { 423 | progress.report(new vscode.LanguageModelTextPart(" ")); 424 | this._emittedBeginToolCallsHint = true; 425 | } 426 | 427 | for (const tc of toolCalls) { 428 | const idx = (tc.index as number) ?? 0; 429 | // Ignore any further deltas for an index we've already completed 430 | if (this._completedToolCallIndices.has(idx)) { 431 | continue; 432 | } 433 | const buf = this._toolCallBuffers.get(idx) ?? 
{ args: "" }; 434 | if (tc.id && typeof tc.id === "string") { 435 | buf.id = tc.id as string; 436 | } 437 | const func = tc.function as Record | undefined; 438 | if (func?.name && typeof func.name === "string") { 439 | buf.name = func.name as string; 440 | } 441 | if (typeof func?.arguments === "string") { 442 | buf.args += func.arguments as string; 443 | } 444 | this._toolCallBuffers.set(idx, buf); 445 | 446 | // Emit immediately once arguments become valid JSON to avoid perceived hanging 447 | await this.tryEmitBufferedToolCall(idx, progress); 448 | } 449 | } 450 | 451 | const finish = (choice.finish_reason as string | undefined) ?? undefined; 452 | if (finish === "tool_calls" || finish === "stop") { 453 | // On both 'tool_calls' and 'stop', emit any buffered calls and throw on invalid JSON 454 | await this.flushToolCallBuffers(progress, /*throwOnInvalid*/ true); 455 | } 456 | return emitted; 457 | } 458 | 459 | /** 460 | * Process streamed text content for inline tool-call control tokens and emit text/tool calls. 461 | * Returns which parts were emitted for logging/flow control. 462 | */ 463 | private processTextContent( 464 | input: string, 465 | progress: Progress 466 | ): { emittedText: boolean; emittedAny: boolean } { 467 | let emittedText = false; 468 | let emittedAny = false; 469 | 470 | // Emit any visible text 471 | const textToEmit = input; 472 | if (textToEmit && textToEmit.length > 0) { 473 | progress.report(new vscode.LanguageModelTextPart(textToEmit)); 474 | emittedText = true; 475 | emittedAny = true; 476 | } 477 | 478 | return { emittedText, emittedAny }; 479 | } 480 | 481 | /** 482 | * Process streamed text content for XML think blocks and emit thinking parts. 483 | * Returns whether any thinking content was emitted. 484 | */ 485 | private processXmlThinkBlocks( 486 | input: string, 487 | progress: Progress 488 | ): { emittedAny: boolean } { 489 | // If we've already attempted detection and found no THINK_START, skip processing 490 | if (this._xmlThinkDetectionAttempted && !this._xmlThinkActive) { 491 | return { emittedAny: false }; 492 | } 493 | 494 | const THINK_START = ""; 495 | const THINK_END = ""; 496 | 497 | let data = input; 498 | let emittedAny = false; 499 | 500 | while (data.length > 0) { 501 | if (!this._xmlThinkActive) { 502 | // Look for think start tag 503 | const startIdx = data.indexOf(THINK_START); 504 | if (startIdx === -1) { 505 | // No think start found, mark detection as attempted and skip future processing 506 | this._xmlThinkDetectionAttempted = true; 507 | data = ""; 508 | break; 509 | } 510 | 511 | // Found think start tag 512 | this._xmlThinkActive = true; 513 | // Generate a new thinking ID for this XML think block 514 | this._currentThinkingId = this.generateThinkingId(); 515 | 516 | // Skip the start tag and continue processing 517 | data = data.slice(startIdx + THINK_START.length); 518 | continue; 519 | } 520 | 521 | // We are inside a think block, look for end tag 522 | const endIdx = data.indexOf(THINK_END); 523 | if (endIdx === -1) { 524 | // No end tag found, emit current chunk content as thinking part 525 | const thinkContent = data.trim(); 526 | if (thinkContent) { 527 | progress.report(new vscode.LanguageModelThinkingPart(thinkContent, this._currentThinkingId || undefined)); 528 | emittedAny = true; 529 | } 530 | data = ""; 531 | break; 532 | } 533 | 534 | // Found end tag, emit final thinking part 535 | const thinkContent = data.slice(0, endIdx); 536 | if (thinkContent) { 537 | progress.report(new 
vscode.LanguageModelThinkingPart(thinkContent, this._currentThinkingId || undefined)); 538 | emittedAny = true; 539 | } 540 | 541 | // Reset state and continue with remaining data 542 | this._xmlThinkActive = false; 543 | this._currentThinkingId = null; 544 | data = data.slice(endIdx + THINK_END.length); 545 | } 546 | 547 | return { emittedAny }; 548 | } 549 | } 550 | --------------------------------------------------------------------------------