├── assets ├── logo.png ├── statusBar.png └── thinkingPartDemo.png ├── .prettierrc ├── .gitignore ├── .prettierignore ├── .vscode ├── settings.json ├── tasks.json └── launch.json ├── .vscodeignore ├── .vscode-test.mjs ├── tsconfig.json ├── LICENSE ├── CONTRIBUTING.md ├── eslint.config.mjs ├── .github ├── workflows │ └── release.yml └── copilot-instructions.md ├── src ├── ollama │ ├── ollamaTypes.ts │ └── ollamaApi.ts ├── openai │ ├── openaiTypes.ts │ └── openaiApi.ts ├── anthropic │ ├── anthropicTypes.ts │ └── anthropicApi.ts ├── provideToken.ts ├── vscode.proposed.chatProvider.d.ts ├── types.ts ├── statusBar.ts ├── extension.ts ├── vscode.proposed.languageModelThinkingPart.d.ts ├── vscode.proposed.languageModelDataPart.d.ts ├── provideModel.ts ├── utils.ts ├── commonApi.ts └── provider.ts ├── CHANGELOG.md ├── package.json └── README.md /assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JohnnyZ93/oai-compatible-copilot/HEAD/assets/logo.png -------------------------------------------------------------------------------- /assets/statusBar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JohnnyZ93/oai-compatible-copilot/HEAD/assets/statusBar.png -------------------------------------------------------------------------------- /assets/thinkingPartDemo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JohnnyZ93/oai-compatible-copilot/HEAD/assets/thinkingPartDemo.png -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "semi": true, 3 | "useTabs": true, 4 | "tabWidth": 2, 5 | "printWidth": 120, 6 | "trailingComma": "es5" 7 | } 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | out 2 | dist 3 | node_modules 4 | .vscode-test/ 5 | *.vsix 6 | .DS_Store 7 | CLAUDE.md 8 | .doc 9 | .clinerules 10 | AGENTS.md -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | dist/** 2 | .vscode/** 3 | .vscode-test/** 4 | .git-blame-ignore-revs 5 | **/*.md 6 | **/*.yml 7 | **/*.json 8 | **/*.mjs 9 | **/*vscode.d.ts 10 | .build/** -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "search.exclude": { 3 | "out": true 4 | }, 5 | "git.branchProtection": [ 6 | "main" 7 | ], 8 | "files.trimTrailingWhitespace": true 9 | } -------------------------------------------------------------------------------- /.vscodeignore: -------------------------------------------------------------------------------- 1 | .vscode/** 2 | .vscode-test/** 3 | src/** 4 | .gitignore 5 | .yarnrc 6 | webpack.config.js 7 | vsc-extension-quickstart.md 8 | **/tsconfig.json 9 | **/.eslintrc.json 10 | **/*.map 11 | **/*.ts -------------------------------------------------------------------------------- /.vscode-test.mjs: -------------------------------------------------------------------------------- 1 | import { defineConfig } from '@vscode/test-cli'; 2 | 3 | export default defineConfig({ 4 | files: 
'out/test/**/*.test.js', 5 | mocha: { 6 | ui: 'tdd', 7 | timeout: 20000, 8 | color: true 9 | } 10 | }); -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "module": "Node16", 4 | "target": "ES2024", 5 | "lib": ["ES2024", "dom"], 6 | "sourceMap": true, 7 | "rootDir": "src", 8 | "strict": true /* enable all strict type-checking options */, 9 | "outDir": "out", 10 | "skipLibCheck": true /* Skip type checking of declaration files */, 11 | "types": ["node", "mocha"] 12 | /* Additional Checks */ 13 | // "noImplicitReturns": true, /* Report error when not all code paths in function return a value. */ 14 | // "noFallthroughCasesInSwitch": true, /* Report errors for fallthrough cases in switch statement. */ 15 | // "noUnusedParameters": true, /* Report errors on unused parameters. */ 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | // See https://go.microsoft.com/fwlink/?LinkId=733558 2 | // for the documentation about the tasks.json format 3 | { 4 | "version": "2.0.0", 5 | "tasks": [ 6 | { 7 | "type": "npm", 8 | "script": "watch", 9 | "problemMatcher": "$tsc-watch", 10 | "isBackground": true, 11 | "presentation": { 12 | "reveal": "never", 13 | "group": "watchers" 14 | }, 15 | "group": { 16 | "kind": "build", 17 | "isDefault": true 18 | } 19 | }, 20 | { 21 | "type": "npm", 22 | "script": "watch-tests", 23 | "problemMatcher": "$tsc-watch", 24 | "isBackground": true, 25 | "presentation": { 26 | "reveal": "never", 27 | "group": "watchers" 28 | }, 29 | "group": "build" 30 | }, 31 | { 32 | "label": "tasks: watch-tests", 33 | "dependsOn": [ 34 | "npm: watch", 35 | "npm: watch-tests" 36 | ], 37 | "problemMatcher": [] 38 | } 39 | ] 40 | } 41 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | // A launch configuration that compiles the extension and then opens it inside a new window 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | { 6 | "version": "0.2.0", 7 | "configurations": [ 8 | { 9 | "name": "Run Extension", 10 | "type": "extensionHost", 11 | "request": "launch", 12 | "args": [ 13 | "--extensionDevelopmentPath=${workspaceFolder}" 14 | ], 15 | "outFiles": [ 16 | "${workspaceFolder}/out/**/*.js" 17 | ], 18 | "preLaunchTask": "${defaultBuildTask}" 19 | }, 20 | { 21 | "name": "Extension Tests", 22 | "type": "extensionHost", 23 | "request": "launch", 24 | "args": [ 25 | "--extensionDevelopmentPath=${workspaceFolder}", 26 | "--extensionTestsPath=${workspaceFolder}/out/test/suite/index" 27 | ], 28 | "outFiles": [ 29 | "${workspaceFolder}/out/**/*.js", 30 | "${workspaceFolder}/dist/**/*.js" 31 | ], 32 | "preLaunchTask": "tasks: watch-tests" 33 | } 34 | ] 35 | } 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Johnny Zhao 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guide 2 | 3 | Thanks for taking the time to contribute. This guide will help you get started with the project. 4 | 5 | The project also welcomes serious and committed maintainers. 6 | 7 | ## How to contribute? 8 | 9 | ### Creating an Issue 10 | 11 | For anything other than a typo or a bug fix, please raise an issue to discuss your proposal before submitting any code. 12 | 13 | ### License for contributions 14 | 15 | As the copyright owner, you agree to license your contributions under an irrevocable MIT license. 16 | 17 | ### For Developers: Creating a Pull Request 18 | 19 | **Requirements:** 20 | - VS Code 1.104.0 or higher. 21 | - Node.js 22. 22 | - Your OpenAI-compatible provider API key. 23 | 24 | ```bash 25 | git clone https://github.com/JohnnyZ93/oai-compatible-copilot 26 | cd oai-compatible-copilot 27 | npm install 28 | npm run compile 29 | ``` 30 | Press F5 to launch an Extension Development Host. 31 | 32 | **Common scripts:** 33 | - Build: `npm run compile` 34 | - Watch: `npm run watch` 35 | - Lint: `npm run lint` 36 | - Format: `npm run format` 37 | 38 | ### Tests 39 | 40 | Use your own OpenAI-compatible provider API key for testing.
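For a quick manual check in the Extension Development Host, a minimal entry in your user `settings.json` should be enough. The field names below come from the `HFModelItem` interface in `src/types.ts`; the model id, provider name, and base URL are placeholders to replace with your provider's real values:

```json
{
	"oaicopilot.models": [
		{
			"id": "my-test-model",
			"owned_by": "my-provider",
			"baseUrl": "https://api.example.com/v1",
			"context_length": 128000,
			"max_tokens": 4096
		}
	]
}
```

Then store the key for that provider with the `oaicopilot.setProviderApikey` command (or `oaicopilot.setApikey` for a global key) so requests from the development host can authenticate.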
-------------------------------------------------------------------------------- /eslint.config.mjs: -------------------------------------------------------------------------------- 1 | /** 2 | * ESLint configuration for the project. 3 | * 4 | * See https://eslint.style and https://typescript-eslint.io for additional linting options. 5 | */ 6 | // @ts-check 7 | import js from '@eslint/js'; 8 | import tseslint from 'typescript-eslint'; 9 | import stylistic from '@stylistic/eslint-plugin'; 10 | 11 | export default tseslint.config( 12 | { 13 | ignores: [ 14 | '.vscode-test', 15 | 'out', 16 | '**/*.d.ts' 17 | ] 18 | }, 19 | { 20 | files: ['**/*.{js,mjs,cjs,ts,jsx,tsx}'], 21 | }, 22 | js.configs.recommended, 23 | ...tseslint.configs.recommended, 24 | ...tseslint.configs.stylistic, 25 | { 26 | plugins: { 27 | '@stylistic': stylistic 28 | }, 29 | rules: { 30 | 'curly': 'warn', 31 | '@stylistic/semi': ['warn', 'always'], 32 | '@typescript-eslint/no-empty-function': 'off', 33 | '@typescript-eslint/array-type': 'off', 34 | '@typescript-eslint/naming-convention': [ 35 | 'warn', 36 | { 37 | 'selector': 'import', 38 | 'format': ['camelCase', 'PascalCase'] 39 | } 40 | ], 41 | '@typescript-eslint/no-unused-vars': [ 42 | 'error', 43 | { 44 | 'argsIgnorePattern': '^_' 45 | } 46 | ] 47 | } 48 | } 49 | ); -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: release 2 | on: 3 | workflow_dispatch: 4 | push: 5 | branches: 6 | - 'release/**' 7 | jobs: 8 | package: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v4 12 | 13 | - name: Verify version matches branch 14 | env: 15 | BRANCH: ${{ github.ref_name }} 16 | run: | 17 | TAG="${BRANCH##*/}" 18 | PKG_VERSION=$(node -p "require('./package.json').version") 19 | if [ "$TAG" != "$PKG_VERSION" ]; then 20 | echo "package.json version ($PKG_VERSION) does not match branch tag ($TAG)"; exit 1; 21 | fi 22 | 23 | - uses: actions/setup-node@v4 24 | with: 25 | node-version: 20 26 | 27 | - run: npm ci 28 | - run: npm run compile 29 | 30 | - name: Package VSIX 31 | run: npx @vscode/vsce package -o extension.vsix 32 | 33 | - name: Upload artifact 34 | uses: actions/upload-artifact@v4 35 | with: 36 | name: vsix 37 | path: extension.vsix 38 | 39 | publish: 40 | runs-on: ubuntu-latest 41 | needs: [package] 42 | steps: 43 | - uses: actions/checkout@v4 44 | 45 | - uses: actions/setup-node@v4 46 | with: 47 | node-version: 20 48 | 49 | - uses: actions/download-artifact@v4 50 | with: 51 | name: vsix 52 | path: . 
53 | 54 | - name: Publish to VS Code Marketplace 55 | run: npx @vscode/vsce publish --allow-all-proposed-apis --pat ${{ secrets.MARKETPLACE_TOKEN }} --packagePath ./extension.vsix 56 | -------------------------------------------------------------------------------- /src/ollama/ollamaTypes.ts: -------------------------------------------------------------------------------- 1 | import { OpenAIFunctionToolDef } from "../openai/openaiTypes"; 2 | 3 | /** 4 | * Ollama native API message format 5 | * @see https://docs.ollama.com/api#generate-a-chat-message 6 | */ 7 | export interface OllamaMessage { 8 | role: "system" | "user" | "assistant" | "tool"; 9 | content: string; 10 | images?: string[]; 11 | thinking?: string; 12 | tool_calls?: OllamaToolCall[]; 13 | tool_name?: string; // For tool role messages 14 | } 15 | 16 | /** 17 | * Ollama native API request body 18 | * @see https://docs.ollama.com/api#generate-a-chat-message 19 | */ 20 | export interface OllamaRequestBody { 21 | model: string; 22 | messages: OllamaMessage[]; 23 | stream?: boolean; 24 | think?: boolean | string; 25 | options?: OllamaModelOptions; 26 | tools?: OpenAIFunctionToolDef[]; 27 | } 28 | 29 | /** 30 | * Ollama model options for controlling text generation 31 | * @see https://docs.ollama.com/api#generate-a-chat-message 32 | */ 33 | export interface OllamaModelOptions { 34 | seed?: number; 35 | temperature?: number; 36 | top_k?: number; 37 | top_p?: number; 38 | min_p?: number; 39 | stop?: string | string[]; 40 | num_ctx?: number; 41 | num_predict?: number; 42 | } 43 | 44 | /** 45 | * Ollama tool call format 46 | * @see https://docs.ollama.com/api#tool-calling 47 | */ 48 | export interface OllamaToolCall { 49 | function: { 50 | name: string; 51 | arguments: Record<string, unknown>; 52 | }; 53 | } 54 | 55 | /** 56 | * Ollama native API streaming response chunk 57 | */ 58 | export interface OllamaStreamChunk { 59 | model: string; 60 | created_at: string; 61 | message: { 62 | role: string; 63 | content: string; 64 | thinking?: string; 65 | tool_calls?: OllamaToolCall[]; 66 | }; 67 | done: boolean; 68 | done_reason?: string; 69 | } 70 | -------------------------------------------------------------------------------- /src/openai/openaiTypes.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * OpenAI function-call entry emitted by assistant messages. 3 | */ 4 | export interface OpenAIToolCall { 5 | id: string; 6 | type: "function"; 7 | function: { name: string; arguments: string }; 8 | } 9 | 10 | /** 11 | * OpenAI function tool definition used to advertise tools. 12 | */ 13 | export interface OpenAIFunctionToolDef { 14 | type: "function"; 15 | function: { 16 | name: string; 17 | description?: string; 18 | parameters?: object; 19 | }; 20 | } 21 | 22 | /** 23 | * OpenAI-style chat message used for router requests. 24 | */ 25 | export interface OpenAIChatMessage { 26 | role: OpenAIChatRole; 27 | content?: string | ChatMessageContent[]; 28 | name?: string; 29 | tool_calls?: OpenAIToolCall[]; 30 | tool_call_id?: string; 31 | reasoning_content?: string; 32 | } 33 | 34 | /** 35 | * Chat message content (supports multimodal text and image parts) 36 | */ 37 | export interface ChatMessageContent { 38 | type: "text" | "image_url"; 39 | text?: string; 40 | image_url?: { 41 | url: string; 42 | }; 43 | } 44 | 45 | /** OpenAI-style chat roles.
*/ 46 | export type OpenAIChatRole = "system" | "user" | "assistant" | "tool"; 47 | 48 | export interface ReasoningDetailCommon { 49 | id: string | null; 50 | format: string; // e.g., "anthropic-claude-v1", "openai-responses-v1" 51 | index?: number; 52 | } 53 | 54 | export interface ReasoningSummaryDetail extends ReasoningDetailCommon { 55 | type: "reasoning.summary"; 56 | summary: string; 57 | } 58 | 59 | export interface ReasoningEncryptedDetail extends ReasoningDetailCommon { 60 | type: "reasoning.encrypted"; 61 | data: string; // Base64 encoded 62 | } 63 | 64 | export interface ReasoningTextDetail extends ReasoningDetailCommon { 65 | type: "reasoning.text"; 66 | text: string; 67 | signature?: string | null; 68 | } 69 | 70 | export type ReasoningDetail = ReasoningSummaryDetail | ReasoningEncryptedDetail | ReasoningTextDetail; 71 | -------------------------------------------------------------------------------- /src/anthropic/anthropicTypes.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Anthropic API message format 3 | * @see https://docs.anthropic.com/en/api/messages 4 | */ 5 | 6 | export type AnthropicRole = "user" | "assistant"; 7 | 8 | export interface AnthropicTextBlock { 9 | type: "text"; 10 | text: string; 11 | } 12 | 13 | export interface AnthropicImageBlock { 14 | type: "image"; 15 | source: { 16 | type: "base64"; 17 | media_type: string; 18 | data: string; 19 | }; 20 | } 21 | 22 | export interface AnthropicThinkingBlock { 23 | type: "thinking"; 24 | thinking: string; 25 | signature?: string; 26 | } 27 | 28 | export interface AnthropicToolUseBlock { 29 | type: "tool_use"; 30 | id: string; 31 | name: string; 32 | input: Record; 33 | } 34 | 35 | export interface AnthropicToolResultBlock { 36 | type: "tool_result"; 37 | tool_use_id: string; 38 | content: string | AnthropicTextBlock[]; 39 | is_error?: boolean; 40 | } 41 | 42 | export type AnthropicContentBlock = 43 | | AnthropicTextBlock 44 | | AnthropicImageBlock 45 | | AnthropicThinkingBlock 46 | | AnthropicToolUseBlock 47 | | AnthropicToolResultBlock; 48 | 49 | export interface AnthropicMessage { 50 | role: AnthropicRole; 51 | content: string | AnthropicContentBlock[]; 52 | } 53 | 54 | export interface AnthropicRequestBody { 55 | model: string; 56 | messages: AnthropicMessage[]; 57 | max_tokens?: number; 58 | system?: string | AnthropicTextBlock[]; 59 | stream?: boolean; 60 | temperature?: number; 61 | top_p?: number; 62 | top_k?: number; 63 | stop_sequences?: string[]; 64 | metadata?: { 65 | user_id?: string; 66 | }; 67 | service_tier?: "auto" | "standard_only"; 68 | thinking?: { 69 | type: "enabled"; 70 | budget_tokens: number; 71 | }; 72 | tools?: AnthropicToolDefinition[]; 73 | tool_choice?: AnthropicToolChoice; 74 | } 75 | 76 | export interface AnthropicToolDefinition { 77 | name: string; 78 | description?: string; 79 | input_schema?: object; 80 | } 81 | 82 | export type AnthropicToolChoice = 83 | | { type: "auto" } 84 | | { type: "any" } 85 | | { type: "tool"; name: string } 86 | | { type: "none" }; 87 | 88 | export interface AnthropicStreamChunk { 89 | type: 90 | | "message_start" 91 | | "content_block_start" 92 | | "content_block_delta" 93 | | "content_block_stop" 94 | | "message_delta" 95 | | "message_stop" 96 | | "ping" 97 | | "error"; 98 | index?: number; 99 | message?: { 100 | id: string; 101 | type: "message"; 102 | role: "assistant"; 103 | content: AnthropicContentBlock[]; 104 | model: string; 105 | stop_reason?: string; 106 | stop_sequence?: string; 107 | }; 108 | 
content_block?: { 109 | type: "text" | "thinking" | "tool_use"; 110 | text?: string; 111 | thinking?: string; 112 | id?: string; 113 | name?: string; 114 | input?: Record<string, unknown>; 115 | }; 116 | delta?: { 117 | type: "text_delta" | "thinking_delta" | "input_json_delta" | "signature_delta"; 118 | text?: string; 119 | thinking?: string; 120 | partial_json?: string; 121 | signature?: string; 122 | }; 123 | usage?: { 124 | input_tokens: number; 125 | output_tokens: number; 126 | }; 127 | error?: { 128 | type: string; 129 | message: string; 130 | }; 131 | } 132 | -------------------------------------------------------------------------------- /src/provideToken.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from "vscode"; 2 | import { CancellationToken, LanguageModelChatInformation, LanguageModelChatRequestMessage } from "vscode"; 3 | 4 | /** 5 | * Returns the number of tokens for a given text using the model-specific tokenizer logic 6 | * @param model The language model to use 7 | * @param text The text to count tokens for 8 | * @param token A cancellation token for the request 9 | * @returns A promise that resolves to the number of tokens 10 | */ 11 | export async function prepareTokenCount( 12 | model: LanguageModelChatInformation, 13 | text: string | LanguageModelChatRequestMessage, 14 | _token: CancellationToken 15 | ): Promise<number> { 16 | if (typeof text === "string") { 17 | // Estimate tokens directly for plain text 18 | return estimateTextTokens(text); 19 | } else { 20 | // For complex messages, calculate tokens for each part separately 21 | let totalTokens = 0; 22 | 23 | for (const part of text.content) { 24 | if (part instanceof vscode.LanguageModelTextPart) { 25 | // Estimate tokens directly for plain text 26 | totalTokens += estimateTextTokens(part.value); 27 | } else if (part instanceof vscode.LanguageModelDataPart) { 28 | // Estimate tokens for image or data parts based on type 29 | if (part.mimeType.startsWith("image/")) { 30 | // Images are approximately 170 tokens 31 | totalTokens += 170; 32 | } else { 33 | // For other binary data, use a more conservative estimate 34 | totalTokens += Math.ceil(part.data.length / 4); 35 | } 36 | } else if (part instanceof vscode.LanguageModelToolCallPart) { 37 | // Tool call token calculation 38 | const toolCallText = `${part.name}(${JSON.stringify(part.input)})`; 39 | totalTokens += estimateTextTokens(toolCallText); 40 | } else if (part instanceof vscode.LanguageModelToolResultPart) { 41 | // Tool result token calculation 42 | const resultText = typeof part.content === "string" ? part.content : JSON.stringify(part.content); 43 | totalTokens += estimateTextTokens(resultText); 44 | } else if (part instanceof vscode.LanguageModelThinkingPart) { 45 | // Thinking part token calculation 46 | const thinkingText = Array.isArray(part.value) ?
part.value.join("") : part.value; 47 | totalTokens += estimateTextTokens(thinkingText); 48 | } 49 | } 50 | 51 | // Add fixed overhead for roles and structure 52 | totalTokens += 4; 53 | 54 | return totalTokens; 55 | } 56 | } 57 | 58 | /** Roughly estimate tokens for VS Code chat messages (text only) */ 59 | export function estimateMessagesTokens(msgs: readonly vscode.LanguageModelChatRequestMessage[]): number { 60 | let total = 0; 61 | for (const m of msgs) { 62 | for (const part of m.content) { 63 | if (part instanceof vscode.LanguageModelTextPart) { 64 | total += estimateTextTokens(part.value); 65 | } 66 | } 67 | } 68 | return total; 69 | } 70 | 71 | /** Rough token estimate tuned for different content types */ 72 | export function estimateTextTokens(text: string): number { 73 | const chineseChars = (text.match(/[\u4e00-\u9fff]/g) || []).length; 74 | const englishWords = (text.match(/\b[a-zA-Z]+\b/g) || []).length; 75 | const symbols = text.length - chineseChars - englishWords; 76 | 77 | // Chinese characters count as ~1.5 tokens, English words as ~1 token, symbols as ~0.5 tokens 78 | return Math.ceil(chineseChars * 1.5 + englishWords + symbols * 0.5); 79 | } 80 | 81 | /** Rough token estimate for tool definitions by JSON size */ 82 | export function estimateToolTokens( 83 | tools: { type: string; function: { name: string; description?: string; parameters?: object } }[] | undefined 84 | ): number { 85 | if (!tools || tools.length === 0) { 86 | return 0; 87 | } 88 | try { 89 | const json = JSON.stringify(tools); 90 | return Math.ceil(json.length / 4); 91 | } catch { 92 | return 0; 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/vscode.proposed.chatProvider.d.ts: -------------------------------------------------------------------------------- 1 | /*--------------------------------------------------------------------------------------------- 2 | * Copyright (c) Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See License.txt in the project root for license information. 4 | *--------------------------------------------------------------------------------------------*/ 5 | 6 | // version: 4 7 | 8 | declare module "vscode" { 9 | /** 10 | * The provider version of {@linkcode LanguageModelChatRequestOptions} 11 | */ 12 | export interface ProvideLanguageModelChatResponseOptions { 13 | /** 14 | * What extension initiated the request to the language model 15 | */ 16 | readonly requestInitiator: string; 17 | } 18 | 19 | /** 20 | * All the information representing a single language model contributed by a {@linkcode LanguageModelChatProvider}. 21 | */ 22 | export interface LanguageModelChatInformation { 23 | /** 24 | * When present, this gates the use of `requestLanguageModelAccess` behind an authorization flow where 25 | * the user must approve of another extension accessing the models contributed by this extension. 26 | * Additionally, the extension can provide a label that will be shown in the UI. 27 | * A common example of a label is an account name that is signed in. 28 | * 29 | */ 30 | requiresAuthorization?: true | { label: string }; 31 | 32 | /** 33 | * Whether or not this will be selected by default in the model picker 34 | * NOT BEING FINALIZED 35 | */ 36 | readonly isDefault?: boolean; 37 | 38 | /** 39 | * Whether or not the model will show up in the model picker immediately upon being made known via {@linkcode LanguageModelChatProvider.provideLanguageModelChatInformation}.
40 | * NOT BEING FINALIZED 41 | */ 42 | readonly isUserSelectable?: boolean; 43 | 44 | /** 45 | * Optional category to group models by in the model picker. 46 | * The lower the order, the higher the category appears in the list. 47 | * Has no effect if `isUserSelectable` is `false`. 48 | * 49 | * WONT BE FINALIZED 50 | */ 51 | readonly category?: { label: string; order: number }; 52 | 53 | readonly statusIcon?: ThemeIcon; 54 | } 55 | 56 | export interface LanguageModelChatCapabilities { 57 | /** 58 | * The tools the model prefers for making file edits. If not provided or if none of the tools, 59 | * are recognized, the editor will try multiple edit tools and pick the best one. The available 60 | * edit tools WILL change over time and this capability only serves as a hint to the editor. 61 | * 62 | * Edit tools currently recognized include: 63 | * - 'find-replace': Find and replace text in a document. 64 | * - 'multi-find-replace': Find and replace multiple text snippets across documents. 65 | * - 'apply-patch': A file-oriented diff format used by some OpenAI models 66 | * - 'code-rewrite': A general but slower editing tool that allows the model 67 | * to rewrite and code snippet and provide only the replacement to the editor. 68 | * 69 | * The order of edit tools in this array has no significance; all of the recognized edit 70 | * tools will be made available to the model. 71 | */ 72 | readonly editTools?: string[]; 73 | } 74 | 75 | export type LanguageModelResponsePart2 = 76 | | LanguageModelResponsePart 77 | | LanguageModelDataPart 78 | | LanguageModelThinkingPart; 79 | 80 | export interface LanguageModelChatProvider { 81 | provideLanguageModelChatResponse( 82 | model: T, 83 | messages: readonly LanguageModelChatRequestMessage[], 84 | options: ProvideLanguageModelChatResponseOptions, 85 | progress: Progress, 86 | token: CancellationToken 87 | ): Thenable; 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/types.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * A single underlying provider (e.g., together, groq) for a model. 3 | */ 4 | export interface HFProvider { 5 | provider: string; 6 | status: string; 7 | supports_tools?: boolean; 8 | supports_structured_output?: boolean; 9 | context_length?: number; 10 | } 11 | 12 | /** 13 | * A model entry returned by the Hugging Face router models endpoint. 
14 | */ 15 | export interface HFArchitecture { 16 | input_modalities?: string[]; 17 | output_modalities?: string[]; 18 | } 19 | 20 | export interface HFModelItem { 21 | id: string; 22 | object?: string; 23 | created?: number; 24 | owned_by: string; 25 | configId?: string; 26 | displayName?: string; 27 | baseUrl?: string; 28 | providers?: HFProvider[]; 29 | architecture?: HFArchitecture; 30 | context_length?: number; 31 | vision?: boolean; 32 | max_tokens?: number; 33 | // OpenAI new standard parameter 34 | max_completion_tokens?: number; 35 | reasoning_effort?: string; 36 | enable_thinking?: boolean; 37 | thinking_budget?: number; 38 | // New thinking configuration for Zai provider 39 | thinking?: ThinkingConfig; 40 | // Allow null so user can explicitly disable sending this parameter (fall back to provider default) 41 | temperature?: number | null; 42 | // Allow null so user can explicitly disable sending this parameter (fall back to provider default) 43 | top_p?: number | null; 44 | top_k?: number; 45 | min_p?: number; 46 | frequency_penalty?: number; 47 | presence_penalty?: number; 48 | repetition_penalty?: number; 49 | reasoning?: ReasoningConfig; 50 | /** 51 | * Optional family specification for the model. This allows users to specify 52 | * the model family (e.g., "gpt-4", "claude-3", "gemini") to enable family-specific 53 | * optimizations and behaviors in the Copilot extension. If not specified, 54 | * defaults to "oai-compatible". 55 | */ 56 | family?: string; 57 | 58 | /** 59 | * Extra configuration parameters that can be used for custom functionality. 60 | * This allows users to add any additional parameters they might need 61 | * without modifying the core interface. 62 | */ 63 | extra?: Record; 64 | 65 | /** 66 | * Custom HTTP headers to be sent with every request to this model's provider. 67 | * These headers will be merged with the default headers (Authorization, Content-Type, User-Agent). 68 | * Example: { "X-API-Version": "v1", "X-Custom-Header": "value" } 69 | */ 70 | headers?: Record; 71 | 72 | /** 73 | * Whether to include reasoning_content in assistant messages sent to the API. 74 | * Support deepseek-v3.2 or others. 75 | */ 76 | include_reasoning_in_request?: boolean; 77 | 78 | /** 79 | * API mode: "openai" for OpenAI-compatible API, "ollama" for Ollama native API. 80 | * Default is "openai". 81 | */ 82 | apiMode?: "openai" | "ollama" | "anthropic"; 83 | } 84 | 85 | /** 86 | * OpenRouter reasoning configuration 87 | */ 88 | export interface ReasoningConfig { 89 | effort?: string; 90 | exclude?: boolean; 91 | max_tokens?: number; 92 | enabled?: boolean; 93 | } 94 | 95 | /** 96 | * Supplemental model info from the Hugging Face hub API. 97 | */ 98 | // Deprecated: extra model info was previously fetched from the hub API 99 | export interface HFExtraModelInfo { 100 | id: string; 101 | pipeline_tag?: string; 102 | } 103 | 104 | /** 105 | * Response envelope for the router models listing. 
106 | */ 107 | export interface HFModelsResponse { 108 | object: string; 109 | data: HFModelItem[]; 110 | } 111 | 112 | /** 113 | * Thinking configuration for Zai provider 114 | */ 115 | export interface ThinkingConfig { 116 | type?: string; 117 | } 118 | 119 | /** 120 | * Retry configuration for rate limiting 121 | */ 122 | export interface RetryConfig { 123 | enabled?: boolean; 124 | max_attempts?: number; 125 | interval_ms?: number; 126 | status_codes?: number[]; 127 | } 128 | -------------------------------------------------------------------------------- /src/statusBar.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from "vscode"; 2 | import { LanguageModelChatInformation, LanguageModelChatRequestMessage, CancellationTokenSource } from "vscode"; 3 | import { prepareTokenCount } from "./provideToken"; 4 | 5 | export function initStatusBar(context: vscode.ExtensionContext): vscode.StatusBarItem { 6 | // Create status bar item for token count display 7 | const tokenCountStatusBarItem = vscode.window.createStatusBarItem(vscode.StatusBarAlignment.Right, 100); 8 | tokenCountStatusBarItem.name = "Token Count"; 9 | tokenCountStatusBarItem.text = "$(symbol-numeric) Ready"; 10 | tokenCountStatusBarItem.tooltip = "Current model token usage - Click to Manage ApiKeys"; 11 | tokenCountStatusBarItem.command = "oaicopilot.setProviderApikey"; 12 | context.subscriptions.push(tokenCountStatusBarItem); 13 | // Show the status bar item initially 14 | tokenCountStatusBarItem.show(); 15 | return tokenCountStatusBarItem; 16 | } 17 | 18 | /** 19 | * Format number to thousands (K, M, B) format 20 | * @param value The number to format 21 | * @returns Formatted string (e.g., "2.3K", "168.0K") 22 | */ 23 | export function formatTokenCount(value: number): string { 24 | if (value >= 1_000_000_000) { 25 | return (value / 1_000_000_000).toFixed(1) + "B"; 26 | } else if (value >= 1_000_000) { 27 | return (value / 1_000_000).toFixed(1) + "M"; 28 | } else if (value >= 1_000) { 29 | return (value / 1_000).toFixed(1) + "K"; 30 | } 31 | return value.toLocaleString(); 32 | } 33 | 34 | /** 35 | * Create a visual progress bar showing token usage 36 | * @param usedTokens Tokens used 37 | * @param maxTokens Maximum tokens available 38 | * @returns Progress bar string (e.g., "▆ 75%") 39 | */ 40 | export function createProgressBar(usedTokens: number, maxTokens: number): string { 41 | const blocks = ["▁", "▂", "▃", "▄", "▅", "▆", "▇", "█"]; 42 | const usagePercentage = Math.min((usedTokens / maxTokens) * 100, 100); 43 | const blockIndex = Math.min(Math.floor((usagePercentage / 100) * blocks.length), blocks.length - 1); 44 | 45 | return `${blocks[blockIndex]} ${Math.round(usagePercentage)}%`; 46 | } 47 | 48 | /** 49 | * Update the status bar with token usage information 50 | * @param messages The chat messages to count tokens for 51 | * @param model The language model information 52 | * @param statusBarItem The status bar item to update 53 | * @param provideTokenCount Callback function to count tokens for a message 54 | */ 55 | export async function updateContextStatusBar( 56 | messages: readonly LanguageModelChatRequestMessage[], 57 | model: LanguageModelChatInformation, 58 | statusBarItem: vscode.StatusBarItem 59 | ): Promise { 60 | // Create a single CancellationTokenSource for all token count operations 61 | const cancellationTokenSource = new CancellationTokenSource(); 62 | 63 | // Calculate tokens for all messages in parallel 64 | const tokenCountPromises = 
messages.map((message) => 65 | prepareTokenCount(model, message, cancellationTokenSource.token) 66 | ); 67 | 68 | const tokenCounts = await Promise.all(tokenCountPromises); 69 | const totalTokenCount = tokenCounts.reduce((sum, count) => sum + count, 0); 70 | 71 | // Update status bar with token count and model context window 72 | const maxTokens = model.maxInputTokens + model.maxOutputTokens; 73 | 74 | // Create visual progress bar with single progressive block 75 | const progressBar = createProgressBar(totalTokenCount, maxTokens); 76 | const displayText = `$(symbol-parameter) ${progressBar}`; 77 | statusBarItem.text = displayText; 78 | statusBarItem.tooltip = `Token Usage: ${formatTokenCount(totalTokenCount)} / ${formatTokenCount(maxTokens)}\n\n${progressBar}\n\nClick to Manage ApiKeys`; 79 | 80 | // Add color coding based on token usage 81 | const usagePercentage = (totalTokenCount / maxTokens) * 100; 82 | if (usagePercentage >= 90) { 83 | statusBarItem.backgroundColor = new vscode.ThemeColor("statusBarItem.errorBackground"); 84 | } else if (usagePercentage >= 70) { 85 | statusBarItem.backgroundColor = new vscode.ThemeColor("statusBarItem.warningBackground"); 86 | } else { 87 | statusBarItem.backgroundColor = undefined; 88 | } 89 | 90 | statusBarItem.show(); 91 | } 92 | -------------------------------------------------------------------------------- /src/extension.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from "vscode"; 2 | import { HuggingFaceChatModelProvider } from "./provider"; 3 | import type { HFModelItem } from "./types"; 4 | import { initStatusBar } from "./statusBar"; 5 | 6 | export function activate(context: vscode.ExtensionContext) { 7 | // Build a descriptive User-Agent to help quantify API usage 8 | const ext = vscode.extensions.getExtension("johnny-zhao.oai-compatible-copilot"); 9 | const extVersion = ext?.packageJSON?.version ?? "unknown"; 10 | const vscodeVersion = vscode.version; 11 | // Keep UA minimal: only extension version and VS Code version 12 | const ua = `oai-compatible-copilot/${extVersion} VSCode/${vscodeVersion}`; 13 | 14 | const tokenCountStatusBarItem: vscode.StatusBarItem = initStatusBar(context); 15 | const provider = new HuggingFaceChatModelProvider(context.secrets, ua, tokenCountStatusBarItem); 16 | // Register the Hugging Face provider under the vendor id used in package.json 17 | vscode.lm.registerLanguageModelChatProvider("oaicopilot", provider); 18 | 19 | // Management command to configure API key 20 | context.subscriptions.push( 21 | vscode.commands.registerCommand("oaicopilot.setApikey", async () => { 22 | const existing = await context.secrets.get("oaicopilot.apiKey"); 23 | const apiKey = await vscode.window.showInputBox({ 24 | title: "OAI Compatible Provider API Key", 25 | prompt: existing ? "Update your OAI Compatible API key" : "Enter your OAI Compatible API key", 26 | ignoreFocusOut: true, 27 | password: true, 28 | value: existing ?? 
"", 29 | }); 30 | if (apiKey === undefined) { 31 | return; // user canceled 32 | } 33 | if (!apiKey.trim()) { 34 | await context.secrets.delete("oaicopilot.apiKey"); 35 | vscode.window.showInformationMessage("OAI Compatible API key cleared."); 36 | return; 37 | } 38 | await context.secrets.store("oaicopilot.apiKey", apiKey.trim()); 39 | vscode.window.showInformationMessage("OAI Compatible API key saved."); 40 | }) 41 | ); 42 | 43 | // Management command to configure provider-specific API keys 44 | context.subscriptions.push( 45 | vscode.commands.registerCommand("oaicopilot.setProviderApikey", async () => { 46 | // Get provider list from configuration 47 | const config = vscode.workspace.getConfiguration(); 48 | const userModels = config.get("oaicopilot.models", []); 49 | 50 | // Extract unique providers (case-insensitive) 51 | const providers = Array.from( 52 | new Set(userModels.map((m) => m.owned_by.toLowerCase()).filter((p) => p && p.trim() !== "")) 53 | ).sort(); 54 | 55 | if (providers.length === 0) { 56 | vscode.window.showErrorMessage( 57 | "No providers found in oaicopilot.models configuration. Please configure models first." 58 | ); 59 | return; 60 | } 61 | 62 | // Let user select provider 63 | const selectedProvider = await vscode.window.showQuickPick(providers, { 64 | title: "Select Provider", 65 | placeHolder: "Select a provider to configure API key", 66 | }); 67 | 68 | if (!selectedProvider) { 69 | return; // user canceled 70 | } 71 | 72 | // Get existing API key for selected provider 73 | const providerKey = `oaicopilot.apiKey.${selectedProvider}`; 74 | const existing = await context.secrets.get(providerKey); 75 | 76 | // Prompt for API key 77 | const apiKey = await vscode.window.showInputBox({ 78 | title: `OAI Compatible API Key for ${selectedProvider}`, 79 | prompt: existing ? `Update API key for ${selectedProvider}` : `Enter API key for ${selectedProvider}`, 80 | ignoreFocusOut: true, 81 | password: true, 82 | value: existing ?? "", 83 | }); 84 | 85 | if (apiKey === undefined) { 86 | return; // user canceled 87 | } 88 | 89 | if (!apiKey.trim()) { 90 | await context.secrets.delete(providerKey); 91 | vscode.window.showInformationMessage(`API key for ${selectedProvider} cleared.`); 92 | return; 93 | } 94 | 95 | await context.secrets.store(providerKey, apiKey.trim()); 96 | vscode.window.showInformationMessage(`API key for ${selectedProvider} saved.`); 97 | }) 98 | ); 99 | } 100 | 101 | export function deactivate() {} 102 | -------------------------------------------------------------------------------- /src/vscode.proposed.languageModelThinkingPart.d.ts: -------------------------------------------------------------------------------- 1 | /*--------------------------------------------------------------------------------------------- 2 | * Copyright (c) Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See License.txt in the project root for license information. 4 | *--------------------------------------------------------------------------------------------*/ 5 | 6 | // version: 1 7 | 8 | declare module "vscode" { 9 | /** 10 | * A language model response part containing thinking/reasoning content. 11 | * Thinking tokens represent the model's internal reasoning process that 12 | * typically streams before the final response. 13 | */ 14 | export class LanguageModelThinkingPart { 15 | /** 16 | * The thinking/reasoning text content. 17 | */ 18 | value: string | string[]; 19 | 20 | /** 21 | * Optional unique identifier for this thinking sequence. 
22 | * This ID is typically provided at the end of the thinking stream 23 | * and can be used for retrieval or reference purposes. 24 | */ 25 | id?: string; 26 | 27 | /** 28 | * Optional metadata associated with this thinking sequence. 29 | */ 30 | metadata?: { readonly [key: string]: any }; 31 | 32 | /** 33 | * Construct a thinking part with the given content. 34 | * @param value The thinking text content. 35 | * @param id Optional unique identifier for this thinking sequence. 36 | * @param metadata Optional metadata associated with this thinking sequence. 37 | */ 38 | constructor(value: string | string[], id?: string, metadata?: { readonly [key: string]: any }); 39 | } 40 | 41 | export interface LanguageModelChatResponse { 42 | /** 43 | * An async iterable that is a stream of text, thinking, and tool-call parts forming the overall response. 44 | * This includes {@link LanguageModelThinkingPart} which represents the model's internal reasoning process. 45 | */ 46 | stream: AsyncIterable; 47 | } 48 | 49 | export interface LanguageModelChat { 50 | sendRequest( 51 | messages: Array, 52 | options?: LanguageModelChatRequestOptions, 53 | token?: CancellationToken 54 | ): Thenable; 55 | countTokens( 56 | text: string | LanguageModelChatMessage | LanguageModelChatMessage2, 57 | token?: CancellationToken 58 | ): Thenable; 59 | } 60 | 61 | /** 62 | * Represents a message in a chat. Can assume different roles, like user or assistant. 63 | */ 64 | export class LanguageModelChatMessage2 { 65 | /** 66 | * Utility to create a new user message. 67 | * 68 | * @param content The content of the message. 69 | * @param name The optional name of a user for the message. 70 | */ 71 | static User( 72 | content: string | Array, 73 | name?: string 74 | ): LanguageModelChatMessage2; 75 | 76 | /** 77 | * Utility to create a new assistant message. 78 | * 79 | * @param content The content of the message. 80 | * @param name The optional name of a user for the message. 81 | */ 82 | static Assistant( 83 | content: string | Array, 84 | name?: string 85 | ): LanguageModelChatMessage2; 86 | 87 | /** 88 | * The role of this message. 89 | */ 90 | role: LanguageModelChatMessageRole; 91 | 92 | /** 93 | * A string or heterogeneous array of things that a message can contain as content. Some parts may be message-type 94 | * specific for some models. 95 | */ 96 | content: Array< 97 | | LanguageModelTextPart 98 | | LanguageModelToolResultPart 99 | | LanguageModelToolCallPart 100 | | LanguageModelDataPart 101 | | LanguageModelThinkingPart 102 | >; 103 | 104 | /** 105 | * The optional name of a user for this message. 106 | */ 107 | name: string | undefined; 108 | 109 | /** 110 | * Create a new user message. 111 | * 112 | * @param role The role of the message. 113 | * @param content The content of the message. 114 | * @param name The optional name of a user for the message. 115 | */ 116 | constructor( 117 | role: LanguageModelChatMessageRole, 118 | content: 119 | | string 120 | | Array< 121 | | LanguageModelTextPart 122 | | LanguageModelToolResultPart 123 | | LanguageModelToolCallPart 124 | | LanguageModelDataPart 125 | | LanguageModelThinkingPart 126 | >, 127 | name?: string 128 | ); 129 | } 130 | 131 | /** 132 | * Temporary alias for LanguageModelToolResultPart to avoid breaking changes in chat. 133 | */ 134 | export class LanguageModelToolResultPart2 extends LanguageModelToolResultPart {} 135 | 136 | /** 137 | * Temporary alias for LanguageModelToolResult to avoid breaking changes in chat. 
138 | */ 139 | export class LanguageModelToolResult2 extends LanguageModelToolResult {} 140 | } 141 | -------------------------------------------------------------------------------- /.github/copilot-instructions.md: -------------------------------------------------------------------------------- 1 | # OAI Compatible Copilot - AI Agent Guidelines 2 | 3 | ## Project Overview 4 | This is a VS Code extension that integrates OpenAI-compatible inference providers into GitHub Copilot Chat. It enables users to use frontier LLMs (Qwen3 Coder, Kimi K2, DeepSeek V3.2, GLM 4.6, etc.) through any OpenAI-compatible API provider. 5 | 6 | ## Architecture Patterns 7 | 8 | ### Core Components 9 | 1. **Provider System** (`src/provider.ts`): Main entry point implementing `LanguageModelChatProvider` 10 | 2. **API Abstraction Layer** (`src/commonApi.ts`): Base class for all API implementations 11 | 3. **Specific API Implementations**: 12 | - `src/openai/openaiApi.ts` - OpenAI-compatible API 13 | - `src/ollama/ollamaApi.ts` - Ollama local API 14 | - `src/anthropic/anthropicApi.ts` - Anthropic Claude API 15 | 4. **Type System** (`src/types.ts`): Centralized type definitions for model configurations 16 | 5. **Utility Functions** (`src/utils.ts`): Shared helpers for retry logic, tool conversion, etc. 17 | 18 | ### Key Design Decisions 19 | - **Multi-provider support**: Users can configure models from multiple providers simultaneously 20 | - **Configuration IDs**: Model IDs can include `::configId` suffix for different configurations of the same model 21 | - **Retry mechanism**: Automatic retry for HTTP errors (429, 500, 502, 503, 504) with exponential backoff 22 | - **Thinking support**: Integration with VS Code's `languageModelThinkingPart` proposal for reasoning content 23 | 24 | ## Development Workflows 25 | 26 | ### Build Commands 27 | ```bash 28 | npm run compile # TypeScript compilation 29 | npm run lint # ESLint checking 30 | npm run format # Prettier formatting 31 | ``` 32 | 33 | ### Testing & Debugging 34 | - **Run Extension**: Use VS Code's "Run Extension" launch configuration 35 | - **Extension Tests**: Use "Extension Tests" launch configuration (requires `tasks: watch-tests`) 36 | - **Watch Tasks**: Two background tasks run automatically: 37 | - `npm: watch` - TypeScript compilation 38 | - `npm: watch-tests` - Test compilation 39 | 40 | ### VS Code Integration 41 | - **API Proposals**: Uses `chatProvider` and `languageModelThinkingPart` proposals 42 | - **Secret Storage**: API keys stored via `vscode.SecretStorage` 43 | - **Status Bar**: Token usage displayed in status bar (`src/statusBar.ts`) 44 | 45 | ## Code Conventions 46 | 47 | ### TypeScript Patterns 48 | - **Strict mode**: Enabled in `tsconfig.json` 49 | - **ES2024 target**: Modern JavaScript features 50 | - **Module resolution**: `Node16` module system 51 | - **Type imports**: Use `import type` for type-only imports 52 | - write code comments in English. 
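A minimal sketch of these conventions; the `HFModelItem` import mirrors `src/types.ts`, while `resolveBaseUrl` itself is a hypothetical helper shown only to illustrate the style:

```typescript
// Type-only import: erased at compile time, so it adds no runtime dependency.
import type { HFModelItem } from "./types";

// Strict mode forces optional fields to be handled explicitly. A model-specific
// baseUrl falls back to the global oaicopilot.baseUrl setting (see Model Configuration below).
export function resolveBaseUrl(model: HFModelItem, globalBaseUrl: string): string {
	return model.baseUrl ?? globalBaseUrl;
}
```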
53 | 54 | ### Error Handling 55 | - **Retry logic**: Implement retry with `createRetryConfig()` and `executeWithRetry()` from `utils.ts` 56 | - **HTTP errors**: Retry on specific status codes (429, 500, 502, 503, 504) 57 | - **User feedback**: Show appropriate messages via `vscode.window.showInformationMessage()` 58 | 59 | ### Model Configuration 60 | - **Model items**: Defined in `HFModelItem` interface (`src/types.ts`) 61 | - **Provider-specific keys**: Support for multiple API keys via `oaicopilot.setProviderApikey` command 62 | - **Configuration inheritance**: Model-specific `baseUrl` falls back to global `oaicopilot.baseUrl` 63 | 64 | ### Message Conversion 65 | - **Role mapping**: Convert VS Code chat roles to provider-specific roles in API implementations 66 | - **Content handling**: Support for text, images (via data URLs), and tool calls 67 | - **Thinking parts**: Parse and emit `LanguageModelThinkingPart` for reasoning models 68 | 69 | ## File Organization 70 | 71 | ### Source Structure 72 | ``` 73 | src/ 74 | ├── extension.ts # Extension activation 75 | ├── provider.ts # Main provider implementation 76 | ├── commonApi.ts # Base API class 77 | ├── types.ts # Type definitions 78 | ├── utils.ts # Utility functions 79 | ├── statusBar.ts # Status bar integration 80 | ├── provideModel.ts # Model information provider 81 | ├── provideToken.ts # Token counting 82 | ├── openai/ # OpenAI-compatible API 83 | ├── ollama/ # Ollama API 84 | └── anthropic/ # Anthropic API 85 | ``` 86 | 87 | ### Configuration Files 88 | - `package.json` - Extension metadata and dependencies 89 | - `tsconfig.json` - TypeScript configuration 90 | - `eslint.config.mjs` - ESLint configuration (ES modules) 91 | - `.prettierrc` - Code formatting rules 92 | 93 | ## Integration Points 94 | 95 | ### VS Code APIs 96 | - `vscode.lm.registerLanguageModelChatProvider()` - Register chat provider 97 | - `vscode.SecretStorage` - Secure API key storage 98 | - `vscode.StatusBarItem` - Display token usage 99 | - `vscode.commands.registerCommand()` - Extension commands 100 | 101 | ### External Dependencies 102 | - **No runtime dependencies** - Extension uses VS Code APIs only 103 | - **Dev dependencies**: TypeScript, ESLint, Prettier, VS Code test utilities 104 | - **API Proposals**: Experimental VS Code APIs enabled via `enabledApiProposals` 105 | 106 | ## Common Tasks 107 | 108 | ### Adding New API Provider 109 | 1. Create new directory under `src/` (e.g., `src/newprovider/`) 110 | 2. Create API class extending `CommonApi` 111 | 3. Implement `convertMessages()` and `sendRequest()` methods 112 | 4. Add to provider instantiation logic in `provider.ts` 113 | 5. Update type definitions if needed 114 | 115 | ### Modifying Model Configuration 116 | 1. Update `HFModelItem` interface in `src/types.ts` 117 | 2. Update configuration parsing in `src/provider.ts` 118 | 3. Update API implementations to handle new fields 119 | 4. Update documentation in `README.md` 120 | 121 | ### Testing Changes 122 | 1. Run `npm run watch` in background 123 | 2. Use "Run Extension" launch configuration 124 | 3. Test in Extension Development Host window 125 | 4. 
Check status bar updates and error handling 126 | 127 | ## Important Notes 128 | - **API Key Management**: Users can set global or provider-specific API keys 129 | - **Model Families**: `family` field enables model-specific optimizations 130 | - **Vision Support**: Enabled via `vision: true` in model configuration 131 | - **Tool Support**: Convert VS Code tools to OpenAI function definitions 132 | - **Streaming**: Support for streaming responses with tool call buffering 133 | 134 | ## Troubleshooting 135 | - **Compilation errors**: Check TypeScript strict mode requirements 136 | - **API errors**: Verify retry logic in `utils.ts` 137 | - **Missing models**: Check `provideLanguageModelChatInformation()` in `provider.ts` 138 | - **Thinking not working**: Ensure `languageModelThinkingPart` proposal is enabled -------------------------------------------------------------------------------- /src/vscode.proposed.languageModelDataPart.d.ts: -------------------------------------------------------------------------------- 1 | /*--------------------------------------------------------------------------------------------- 2 | * Copyright (c) Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See License.txt in the project root for license information. 4 | *--------------------------------------------------------------------------------------------*/ 5 | 6 | // version: 3 7 | 8 | declare module "vscode" { 9 | export interface LanguageModelChat { 10 | sendRequest( 11 | messages: Array, 12 | options?: LanguageModelChatRequestOptions, 13 | token?: CancellationToken 14 | ): Thenable; 15 | countTokens( 16 | text: string | LanguageModelChatMessage | LanguageModelChatMessage2, 17 | token?: CancellationToken 18 | ): Thenable; 19 | } 20 | 21 | /** 22 | * Represents a message in a chat. Can assume different roles, like user or assistant. 23 | */ 24 | export class LanguageModelChatMessage2 { 25 | /** 26 | * Utility to create a new user message. 27 | * 28 | * @param content The content of the message. 29 | * @param name The optional name of a user for the message. 30 | */ 31 | static User( 32 | content: string | Array, 33 | name?: string 34 | ): LanguageModelChatMessage2; 35 | 36 | /** 37 | * Utility to create a new assistant message. 38 | * 39 | * @param content The content of the message. 40 | * @param name The optional name of a user for the message. 41 | */ 42 | static Assistant( 43 | content: string | Array, 44 | name?: string 45 | ): LanguageModelChatMessage2; 46 | 47 | /** 48 | * The role of this message. 49 | */ 50 | role: LanguageModelChatMessageRole; 51 | 52 | /** 53 | * A string or heterogeneous array of things that a message can contain as content. Some parts may be message-type 54 | * specific for some models. 55 | */ 56 | content: Array< 57 | | LanguageModelTextPart 58 | | LanguageModelToolResultPart2 59 | | LanguageModelToolCallPart 60 | | LanguageModelDataPart 61 | | LanguageModelThinkingPart 62 | >; 63 | 64 | /** 65 | * The optional name of a user for this message. 66 | */ 67 | name: string | undefined; 68 | 69 | /** 70 | * Create a new user message. 71 | * 72 | * @param role The role of the message. 73 | * @param content The content of the message. 74 | * @param name The optional name of a user for the message. 
75 | */ 76 | constructor( 77 | role: LanguageModelChatMessageRole, 78 | content: 79 | | string 80 | | Array< 81 | | LanguageModelTextPart 82 | | LanguageModelToolResultPart2 83 | | LanguageModelToolCallPart 84 | | LanguageModelDataPart 85 | | LanguageModelThinkingPart 86 | >, 87 | name?: string 88 | ); 89 | } 90 | 91 | /** 92 | * A language model response part containing arbitrary data, returned from a {@link LanguageModelChatResponse}. 93 | */ 94 | export class LanguageModelDataPart { 95 | /** 96 | * Factory function to create a `LanguageModelDataPart` for an image. 97 | * @param data Binary image data 98 | * @param mimeType The MIME type of the image 99 | */ 100 | static image(data: Uint8Array, mimeType: ChatImageMimeType): LanguageModelDataPart; 101 | 102 | static json(value: object): LanguageModelDataPart; 103 | 104 | static text(value: string): LanguageModelDataPart; 105 | 106 | /** 107 | * The mime type which determines how the data property is interpreted. 108 | */ 109 | mimeType: string; 110 | 111 | /** 112 | * The data of the part. 113 | */ 114 | data: Uint8Array; 115 | 116 | /** 117 | * Construct a generic data part with the given content. 118 | * @param value The data of the part. 119 | */ 120 | constructor(data: Uint8Array, mimeType: string); 121 | } 122 | 123 | /** 124 | * Enum for supported image MIME types. 125 | */ 126 | export enum ChatImageMimeType { 127 | PNG = "image/png", 128 | JPEG = "image/jpeg", 129 | GIF = "image/gif", 130 | WEBP = "image/webp", 131 | BMP = "image/bmp", 132 | } 133 | 134 | /** 135 | * The result of a tool call. This is the counterpart of a {@link LanguageModelToolCallPart tool call} and 136 | * it can only be included in the content of a User message 137 | */ 138 | export class LanguageModelToolResultPart2 { 139 | /** 140 | * The ID of the tool call. 141 | * 142 | * *Note* that this should match the {@link LanguageModelToolCallPart.callId callId} of a tool call part. 143 | */ 144 | callId: string; 145 | 146 | /** 147 | * The value of the tool result. 148 | */ 149 | content: Array; 150 | 151 | /** 152 | * @param callId The ID of the tool call. 153 | * @param content The content of the tool result. 154 | */ 155 | constructor( 156 | callId: string, 157 | content: Array 158 | ); 159 | } 160 | 161 | /** 162 | * A tool that can be invoked by a call to a {@link LanguageModelChat}. 163 | */ 164 | export interface LanguageModelTool { 165 | /** 166 | * Invoke the tool with the given input and return a result. 167 | * 168 | * The provided {@link LanguageModelToolInvocationOptions.input} has been validated against the declared schema. 169 | */ 170 | invoke( 171 | options: LanguageModelToolInvocationOptions, 172 | token: CancellationToken 173 | ): ProviderResult; 174 | } 175 | 176 | /** 177 | * A result returned from a tool invocation. If using `@vscode/prompt-tsx`, this result may be rendered using a `ToolResult`. 178 | */ 179 | export class LanguageModelToolResult2 { 180 | /** 181 | * A list of tool result content parts. Includes `unknown` becauses this list may be extended with new content types in 182 | * the future. 183 | * @see {@link lm.invokeTool}. 
184 | */ 185 | content: Array; 186 | 187 | /** 188 | * Create a LanguageModelToolResult 189 | * @param content A list of tool result content parts 190 | */ 191 | constructor(content: Array); 192 | } 193 | 194 | export namespace lm { 195 | export function invokeTool( 196 | name: string, 197 | options: LanguageModelToolInvocationOptions, 198 | token?: CancellationToken 199 | ): Thenable; 200 | } 201 | } 202 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | ## 0.1.8 (2025-12-17) 4 | 5 | - Feat: [Add Ollama /api/chat](https://github.com/JohnnyZ93/oai-compatible-copilot/pull/65) 6 | - Feat: [Add Anthropic /v1/messages](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/60) 7 | - Enhanced `oaicopilot.models` configuration including: 8 | - `apiMode`: API mode: 'openai' (Default) for API (/v1/chat/completions), 'ollama' for API (/api/chat), 'anthropic' for API (/v1/messages). 9 | 10 | ## 0.1.7 (2025-12-10) 11 | 12 | - Feat: [Expand oaicopilot.retry to handle other type of errors](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/62) 13 | - Fix: [Add buffer for think content](https://github.com/JohnnyZ93/oai-compatible-copilot/pull/61) 14 | - Add `oaicopilot.retry` configuration including: 15 | > Retry configuration for handling api errors like [429, 500, 502, 503, 504]. 16 | - `status_codes`: Additional HTTP status codes that will be merged. Default is [429, 500, 502, 503, 504]. 17 | 18 | ## 0.1.6 (2025-12-08) 19 | 20 | - Feat: [Сontext window state in statusBar](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/59) 21 | 22 | ## 0.1.5 (2025-12-05) 23 | 24 | - Fix: [Deepseek v3.2 reasoning tool call failed](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/54) 25 | - Enhanced `oaicopilot.models` configuration including: 26 | - `include_reasoning_in_request`: Whether to include reasoning_content in assistant messages sent to the API. Support deepseek-v3.2 or others. 27 | 28 | ## 0.1.4 (2025-11-03) 29 | 30 | - Feat: [Add headers support](https://github.com/JohnnyZ93/oai-compatible-copilot/pull/31) 31 | - Feat: [Add displayName option for models in Copilot interface](https://github.com/JohnnyZ93/oai-compatible-copilot/pull/32) 32 | - Enhanced `oaicopilot.models` configuration including: 33 | - `displayName`: Display name for the model that will be shown in the Copilot interface. 34 | - `headers`: Custom HTTP headers to be sent with every request to this model's provider (e.g., `{"X-API-Version": "v1", "X-Custom-Header": "value"}`). 35 | 36 | ## 0.1.3 (2025-10-31) 37 | 38 | - Fix: [Forces a prompt to set the default API key every time VS Code starts](https://github.com/JohnnyZ93/oai-compatible-copilot/pull/30) 39 | 40 | ## 0.1.2 (2025-10-29) 41 | 42 | - Feat: [add support for extra configuration parameters](https://github.com/JohnnyZ93/oai-compatible-copilot/pull/28) 43 | - Enhanced `oaicopilot.models` configuration including: 44 | - `extra`: Extra request parameters that will be used in /chat/completions. 45 | 46 | ## 0.1.1 (2025-10-28) 47 | 48 | - Fix: Cannot change apiKey when the `oaicopilot.models` have no baseUrl. 
49 | 50 | ## 0.1.0 (2025-10-28) 51 | 52 | - Feat: [Add request delay to prevent 429 Errors](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/24) 53 | - Fix: [Not Asking for Key when add new provider](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/26) 54 | - Add `oaicopilot.delay` configuration: Fixed delay in milliseconds between consecutive requests. Default is 0 (no delay). 55 | 56 | ## 0.0.9 (2025-10-27) 57 | 58 | - Feat: [Add Retry Mechanism for Model 429 Errors](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/24) 59 | - Fix: [Thinking block not end and show in new chat](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/25) 60 | - Add `oaicopilot.retry` configuration including: 61 | > Retry configuration for handling api errors like [429, 500, 502, 503, 504]. 62 | - `enabled`: Enable retry mechanism for api errors. Default is true. 63 | - `max_attempts`: Maximum number of retry attempts. Default is 3. 64 | - `interval_ms`: Interval between retry attempts in milliseconds. Default is 1000 (1 seconds). 65 | 66 | ## 0.0.8 (2025-10-21) 67 | 68 | - Fix: [LLM output missing `<`](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/19) 69 | - Remove inline tool call response processing, significantly accelerating model response speed. 70 | 71 | ## 0.0.7 (2025-10-15) 72 | 73 | - Feat: [`` block is not detected properly for Perplexity Sonar models](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/21) 74 | - Update VS Code proposed api version. 75 | 76 | ## 0.0.6 (2025-10-10) 77 | 78 | - Feat: [OpenAI use `max_completion_tokens` instead of `max_tokens` for GPT-5](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/19) 79 | - Enhanced `oaicopilot.models` configuration including: 80 | - `max_completion_tokens`: Maximum number of tokens to generate (OpenAI new standard parameter) 81 | - `reasoning_effort`: Reasoning effort level (OpenAI reasoning configuration) 82 | 83 | 84 | ## 0.0.5 (2025-10-09) 85 | 86 | - Feat: [GLM 4.6 - no thinking tags](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/15) 87 | - Feat: [Multi-config for the same model](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/18) 88 | - Enhanced `oaicopilot.models` configuration including: 89 | - `configId`: Configuration ID for this model. Allows defining the same model with different settings (e.g. 'glm-4.6::thinking', 'glm-4.6::no-thinking') 90 | - `thinking`: Thinking configuration for Zai provider 91 | - `type`: Set to 'enabled' to enable thinking, 'disabled' to disable thinking 92 | 93 | ## 0.0.4 (2025-09-23) 94 | 95 | - Fix: [Base url should be model specific](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/4) 96 | - Fix: [Set the effort variable of the reasoning model](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/5) 97 | - Fix: [Allow setting a custom model 'family'](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/8) 98 | 99 | ## 0.0.3 (2025-09-18) 100 | 101 | - Now you can see the model reasoning content in chat interface. 
102 | > ![thinkingPartDemo](./assets/thinkingPartDemo.png) 103 | - Fix: [Thinking Budget #2](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/2) 104 | - Fix: [iflow api key no response was returned #1](https://github.com/JohnnyZ93/oai-compatible-copilot/issues/1) 105 | 106 | ## 0.0.2 (2025-09-18) 107 | 108 | - Deleted settings including: 109 | - `oaicopilot.enableThinking` 110 | - `oaicopilot.maxTokens` 111 | - `oaicopilot.temperature` 112 | - `oaicopilot.topP` 113 | - Enhanced `oaicopilot.models` configuration with support for per-model settings including: 114 | - `max_tokens`: Maximum number of tokens to generate 115 | - `enable_thinking`: Switches between thinking and non-thinking modes 116 | - `temperature`: Sampling temperature (range: [0, 2]) 117 | - `top_p`: Top-p sampling value (range: (0, 1]) 118 | - `top_k`: Top-k sampling value 119 | - `min_p`: Minimum probability threshold 120 | - `frequency_penalty`: Frequency penalty (range: [-2, 2]) 121 | - `presence_penalty`: Presence penalty (range: [-2, 2]) 122 | - `repetition_penalty`: Repetition penalty (range: (0, 2]) 123 | - Improved token estimation algorithm with better support for Chinese characters 124 | - Enhanced multi-modal message handling for image and text content 125 | 126 | ## 0.0.1 (2025-09-16) 127 | 128 | - Initial release -------------------------------------------------------------------------------- /src/provideModel.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from "vscode"; 2 | import { CancellationToken, LanguageModelChatInformation } from "vscode"; 3 | 4 | import type { HFModelItem, HFModelsResponse } from "./types"; 5 | 6 | const DEFAULT_CONTEXT_LENGTH = 128000; 7 | const DEFAULT_MAX_TOKENS = 4096; 8 | 9 | /** 10 | * Get the list of available language models contributed by this provider 11 | * @param options Options which specify the calling context of this function 12 | * @param token A cancellation token which signals if the user cancelled the request or not 13 | * @returns A promise that resolves to the list of available language models 14 | */ 15 | export async function prepareLanguageModelChatInformation( 16 | options: { silent: boolean }, 17 | _token: CancellationToken, 18 | secrets: vscode.SecretStorage, 19 | userAgent: string 20 | ): Promise { 21 | // Check for user-configured models first 22 | const config = vscode.workspace.getConfiguration(); 23 | const userModels = config.get("oaicopilot.models", []); 24 | 25 | let infos: LanguageModelChatInformation[]; 26 | if (userModels && userModels.length > 0) { 27 | // Return user-provided models directly 28 | infos = userModels.map((m) => { 29 | const contextLen = m?.context_length ?? DEFAULT_CONTEXT_LENGTH; 30 | const maxOutput = m?.max_completion_tokens ?? m?.max_tokens ?? DEFAULT_MAX_TOKENS; 31 | const maxInput = Math.max(1, contextLen - maxOutput); 32 | 33 | // 使用配置ID(如果存在)来生成唯一的模型ID 34 | const modelId = m.configId ? `${m.id}::${m.configId}` : m.id; 35 | const modelName = 36 | m.displayName || (m.configId ? `${m.id}::${m.configId} via ${m.owned_by}` : `${m.id} via ${m.owned_by}`); 37 | 38 | return { 39 | id: modelId, 40 | name: modelName, 41 | tooltip: m.configId 42 | ? `OAI Compatible ${m.id} (config: ${m.configId}) via ${m.owned_by}` 43 | : `OAI Compatible via ${m.owned_by}`, 44 | family: m.family ?? "oai-compatible", 45 | version: "1.0.0", 46 | maxInputTokens: maxInput, 47 | maxOutputTokens: maxOutput, 48 | capabilities: { 49 | toolCalling: true, 50 | imageInput: m?.vision ?? 
false, 51 | }, 52 | } satisfies LanguageModelChatInformation; 53 | }); 54 | } else { 55 | // Fallback: Fetch models from API 56 | const apiKey = await ensureApiKey(options.silent, secrets); 57 | if (!apiKey) { 58 | if (options.silent) { 59 | return []; 60 | } else { 61 | throw new Error("OAI Compatible API key not found"); 62 | } 63 | } 64 | const { models } = await fetchModels(apiKey, userAgent); 65 | 66 | infos = models.flatMap((m) => { 67 | const providers = m?.providers ?? []; 68 | const modalities = m.architecture?.input_modalities ?? []; 69 | const vision = Array.isArray(modalities) && modalities.includes("image"); 70 | 71 | // Build entries for all providers that support tool calling 72 | const toolProviders = providers.filter((p) => p.supports_tools === true); 73 | const entries: LanguageModelChatInformation[] = []; 74 | 75 | for (const p of toolProviders) { 76 | const contextLen = p?.context_length ?? DEFAULT_CONTEXT_LENGTH; 77 | const maxOutput = DEFAULT_MAX_TOKENS; 78 | const maxInput = Math.max(1, contextLen - maxOutput); 79 | entries.push({ 80 | id: `${m.id}:${p.provider}`, 81 | name: `${m.id} via ${p.provider}`, 82 | tooltip: `OAI Compatible via ${p.provider}`, 83 | family: m.family ?? "oai-compatible", 84 | version: "1.0.0", 85 | maxInputTokens: maxInput, 86 | maxOutputTokens: maxOutput, 87 | capabilities: { 88 | toolCalling: true, 89 | imageInput: vision, 90 | }, 91 | } satisfies LanguageModelChatInformation); 92 | } 93 | 94 | if (entries.length === 0) { 95 | const base = providers.length > 0 ? providers[0] : null; 96 | const contextLen = base?.context_length ?? DEFAULT_CONTEXT_LENGTH; 97 | const maxOutput = DEFAULT_MAX_TOKENS; 98 | const maxInput = Math.max(1, contextLen - maxOutput); 99 | entries.push({ 100 | id: `${m.id}`, 101 | name: `${m.id} via OAI Compatible`, 102 | tooltip: "OAI Compatible", 103 | family: m.family ?? "oai-compatible", 104 | version: "1.0.0", 105 | maxInputTokens: maxInput, 106 | maxOutputTokens: maxOutput, 107 | capabilities: { 108 | toolCalling: true, 109 | imageInput: true, 110 | }, 111 | } satisfies LanguageModelChatInformation); 112 | } 113 | 114 | return entries; 115 | }); 116 | } 117 | 118 | // console.debug("[OAI Compatible Model Provider] Loaded models:", infos); 119 | return infos; 120 | } 121 | 122 | /** 123 | * Fetch the list of models and supplementary metadata from Hugging Face. 124 | * @param apiKey The HF API key used to authenticate. 125 | */ 126 | async function fetchModels(apiKey: string, userAgent: string): Promise<{ models: HFModelItem[] }> { 127 | const config = vscode.workspace.getConfiguration(); 128 | const BASE_URL = config.get("oaicopilot.baseUrl", ""); 129 | if (!BASE_URL || !BASE_URL.startsWith("http")) { 130 | throw new Error(`Invalid base URL configuration.`); 131 | } 132 | const modelsList = (async () => { 133 | const resp = await fetch(`${BASE_URL.replace(/\/+$/, "")}/models`, { 134 | method: "GET", 135 | headers: { Authorization: `Bearer ${apiKey}`, "User-Agent": userAgent }, 136 | }); 137 | if (!resp.ok) { 138 | let text = ""; 139 | try { 140 | text = await resp.text(); 141 | } catch (error) { 142 | console.error("[OAI Compatible Model Provider] Failed to read response text", error); 143 | } 144 | const err = new Error( 145 | `Failed to fetch OAI Compatible models: ${resp.status} ${resp.statusText}${text ? 
`\n${text}` : ""}` 146 | ); 147 | console.error("[OAI Compatible Model Provider] Failed to fetch OAI Compatible models", err); 148 | throw err; 149 | } 150 | const parsed = (await resp.json()) as HFModelsResponse; 151 | return parsed.data ?? []; 152 | })(); 153 | 154 | try { 155 | const models = await modelsList; 156 | return { models }; 157 | } catch (err) { 158 | console.error("[OAI Compatible Model Provider] Failed to fetch OAI Compatible models", err); 159 | throw err; 160 | } 161 | } 162 | 163 | /** 164 | * Ensure an API key exists in SecretStorage, optionally prompting the user when not silent. 165 | * @param silent If true, do not prompt the user. 166 | * @param secrets vscode.SecretStorage 167 | */ 168 | async function ensureApiKey(silent: boolean, secrets: vscode.SecretStorage): Promise { 169 | // Fall back to generic API key 170 | let apiKey = await secrets.get("oaicopilot.apiKey"); 171 | 172 | if (!apiKey && !silent) { 173 | const entered = await vscode.window.showInputBox({ 174 | title: "OAI Compatible API Key", 175 | prompt: "Enter your OAI Compatible API key", 176 | ignoreFocusOut: true, 177 | password: true, 178 | }); 179 | if (entered && entered.trim()) { 180 | apiKey = entered.trim(); 181 | await secrets.store("oaicopilot.apiKey", apiKey); 182 | } 183 | } 184 | return apiKey; 185 | } 186 | -------------------------------------------------------------------------------- /src/utils.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from "vscode"; 2 | import type { RetryConfig } from "./types"; 3 | import { OpenAIFunctionToolDef } from "./openai/openaiTypes"; 4 | 5 | const RETRY_MAX_ATTEMPTS = 3; 6 | const RETRY_INTERVAL_MS = 1000; 7 | 8 | // HTTP status codes that should trigger a retry 9 | const RETRYABLE_STATUS_CODES = [429, 500, 502, 503, 504]; 10 | 11 | // Model ID parsing helper 12 | export interface ParsedModelId { 13 | baseId: string; 14 | configId?: string; 15 | } 16 | 17 | /** 18 | * Parse a model ID that may contain a configuration ID separator. 19 | * Format: "baseId::configId" or just "baseId" 20 | */ 21 | export function parseModelId(modelId: string): ParsedModelId { 22 | const parts = modelId.split("::"); 23 | if (parts.length >= 2) { 24 | return { 25 | baseId: parts[0], 26 | configId: parts.slice(1).join("::"), // In case configId itself contains '::' 27 | }; 28 | } 29 | return { 30 | baseId: modelId, 31 | }; 32 | } 33 | 34 | /** 35 | * Map VS Code message role to OpenAI message role string. 36 | * @param message The message whose role is mapped. 37 | */ 38 | export function mapRole(message: vscode.LanguageModelChatRequestMessage): "user" | "assistant" | "system" { 39 | const USER = vscode.LanguageModelChatMessageRole.User as unknown as number; 40 | const ASSISTANT = vscode.LanguageModelChatMessageRole.Assistant as unknown as number; 41 | const r = message.role as unknown as number; 42 | if (r === USER) { 43 | return "user"; 44 | } 45 | if (r === ASSISTANT) { 46 | return "assistant"; 47 | } 48 | return "system"; 49 | } 50 | 51 | /** 52 | * Convert VS Code tool definitions to OpenAI function tool definitions. 53 | * @param options Request options containing tools and toolMode. 54 | */ 55 | export function convertToolsToOpenAI(options: vscode.ProvideLanguageModelChatResponseOptions): { 56 | tools?: OpenAIFunctionToolDef[]; 57 | tool_choice?: "auto" | { type: "function"; function: { name: string } }; 58 | } { 59 | const tools = options.tools ?? 
[]; 60 | if (!tools || tools.length === 0) { 61 | return {}; 62 | } 63 | 64 | const toolDefs: OpenAIFunctionToolDef[] = tools 65 | .filter((t) => t && typeof t === "object") 66 | .map((t) => { 67 | const name = t.name; 68 | const description = typeof t.description === "string" ? t.description : ""; 69 | const params = t.inputSchema ?? { type: "object", properties: {} }; 70 | return { 71 | type: "function" as const, 72 | function: { 73 | name, 74 | description, 75 | parameters: params, 76 | }, 77 | } satisfies OpenAIFunctionToolDef; 78 | }); 79 | 80 | let tool_choice: "auto" | { type: "function"; function: { name: string } } = "auto"; 81 | if (options.toolMode === vscode.LanguageModelChatToolMode.Required) { 82 | if (tools.length !== 1) { 83 | console.error("[OAI Compatible Model Provider] ToolMode.Required but multiple tools:", tools.length); 84 | throw new Error("LanguageModelChatToolMode.Required is not supported with more than one tool"); 85 | } 86 | tool_choice = { type: "function", function: { name: tools[0].name } }; 87 | } 88 | 89 | return { tools: toolDefs, tool_choice }; 90 | } 91 | 92 | /** 93 | * 检查是否为图片MIME类型 94 | */ 95 | export function isImageMimeType(mimeType: string): boolean { 96 | return mimeType.startsWith("image/") && ["image/jpeg", "image/png", "image/gif", "image/webp"].includes(mimeType); 97 | } 98 | 99 | /** 100 | * 创建图片的data URL 101 | */ 102 | export function createDataUrl(dataPart: vscode.LanguageModelDataPart): string { 103 | const base64Data = Buffer.from(dataPart.data).toString("base64"); 104 | return `data:${dataPart.mimeType};base64,${base64Data}`; 105 | } 106 | 107 | /** 108 | * Type guard for LanguageModelToolResultPart-like values. 109 | * @param value Unknown value to test. 110 | */ 111 | export function isToolResultPart(value: unknown): value is { callId: string; content?: ReadonlyArray } { 112 | if (!value || typeof value !== "object") { 113 | return false; 114 | } 115 | const obj = value as Record; 116 | const hasCallId = typeof obj.callId === "string"; 117 | const hasContent = "content" in obj; 118 | return hasCallId && hasContent; 119 | } 120 | 121 | /** 122 | * Concatenate tool result content into a single text string. 123 | * @param pr Tool result-like object with content array. 124 | */ 125 | export function collectToolResultText(pr: { content?: ReadonlyArray }): string { 126 | let text = ""; 127 | for (const c of pr.content ?? []) { 128 | if (c instanceof vscode.LanguageModelTextPart) { 129 | text += c.value; 130 | } else if (typeof c === "string") { 131 | text += c; 132 | } else if (c instanceof vscode.LanguageModelDataPart && c.mimeType === "cache_control") { 133 | /* ignore */ 134 | } else { 135 | try { 136 | text += JSON.stringify(c); 137 | } catch { 138 | /* ignore */ 139 | } 140 | } 141 | } 142 | return text; 143 | } 144 | 145 | /** 146 | * Try to parse a JSON object from a string. 147 | * @param text The input string. 148 | * @returns Parsed object or ok:false. 149 | */ 150 | export function tryParseJSONObject(text: string): { ok: true; value: Record } | { ok: false } { 151 | try { 152 | if (!text || !/[{]/.test(text)) { 153 | return { ok: false }; 154 | } 155 | const value = JSON.parse(text); 156 | if (value && typeof value === "object" && !Array.isArray(value)) { 157 | return { ok: true, value }; 158 | } 159 | return { ok: false }; 160 | } catch { 161 | return { ok: false }; 162 | } 163 | } 164 | 165 | /** 166 | * Create retry configuration from VS Code workspace settings. 167 | * @returns Retry configuration with default values. 
168 | */ 169 | export function createRetryConfig(): RetryConfig { 170 | const config = vscode.workspace.getConfiguration(); 171 | const retryConfig = config.get("oaicopilot.retry", { 172 | enabled: true, 173 | max_attempts: RETRY_MAX_ATTEMPTS, 174 | interval_ms: RETRY_INTERVAL_MS, 175 | }); 176 | 177 | return { 178 | enabled: retryConfig.enabled ?? true, 179 | max_attempts: retryConfig.max_attempts ?? RETRY_MAX_ATTEMPTS, 180 | interval_ms: retryConfig.interval_ms ?? RETRY_INTERVAL_MS, 181 | status_codes: retryConfig.status_codes, 182 | }; 183 | } 184 | 185 | /** 186 | * Execute a function with retry logic for rate limiting. 187 | * @param fn The async function to execute 188 | * @param retryConfig Retry configuration 189 | * @param token Cancellation token 190 | * @returns Result of the function execution 191 | */ 192 | export async function executeWithRetry(fn: () => Promise, retryConfig: RetryConfig): Promise { 193 | if (!retryConfig.enabled) { 194 | return await fn(); 195 | } 196 | 197 | const maxAttempts = retryConfig.max_attempts ?? RETRY_MAX_ATTEMPTS; 198 | const intervalMs = retryConfig.interval_ms ?? RETRY_INTERVAL_MS; 199 | // Merge user-configured status codes with default ones, removing duplicates 200 | const retryableStatusCodes = retryConfig.status_codes 201 | ? [...new Set([...RETRYABLE_STATUS_CODES, ...retryConfig.status_codes])] 202 | : RETRYABLE_STATUS_CODES; 203 | let lastError: Error | undefined; 204 | 205 | for (let attempt = 0; attempt <= maxAttempts; attempt++) { 206 | try { 207 | return await fn(); 208 | } catch (error) { 209 | lastError = error instanceof Error ? error : new Error(String(error)); 210 | 211 | // Check if error is retryable based on status codes 212 | const isRetryableError = retryableStatusCodes.some((code) => lastError?.message.includes(`[${code}]`)); 213 | 214 | if (!isRetryableError || attempt === maxAttempts) { 215 | throw lastError; 216 | } 217 | 218 | console.error( 219 | `[OAI Compatible Model Provider] Retryable error detected, retrying in ${intervalMs}ms (attempt ${attempt + 1}/${maxAttempts})` 220 | ); 221 | 222 | // Wait for the specified interval before retrying 223 | await new Promise((resolve) => setTimeout(resolve, intervalMs)); 224 | } 225 | } 226 | 227 | // This should never be reached, but TypeScript needs it 228 | throw lastError || new Error("Retry failed"); 229 | } 230 | -------------------------------------------------------------------------------- /src/ollama/ollamaApi.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from "vscode"; 2 | import { 3 | CancellationToken, 4 | LanguageModelChatRequestMessage, 5 | ProvideLanguageModelChatResponseOptions, 6 | LanguageModelResponsePart2, 7 | Progress, 8 | } from "vscode"; 9 | 10 | import type { HFModelItem } from "../types"; 11 | 12 | import type { OllamaMessage, OllamaRequestBody, OllamaStreamChunk, OllamaToolCall } from "./ollamaTypes"; 13 | 14 | import { isToolResultPart, collectToolResultText, convertToolsToOpenAI, mapRole } from "../utils"; 15 | 16 | import { CommonApi } from "../commonApi"; 17 | 18 | export class OllamaApi extends CommonApi { 19 | constructor() { 20 | super(); 21 | } 22 | 23 | /** 24 | * Convert VS Code chat messages to Ollama native message format. 25 | * @param messages The VS Code chat messages to convert. 26 | * @returns Ollama-compatible messages array. 
27 | */ 28 | convertMessages( 29 | messages: readonly LanguageModelChatRequestMessage[], 30 | _modelConfig: { includeReasoningInRequest: boolean } 31 | ): OllamaMessage[] { 32 | const out: OllamaMessage[] = []; 33 | 34 | for (const m of messages) { 35 | const role = mapRole(m); 36 | const textParts: string[] = []; 37 | const imageParts: string[] = []; 38 | let thinkingContent = ""; 39 | const toolCalls: OllamaToolCall[] = []; 40 | const toolResults: { toolName: string; content: string }[] = []; 41 | 42 | for (const part of m.content ?? []) { 43 | if (part instanceof vscode.LanguageModelTextPart) { 44 | textParts.push(part.value); 45 | } else if (part instanceof vscode.LanguageModelDataPart) { 46 | // Convert image data to base64 for Ollama 47 | if (part.mimeType.startsWith("image/")) { 48 | const base64Data = Buffer.from(part.data).toString("base64"); 49 | imageParts.push(base64Data); 50 | } 51 | } else if (part instanceof vscode.LanguageModelThinkingPart) { 52 | // Capture thinking content 53 | const content = Array.isArray(part.value) ? part.value.join("") : part.value; 54 | thinkingContent += content; 55 | } else if (part instanceof vscode.LanguageModelToolCallPart) { 56 | // Capture tool calls from assistant 57 | toolCalls.push({ 58 | function: { 59 | name: part.name, 60 | arguments: (part.input as Record) ?? {}, 61 | }, 62 | }); 63 | } else if (isToolResultPart(part)) { 64 | // Capture tool results 65 | const content = collectToolResultText(part); 66 | const toolName = (part as { toolName?: string }).toolName ?? "unknown"; 67 | toolResults.push({ toolName, content }); 68 | } 69 | } 70 | 71 | // Handle tool results as separate "tool" role messages 72 | for (const tr of toolResults) { 73 | out.push({ 74 | role: "tool", 75 | content: tr.content, 76 | tool_name: tr.toolName, 77 | }); 78 | } 79 | 80 | // Handle regular messages 81 | if (textParts.length > 0 || imageParts.length > 0 || toolCalls.length > 0) { 82 | const content = textParts.join("\n"); 83 | 84 | const ollamaMessage: OllamaMessage = { 85 | role, 86 | content, 87 | }; 88 | 89 | if (imageParts.length > 0) { 90 | ollamaMessage.images = imageParts; 91 | } 92 | 93 | if (thinkingContent && role === "assistant") { 94 | ollamaMessage.thinking = thinkingContent; 95 | } 96 | 97 | if (toolCalls.length > 0 && role === "assistant") { 98 | ollamaMessage.tool_calls = toolCalls; 99 | } 100 | 101 | out.push(ollamaMessage); 102 | } 103 | } 104 | 105 | return out; 106 | } 107 | 108 | prepareRequestBody( 109 | rb: OllamaRequestBody, 110 | um: HFModelItem | undefined, 111 | options: ProvideLanguageModelChatResponseOptions 112 | ): OllamaRequestBody { 113 | // Add model options if configured 114 | if ( 115 | um?.temperature !== undefined || 116 | um?.top_p !== undefined || 117 | um?.top_k !== undefined || 118 | um?.max_tokens !== undefined 119 | ) { 120 | rb.options = {}; 121 | if (um.temperature !== undefined && um.temperature !== null) { 122 | rb.options.temperature = um.temperature; 123 | } 124 | if (um.top_p !== undefined && um.top_p !== null) { 125 | rb.options.top_p = um.top_p; 126 | } 127 | if (um.top_k !== undefined) { 128 | rb.options.top_k = um.top_k; 129 | } 130 | if (um.max_tokens !== undefined) { 131 | rb.options.num_predict = um.max_tokens; 132 | } 133 | } 134 | 135 | // Add tools if provided 136 | const toolConfig = convertToolsToOpenAI(options); 137 | if (toolConfig.tools) { 138 | rb.tools = toolConfig.tools; 139 | } 140 | 141 | // Process extra configuration parameters 142 | if (um?.extra && typeof um.extra === "object") { 143 | 
// Add all extra parameters directly to the request body 144 | for (const [key, value] of Object.entries(um.extra)) { 145 | if (value !== undefined) { 146 | (rb as unknown as Record)[key] = value; 147 | } 148 | } 149 | } 150 | 151 | return rb; 152 | } 153 | 154 | /** 155 | * Process Ollama native API streaming response (JSON lines format). 156 | * @param responseBody The readable stream body. 157 | * @param progress Progress reporter for streamed parts. 158 | * @param token Cancellation token. 159 | */ 160 | async processStreamingResponse( 161 | responseBody: ReadableStream, 162 | progress: Progress, 163 | token: CancellationToken 164 | ): Promise { 165 | const reader = responseBody.getReader(); 166 | const decoder = new TextDecoder(); 167 | let buffer = ""; 168 | 169 | try { 170 | while (true) { 171 | if (token.isCancellationRequested) { 172 | break; 173 | } 174 | 175 | const { done, value } = await reader.read(); 176 | if (done) { 177 | break; 178 | } 179 | 180 | buffer += decoder.decode(value, { stream: true }); 181 | const lines = buffer.split("\n"); 182 | buffer = lines.pop() || ""; 183 | 184 | for (const line of lines) { 185 | if (!line.trim()) { 186 | continue; 187 | } 188 | 189 | try { 190 | const chunk: OllamaStreamChunk = JSON.parse(line); 191 | // console.debug("[OAI Compatible Model Provider] data:", JSON.stringify(chunk)); 192 | 193 | await this.processOllamaDelta(chunk, progress); 194 | 195 | // Check if this is the final chunk 196 | if (chunk.done) { 197 | // End any active thinking sequence 198 | this.reportEndThinking(progress); 199 | } 200 | } catch { 201 | // Silently ignore malformed JSON lines 202 | } 203 | } 204 | } 205 | } finally { 206 | reader.releaseLock(); 207 | // End any active thinking sequence 208 | this.reportEndThinking(progress); 209 | } 210 | } 211 | 212 | /** 213 | * Process a single Ollama streaming chunk. 214 | * @param chunk Parsed Ollama stream chunk. 215 | * @param progress Progress reporter for parts. 
216 | */ 217 | private async processOllamaDelta( 218 | chunk: OllamaStreamChunk, 219 | progress: Progress 220 | ): Promise { 221 | const message = chunk.message; 222 | if (!message) { 223 | return; 224 | } 225 | 226 | // Process thinking content first 227 | if (message.thinking) { 228 | // Buffer and emit thinking content 229 | this.bufferThinkingContent(message.thinking, progress); 230 | } 231 | 232 | // Process tool calls 233 | if (message.tool_calls && message.tool_calls.length > 0) { 234 | // End thinking if active 235 | this.reportEndThinking(progress); 236 | 237 | for (const tc of message.tool_calls) { 238 | const id = `ollama_tc_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`; 239 | progress.report(new vscode.LanguageModelToolCallPart(id, tc.function.name, tc.function.arguments)); 240 | } 241 | } 242 | 243 | // Process regular content 244 | if (message.content) { 245 | // If we have thinking content and now receiving regular content, end thinking first 246 | this.reportEndThinking(progress); 247 | 248 | // Emit text content 249 | progress.report(new vscode.LanguageModelTextPart(message.content)); 250 | } 251 | } 252 | } 253 | -------------------------------------------------------------------------------- /src/commonApi.ts: -------------------------------------------------------------------------------- 1 | import { 2 | ProvideLanguageModelChatResponseOptions, 3 | LanguageModelChatRequestMessage, 4 | LanguageModelToolCallPart, 5 | LanguageModelResponsePart2, 6 | LanguageModelThinkingPart, 7 | Progress, 8 | CancellationToken, 9 | } from "vscode"; 10 | 11 | import type { OllamaMessage, OllamaRequestBody } from "./ollama/ollamaTypes"; 12 | 13 | import type { OpenAIChatMessage } from "./openai/openaiTypes"; 14 | import type { AnthropicMessage, AnthropicRequestBody } from "./anthropic/anthropicTypes"; 15 | import { HFModelItem } from "./types"; 16 | import { tryParseJSONObject } from "./utils"; 17 | 18 | export abstract class CommonApi { 19 | /** Buffer for assembling streamed tool calls by index. */ 20 | protected _toolCallBuffers: Map = new Map< 21 | number, 22 | { id?: string; name?: string; args: string } 23 | >(); 24 | 25 | /** Indices for which a tool call has been fully emitted. */ 26 | protected _completedToolCallIndices = new Set(); 27 | 28 | /** Track if we emitted any assistant text before seeing tool calls (SSE-like begin-tool-calls hint). */ 29 | protected _hasEmittedAssistantText = false; 30 | 31 | /** Track if we emitted the begin-tool-calls whitespace flush. */ 32 | protected _emittedBeginToolCallsHint = false; 33 | 34 | // XML think block parsing state 35 | protected _xmlThinkActive = false; 36 | protected _xmlThinkDetectionAttempted = false; 37 | 38 | // Thinking content state management 39 | protected _currentThinkingId: string | null = null; 40 | 41 | /** Buffer for accumulating thinking content before emitting. */ 42 | protected _thinkingBuffer = ""; 43 | 44 | /** Timer for delayed flushing of thinking buffer. */ 45 | protected _thinkingFlushTimer: NodeJS.Timeout | null = null; 46 | 47 | constructor() {} 48 | 49 | /** 50 | * Convert VS Code chat messages to specific api message format. 51 | * @param messages The VS Code chat messages to convert. 52 | * @param modelConfig Config for special model. 53 | * @returns Specific api messages array. 
54 | */ 55 | abstract convertMessages( 56 | messages: readonly LanguageModelChatRequestMessage[], 57 | modelConfig: { includeReasoningInRequest: boolean } 58 | ): Array; 59 | 60 | /** 61 | * Construct request body for Specific api 62 | * @param rb Specific api Request body 63 | * @param um Current Model Info 64 | * @param options From VS Code 65 | */ 66 | abstract prepareRequestBody( 67 | rb: Record | OllamaRequestBody | AnthropicRequestBody, 68 | um: HFModelItem | undefined, 69 | options: ProvideLanguageModelChatResponseOptions 70 | ): Record | OllamaRequestBody | AnthropicRequestBody; 71 | 72 | /** 73 | * Process specific api streaming response (JSON lines format). 74 | * @param responseBody The readable stream body. 75 | * @param progress Progress reporter for streamed parts. 76 | * @param token Cancellation token. 77 | */ 78 | abstract processStreamingResponse( 79 | responseBody: ReadableStream, 80 | progress: Progress, 81 | token: CancellationToken 82 | ): Promise; 83 | 84 | /** 85 | * Try to emit a buffered tool call when a valid name and JSON arguments are available. 86 | * @param index The tool call index from the stream. 87 | * @param progress Progress reporter for parts. 88 | */ 89 | protected async tryEmitBufferedToolCall( 90 | index: number, 91 | progress: Progress 92 | ): Promise { 93 | const buf = this._toolCallBuffers.get(index); 94 | if (!buf) { 95 | return; 96 | } 97 | if (!buf.name) { 98 | return; 99 | } 100 | const canParse = tryParseJSONObject(buf.args); 101 | if (!canParse.ok) { 102 | return; 103 | } 104 | const id = buf.id ?? `call_${Math.random().toString(36).slice(2, 10)}`; 105 | const parameters = canParse.value; 106 | progress.report(new LanguageModelToolCallPart(id, buf.name, parameters)); 107 | this._toolCallBuffers.delete(index); 108 | this._completedToolCallIndices.add(index); 109 | } 110 | 111 | /** 112 | * Flush all buffered tool calls, optionally throwing if arguments are not valid JSON. 113 | * @param progress Progress reporter for parts. 114 | * @param throwOnInvalid If true, throw when a tool call has invalid JSON args. 115 | */ 116 | protected async flushToolCallBuffers( 117 | progress: Progress, 118 | throwOnInvalid: boolean 119 | ): Promise { 120 | if (this._toolCallBuffers.size === 0) { 121 | return; 122 | } 123 | for (const [idx, buf] of Array.from(this._toolCallBuffers.entries())) { 124 | const parsed = tryParseJSONObject(buf.args); 125 | if (!parsed.ok) { 126 | if (throwOnInvalid) { 127 | console.error("[OAI Compatible Model Provider] Invalid JSON for tool call", { 128 | idx, 129 | snippet: (buf.args || "").slice(0, 200), 130 | }); 131 | throw new Error("Invalid JSON for tool call"); 132 | } 133 | // When not throwing (e.g. on [DONE]), drop silently to reduce noise 134 | continue; 135 | } 136 | const id = buf.id ?? `call_${Math.random().toString(36).slice(2, 10)}`; 137 | const name = buf.name ?? 
"unknown_tool"; 138 | progress.report(new LanguageModelToolCallPart(id, name, parsed.value)); 139 | this._toolCallBuffers.delete(idx); 140 | this._completedToolCallIndices.add(idx); 141 | } 142 | } 143 | 144 | /** 145 | * Report to VS Code for ending thinking 146 | * @param progress Progress reporter for parts 147 | */ 148 | protected reportEndThinking(progress: Progress) { 149 | if (!this._currentThinkingId) { 150 | return; 151 | } 152 | // Always clean up state after attempting to end the thinking sequence 153 | try { 154 | this.flushThinkingBuffer(progress); 155 | // End the current thinking sequence with empty content and same ID 156 | progress.report(new LanguageModelThinkingPart("", this._currentThinkingId)); 157 | } catch (e) { 158 | console.error("[OAI Compatible Model Provider] Failed to end thinking sequence:", e); 159 | } 160 | this._currentThinkingId = null; 161 | // Clear thinking buffer and timer since sequence ended 162 | this._thinkingBuffer = ""; 163 | if (this._thinkingFlushTimer) { 164 | clearTimeout(this._thinkingFlushTimer); 165 | this._thinkingFlushTimer = null; 166 | } 167 | } 168 | 169 | /** 170 | * Generate a unique thinking ID based on request start time and random suffix 171 | */ 172 | protected generateThinkingId(): string { 173 | return `thinking_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`; 174 | } 175 | 176 | /** 177 | * Buffer and schedule a flush for thinking content. 178 | * @param text The thinking text to buffer 179 | * @param progress Progress reporter for parts 180 | */ 181 | protected bufferThinkingContent(text: string, progress: Progress): void { 182 | // Generate thinking ID if not provided by the model 183 | if (!this._currentThinkingId) { 184 | this._currentThinkingId = this.generateThinkingId(); 185 | } 186 | 187 | // Append to thinking buffer 188 | this._thinkingBuffer += text; 189 | 190 | // Schedule flush with 100ms delay 191 | if (!this._thinkingFlushTimer) { 192 | this._thinkingFlushTimer = setTimeout(() => { 193 | this.flushThinkingBuffer(progress); 194 | }, 100); 195 | } 196 | } 197 | 198 | /** 199 | * Flush the thinking buffer to the progress reporter. 200 | * @param progress Progress reporter for parts. 
201 | */ 202 | protected flushThinkingBuffer(progress: Progress): void { 203 | // Always clear existing timer first 204 | if (this._thinkingFlushTimer) { 205 | clearTimeout(this._thinkingFlushTimer); 206 | this._thinkingFlushTimer = null; 207 | } 208 | 209 | // Flush current buffer if we have content 210 | if (this._thinkingBuffer && this._currentThinkingId) { 211 | const text = this._thinkingBuffer; 212 | this._thinkingBuffer = ""; 213 | progress.report(new LanguageModelThinkingPart(text, this._currentThinkingId)); 214 | } 215 | } 216 | } 217 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "oai-compatible-copilot", 3 | "publisher": "johnny-zhao", 4 | "displayName": "OAI Compatible Provider for Copilot", 5 | "description": "An extension that integrates OpenAI/Ollama/Anthropic API Providers into GitHub Copilot Chat", 6 | "icon": "assets/logo.png", 7 | "keywords": [ 8 | "ai", 9 | "chat", 10 | "copilot", 11 | "github-copilot", 12 | "language-model", 13 | "openai", 14 | "ollama", 15 | "anthropic", 16 | "claude" 17 | ], 18 | "repository": { 19 | "type": "git", 20 | "url": "https://github.com/JohnnyZ93/oai-compatible-copilot" 21 | }, 22 | "version": "0.1.8", 23 | "engines": { 24 | "vscode": "^1.104.0" 25 | }, 26 | "extensionDependencies": [ 27 | "github.copilot-chat" 28 | ], 29 | "categories": [ 30 | "AI", 31 | "Chat" 32 | ], 33 | "badges": [ 34 | { 35 | "url": "https://img.shields.io/github/stars/JohnnyZ93/oai-compatible-copilot?style=social", 36 | "description": "Star oai-compatible-copilot on Github", 37 | "href": "https://github.com/JohnnyZ93/oai-compatible-copilot" 38 | } 39 | ], 40 | "bugs": { 41 | "url": "https://github.com/JohnnyZ93/oai-compatible-copilot/issues" 42 | }, 43 | "license": "MIT", 44 | "enabledApiProposals": [ 45 | "chatProvider", 46 | "languageModelThinkingPart" 47 | ], 48 | "contributes": { 49 | "languageModelChatProviders": [ 50 | { 51 | "vendor": "oaicopilot", 52 | "displayName": "OAI Compatible", 53 | "managementCommand": "oaicopilot.setApikey" 54 | } 55 | ], 56 | "commands": [ 57 | { 58 | "command": "oaicopilot.setApikey", 59 | "title": "OAICopilot: Set OAI Compatible Apikey" 60 | }, 61 | { 62 | "command": "oaicopilot.setProviderApikey", 63 | "title": "OAICopilot: Set OAI Compatible Multi-Provider Apikey" 64 | } 65 | ], 66 | "configuration": { 67 | "title": "OAI Compatible Copilot", 68 | "properties": { 69 | "oaicopilot.baseUrl": { 70 | "type": "string", 71 | "default": "https://router.huggingface.co/v1", 72 | "description": "The base URL for the Openai Compatible Inference API. Default value is Hugging Face." 73 | }, 74 | "oaicopilot.models": { 75 | "type": "array", 76 | "default": [], 77 | "items": { 78 | "type": "object", 79 | "properties": { 80 | "id": { 81 | "type": "string", 82 | "description": "Model ID (e.g., 'glm-4.6')." 83 | }, 84 | "displayName": { 85 | "type": "string", 86 | "description": "(Optional) Display name for the model that will be shown in the Copilot interface. If not provided, will be generated automatically." 87 | }, 88 | "configId": { 89 | "type": "string", 90 | "description": "(Optional) Configuration ID for this model. Allows defining the same model with different settings (e.g. 'glm-4.6::thinking', 'glm-4.6::no-thinking')." 91 | }, 92 | "owned_by": { 93 | "type": "string", 94 | "description": "Model provider (e.g., 'zai', 'openai')." 
95 | }, 96 | "family": { 97 | "type": "string", 98 | "description": "Model family (e.g., 'gpt-4', 'claude-3', 'gemini'). Enables model-specific optimizations and behaviors. Defaults to 'oai-compatible' if not specified." 99 | }, 100 | "baseUrl": { 101 | "type": "string", 102 | "description": "Base URL for the model provider. If not provided, the global oaicopilot.baseUrl will be used." 103 | }, 104 | "context_length": { 105 | "type": "number", 106 | "default": 128000, 107 | "minimum": 1000, 108 | "maximum": 10000000, 109 | "description": "Model support context length. Default is 128000." 110 | }, 111 | "vision": { 112 | "type": "boolean", 113 | "default": false, 114 | "description": "Model support vision. Default is false." 115 | }, 116 | "max_tokens": { 117 | "type": "number", 118 | "default": 4096, 119 | "minimum": 1, 120 | "maximum": 10000000, 121 | "description": "Maximum number of tokens to generate (range: [1, context_length)). Default is 4096." 122 | }, 123 | "max_completion_tokens": { 124 | "type": "number", 125 | "default": 4096, 126 | "minimum": 1, 127 | "maximum": 10000000, 128 | "description": "Maximum number of tokens to generate (OpenAI new standard parameter)." 129 | }, 130 | "reasoning_effort": { 131 | "type": "string", 132 | "default": "medium", 133 | "enum": [ 134 | "high", 135 | "medium", 136 | "low", 137 | "minimal" 138 | ], 139 | "description": "Reasoning effort level (OpenAI reasoning configuration)" 140 | }, 141 | "thinking": { 142 | "type": "object", 143 | "description": "Thinking configuration for Zai provider", 144 | "properties": { 145 | "type": { 146 | "type": "string", 147 | "enum": [ 148 | "enabled", 149 | "disabled" 150 | ], 151 | "description": "Set to 'enabled' to enable thinking, 'disabled' to disable thinking" 152 | } 153 | } 154 | }, 155 | "enable_thinking": { 156 | "type": "boolean", 157 | "default": false, 158 | "description": "Switches between thinking and non-thinking modes. Not required." 159 | }, 160 | "thinking_budget": { 161 | "type": "number", 162 | "default": 128, 163 | "minimum": 128, 164 | "maximum": 10000000, 165 | "description": "Maximum number of tokens for chain-of-thought output. Not required." 166 | }, 167 | "temperature": { 168 | "type": "number", 169 | "default": 0, 170 | "minimum": 0, 171 | "maximum": 2, 172 | "description": "Sampling temperature (range: [0, 2]). Lower values make output more deterministic, higher values make it more creative. Default is 0." 173 | }, 174 | "top_p": { 175 | "type": "number", 176 | "default": 1, 177 | "minimum": 0, 178 | "maximum": 1, 179 | "description": "Top-p sampling value (range: (0, 1]). Not required." 180 | }, 181 | "top_k": { 182 | "type": "number", 183 | "default": 50, 184 | "minimum": 1, 185 | "description": "Top-k sampling value (range: [1, Infinity)). Not required." 186 | }, 187 | "min_p": { 188 | "type": "number", 189 | "default": 0, 190 | "minimum": 0, 191 | "maximum": 1, 192 | "description": "Minimum probability threshold (range: [0, 1]). Not required." 193 | }, 194 | "frequency_penalty": { 195 | "type": "number", 196 | "default": 0, 197 | "minimum": -2, 198 | "maximum": 2, 199 | "description": "Frequency penalty (range: [-2, 2]). Not required." 200 | }, 201 | "presence_penalty": { 202 | "type": "number", 203 | "default": 0, 204 | "minimum": -2, 205 | "maximum": 2, 206 | "description": "Presence penalty (range: [-2, 2]). Not required." 
207 | }, 208 | "repetition_penalty": { 209 | "type": "number", 210 | "default": 0, 211 | "minimum": 0, 212 | "maximum": 2, 213 | "description": "Repetition penalty (range: (0, 2]). Not required." 214 | }, 215 | "reasoning": { 216 | "type": "object", 217 | "default": { 218 | "effort": "medium" 219 | }, 220 | "properties": { 221 | "effort": { 222 | "type": "string", 223 | "default": "medium", 224 | "enum": [ 225 | "high", 226 | "medium", 227 | "low", 228 | "minimal", 229 | "auto" 230 | ], 231 | "description": "Reasoning effort level for OpenRouter/xAI (high, medium, low, minimal, auto)" 232 | }, 233 | "exclude": { 234 | "type": "boolean", 235 | "default": false, 236 | "description": "Exclude reasoning tokens from the final response" 237 | }, 238 | "max_tokens": { 239 | "type": "number", 240 | "default": 2000, 241 | "minimum": 1, 242 | "description": "Specific token limit for reasoning (Anthropic-style, alternative to effort)" 243 | }, 244 | "enabled": { 245 | "type": "boolean", 246 | "default": true, 247 | "description": "Enable reasoning (inferred from effort or max_tokens if not specified)" 248 | } 249 | }, 250 | "description": "Reasoning configuration for OpenRouter-compatible providers" 251 | }, 252 | "extra": { 253 | "type": "object", 254 | "description": "Extra request body parameters." 255 | }, 256 | "headers": { 257 | "type": "object", 258 | "additionalProperties": { 259 | "type": "string" 260 | }, 261 | "description": "Custom HTTP headers to be sent with every request to this model's provider. These headers will be merged with the default headers (Authorization, Content-Type, User-Agent)." 262 | }, 263 | "include_reasoning_in_request": { 264 | "type": "boolean", 265 | "default": false, 266 | "description": "Whether to include reasoning_content in assistant messages sent to the API. Support deepseek-v3.2 or others." 267 | }, 268 | "apiMode": { 269 | "type": "string", 270 | "enum": [ 271 | "openai", 272 | "ollama", 273 | "anthropic" 274 | ], 275 | "default": "openai", 276 | "description": "API mode: 'openai' (Default) for API (/v1/chat/completions), 'ollama' for API (/api/chat), 'anthropic' for API (/v1/messages)." 277 | } 278 | }, 279 | "required": [ 280 | "id", 281 | "owned_by" 282 | ] 283 | }, 284 | "description": "A list of preferred models to use. If provided, these models will be used directly instead of fetching from the API." 285 | }, 286 | "oaicopilot.retry": { 287 | "type": "object", 288 | "default": { 289 | "enabled": true, 290 | "max_attempts": 3, 291 | "interval_ms": 1000 292 | }, 293 | "properties": { 294 | "enabled": { 295 | "type": "boolean", 296 | "default": true, 297 | "description": "Enable retry mechanism for api errors. Default is true." 298 | }, 299 | "max_attempts": { 300 | "type": "number", 301 | "default": 3, 302 | "minimum": 1, 303 | "description": "Maximum number of retry attempts. Default is 3." 304 | }, 305 | "interval_ms": { 306 | "type": "number", 307 | "default": 1000, 308 | "minimum": 1, 309 | "description": "Interval between retry attempts in milliseconds. Default is 1000 (1 seconds)." 310 | }, 311 | "status_codes": { 312 | "type": "array", 313 | "items": { 314 | "type": "number" 315 | }, 316 | "description": "Additional HTTP status codes that will be merged. Default is [429, 500, 502, 503, 504]." 317 | } 318 | }, 319 | "description": "Retry configuration for handling api errors like [429, 500, 502, 503, 504]." 
320 | }, 321 | "oaicopilot.delay": { 322 | "type": "number", 323 | "default": 0, 324 | "minimum": 0, 325 | "description": "Fixed delay in milliseconds between consecutive requests. Default is 0 (no delay)." 326 | } 327 | } 328 | } 329 | }, 330 | "main": "./out/extension.js", 331 | "scripts": { 332 | "vscode:prepublish": "npm run compile", 333 | "download-api": "dts dev && mv vscode.proposed.*.ts src", 334 | "compile": "tsc -p ./", 335 | "lint": "eslint", 336 | "format": "prettier --write .", 337 | "watch": "tsc -watch -p ./", 338 | "test": "npm run compile && vscode-test", 339 | "build": "npx @vscode/vsce package -o extension.vsix" 340 | }, 341 | "dependencies": {}, 342 | "devDependencies": { 343 | "@eslint/js": "^9.13.0", 344 | "@stylistic/eslint-plugin": "^2.9.0", 345 | "@types/node": "^22", 346 | "@types/mocha": "^10.0.6", 347 | "@vscode/dts": "^0.4.1", 348 | "@types/vscode": "^1.104.0", 349 | "@vscode/test-cli": "^0.0.11", 350 | "@vscode/test-electron": "^2.5.2", 351 | "eslint": "^9.13.0", 352 | "prettier": "^3.1.0", 353 | "typescript": "^5.9.2", 354 | "typescript-eslint": "^8.39.0" 355 | } 356 | } 357 | -------------------------------------------------------------------------------- /src/anthropic/anthropicApi.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from "vscode"; 2 | import { 3 | CancellationToken, 4 | LanguageModelChatRequestMessage, 5 | ProvideLanguageModelChatResponseOptions, 6 | LanguageModelResponsePart2, 7 | Progress, 8 | } from "vscode"; 9 | 10 | import type { HFModelItem } from "../types"; 11 | 12 | import type { 13 | AnthropicMessage, 14 | AnthropicRequestBody, 15 | AnthropicContentBlock, 16 | AnthropicToolUseBlock, 17 | AnthropicToolResultBlock, 18 | AnthropicStreamChunk, 19 | } from "./anthropicTypes"; 20 | 21 | import { isImageMimeType, isToolResultPart, collectToolResultText, convertToolsToOpenAI, mapRole } from "../utils"; 22 | 23 | import { CommonApi } from "../commonApi"; 24 | 25 | export class AnthropicApi extends CommonApi { 26 | private _systemContent: string | undefined; 27 | 28 | constructor() { 29 | super(); 30 | } 31 | 32 | /** 33 | * Convert VS Code chat messages to Anthropic message format. 34 | * @param messages The VS Code chat messages to convert. 35 | * @param modelConfig model configuration that may affect message conversion. 36 | * @returns Anthropic-compatible messages array. 37 | */ 38 | convertMessages( 39 | messages: readonly LanguageModelChatRequestMessage[], 40 | modelConfig: { includeReasoningInRequest: boolean } 41 | ): AnthropicMessage[] { 42 | const out: AnthropicMessage[] = []; 43 | 44 | for (const m of messages) { 45 | const role = mapRole(m); 46 | const textParts: string[] = []; 47 | const imageParts: vscode.LanguageModelDataPart[] = []; 48 | const toolCalls: AnthropicToolUseBlock[] = []; 49 | const toolResults: AnthropicToolResultBlock[] = []; 50 | const thinkingParts: string[] = []; 51 | 52 | for (const part of m.content ?? []) { 53 | if (part instanceof vscode.LanguageModelTextPart) { 54 | textParts.push(part.value); 55 | } else if (part instanceof vscode.LanguageModelDataPart && isImageMimeType(part.mimeType)) { 56 | imageParts.push(part); 57 | } else if (part instanceof vscode.LanguageModelToolCallPart) { 58 | const id = part.callId || `toolu_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`; 59 | toolCalls.push({ 60 | type: "tool_use", 61 | id, 62 | name: part.name, 63 | input: (part.input as Record) ?? 
{}, 64 | }); 65 | } else if (isToolResultPart(part)) { 66 | const callId = (part as { callId?: string }).callId ?? ""; 67 | const content = collectToolResultText(part as { content?: ReadonlyArray }); 68 | toolResults.push({ 69 | type: "tool_result", 70 | tool_use_id: callId, 71 | content, 72 | }); 73 | } else if (part instanceof vscode.LanguageModelThinkingPart) { 74 | const content = Array.isArray(part.value) ? part.value.join("") : part.value; 75 | thinkingParts.push(content); 76 | } 77 | } 78 | 79 | // Handle system messages separately (Anthropic uses top-level system field) 80 | if (role === "system") { 81 | if (textParts.length > 0) { 82 | this._systemContent = textParts.join("\n"); 83 | } 84 | continue; 85 | } 86 | 87 | // Build content blocks for user/assistant messages 88 | const contentBlocks: AnthropicContentBlock[] = []; 89 | 90 | // Add text content 91 | if (textParts.length > 0) { 92 | contentBlocks.push({ 93 | type: "text", 94 | text: textParts.join("\n"), 95 | }); 96 | } 97 | 98 | // Add image content 99 | for (const imagePart of imageParts) { 100 | const base64Data = Buffer.from(imagePart.data).toString("base64"); 101 | contentBlocks.push({ 102 | type: "image", 103 | source: { 104 | type: "base64", 105 | media_type: imagePart.mimeType, 106 | data: base64Data, 107 | }, 108 | }); 109 | } 110 | 111 | // Add thinking content for assistant messages 112 | if (role === "assistant" && thinkingParts.length > 0 && modelConfig.includeReasoningInRequest) { 113 | contentBlocks.push({ 114 | type: "thinking", 115 | thinking: thinkingParts.join("\n"), 116 | }); 117 | } 118 | 119 | // Add tool calls for assistant messages 120 | for (const toolCall of toolCalls) { 121 | contentBlocks.push(toolCall); 122 | } 123 | 124 | // For tool results, they should be added to user messages 125 | // We'll add them to the current message if it's a user message 126 | if (role === "user" && toolResults.length > 0) { 127 | for (const toolResult of toolResults) { 128 | contentBlocks.push(toolResult); 129 | } 130 | } else if (toolResults.length > 0) { 131 | // If tool results appear in non-user messages, log warning 132 | console.warn("[Anthropic Provider] Tool results found in non-user message, ignoring"); 133 | } 134 | 135 | // Only add message if we have content blocks 136 | if (contentBlocks.length > 0) { 137 | out.push({ 138 | role, 139 | content: contentBlocks, 140 | }); 141 | } 142 | } 143 | 144 | return out; 145 | } 146 | 147 | prepareRequestBody( 148 | rb: AnthropicRequestBody, 149 | um: HFModelItem | undefined, 150 | options: ProvideLanguageModelChatResponseOptions 151 | ): AnthropicRequestBody { 152 | // Set max_tokens (required for Anthropic) 153 | if (um?.max_tokens !== undefined) { 154 | rb.max_tokens = um.max_tokens; 155 | } 156 | 157 | // Add system content if we extracted it 158 | if (this._systemContent) { 159 | rb.system = this._systemContent; 160 | } 161 | 162 | // Add temperature 163 | const oTemperature = options.modelOptions?.temperature ?? 0; 164 | const temperature = um?.temperature ?? 
oTemperature; 165 | rb.temperature = temperature; 166 | if (um && um.temperature === null) { 167 | delete rb.temperature; 168 | } 169 | 170 | // Add top_p if configured 171 | if (um?.top_p !== undefined && um.top_p !== null) { 172 | rb.top_p = um.top_p; 173 | } 174 | 175 | // Add top_k if configured 176 | if (um?.top_k !== undefined) { 177 | rb.top_k = um.top_k; 178 | } 179 | 180 | // Add tools configuration 181 | const toolConfig = convertToolsToOpenAI(options); 182 | if (toolConfig.tools) { 183 | // Convert OpenAI tool definitions to Anthropic format 184 | rb.tools = toolConfig.tools.map((tool) => ({ 185 | name: tool.function.name, 186 | description: tool.function.description, 187 | input_schema: tool.function.parameters, 188 | })); 189 | } 190 | 191 | // Add tool_choice 192 | if (toolConfig.tool_choice) { 193 | if (toolConfig.tool_choice === "auto") { 194 | rb.tool_choice = { type: "auto" }; 195 | } else if (typeof toolConfig.tool_choice === "object" && toolConfig.tool_choice.type === "function") { 196 | rb.tool_choice = { type: "tool", name: toolConfig.tool_choice.function.name }; 197 | } 198 | } 199 | 200 | // Process extra configuration parameters 201 | if (um?.extra && typeof um.extra === "object") { 202 | // Add all extra parameters directly to the request body 203 | for (const [key, value] of Object.entries(um.extra)) { 204 | if (value !== undefined) { 205 | (rb as unknown as Record)[key] = value; 206 | } 207 | } 208 | } 209 | 210 | return rb; 211 | } 212 | 213 | /** 214 | * Process Anthropic streaming response (SSE format). 215 | * @param responseBody The readable stream body. 216 | * @param progress Progress reporter for streamed parts. 217 | * @param token Cancellation token. 218 | */ 219 | async processStreamingResponse( 220 | responseBody: ReadableStream, 221 | progress: Progress, 222 | token: CancellationToken 223 | ): Promise { 224 | const reader = responseBody.getReader(); 225 | const decoder = new TextDecoder(); 226 | let buffer = ""; 227 | 228 | try { 229 | while (true) { 230 | if (token.isCancellationRequested) { 231 | break; 232 | } 233 | 234 | const { done, value } = await reader.read(); 235 | if (done) { 236 | break; 237 | } 238 | 239 | buffer += decoder.decode(value, { stream: true }); 240 | const lines = buffer.split("\n"); 241 | buffer = lines.pop() || ""; 242 | 243 | for (const line of lines) { 244 | if (line.trim() === "") { 245 | continue; 246 | } 247 | if (!line.startsWith("data: ")) { 248 | continue; 249 | } 250 | 251 | const data = line.slice(6); 252 | if (data === "[DONE]") { 253 | // Do not throw on [DONE]; any incomplete/empty buffers are ignored. 254 | await this.flushToolCallBuffers(progress, /*throwOnInvalid*/ false); 255 | continue; 256 | } 257 | 258 | try { 259 | const chunk: AnthropicStreamChunk = JSON.parse(data); 260 | // console.debug("[OAI Compatible Model Provider] data:", JSON.stringify(chunk)); 261 | 262 | await this.processAnthropicChunk(chunk, progress); 263 | } catch (e) { 264 | console.error("[Anthropic Provider] Failed to parse SSE chunk:", e, "data:", data); 265 | } 266 | } 267 | } 268 | } finally { 269 | reader.releaseLock(); 270 | // If there's an active thinking sequence, end it first 271 | this.reportEndThinking(progress); 272 | } 273 | } 274 | 275 | /** 276 | * Process a single Anthropic streaming chunk. 277 | * @param chunk Parsed Anthropic stream chunk. 278 | * @param progress Progress reporter for parts. 
279 | */ 280 | private async processAnthropicChunk( 281 | chunk: AnthropicStreamChunk, 282 | progress: Progress 283 | ): Promise { 284 | // Handle ping events (ignore) 285 | if (chunk.type === "ping") { 286 | return; 287 | } 288 | 289 | // Handle error events 290 | if (chunk.type === "error") { 291 | const errorType = chunk.error?.type || "unknown_error"; 292 | const errorMessage = chunk.error?.message || "Anthropic API streaming error"; 293 | console.error(`[Anthropic Provider] Streaming error: ${errorType} - ${errorMessage}`); 294 | // We could throw here, but for now just log and continue 295 | return; 296 | } 297 | 298 | if (chunk.type === "message_start" && chunk.message) { 299 | // Extract message metadata (id, model, etc.) 300 | // Could store for later use, but not required for basic streaming 301 | return; 302 | } 303 | 304 | if (chunk.type === "message_delta" && chunk.delta) { 305 | // Extract stop_reason and usage information 306 | // We're not processing usage per user request, but could log if needed 307 | return; 308 | } 309 | 310 | if (chunk.type === "content_block_start" && chunk.content_block) { 311 | // Start of a content block 312 | if (chunk.content_block.type === "thinking") { 313 | // Start thinking block 314 | if (chunk.content_block.thinking) { 315 | this.bufferThinkingContent(chunk.content_block.thinking, progress); 316 | } 317 | } else if (chunk.content_block.type === "tool_use") { 318 | // Start tool call block 319 | // SSEProcessor-like: if first tool call appears after text, emit a whitespace 320 | // to ensure any UI buffers/linkifiers are flushed without adding visible noise. 321 | if (!this._emittedBeginToolCallsHint && this._hasEmittedAssistantText) { 322 | progress.report(new vscode.LanguageModelTextPart(" ")); 323 | this._emittedBeginToolCallsHint = true; 324 | } 325 | const idx = (chunk.index as number) ?? 0; 326 | this._toolCallBuffers.set(idx, { 327 | id: chunk.content_block.id, 328 | name: chunk.content_block.name, 329 | args: "", 330 | }); 331 | } else if (chunk.content_block.type === "text") { 332 | // Text block start - nothing special to do 333 | // The text content will come via content_block_delta events 334 | } 335 | } else if (chunk.type === "content_block_delta" && chunk.delta) { 336 | if (chunk.delta.type === "text_delta" && chunk.delta.text) { 337 | // Emit text content 338 | progress.report(new vscode.LanguageModelTextPart(chunk.delta.text)); 339 | this._hasEmittedAssistantText = true; 340 | } else if (chunk.delta.type === "thinking_delta" && chunk.delta.thinking) { 341 | // Buffer thinking content 342 | this.bufferThinkingContent(chunk.delta.thinking, progress); 343 | } else if (chunk.delta.type === "input_json_delta" && chunk.delta.partial_json) { 344 | // Handle tool call argument streaming 345 | // Find the latest tool call buffer and append partial JSON 346 | const idx = (chunk.index as number) ?? 
0; 347 | const buf = this._toolCallBuffers.get(idx); 348 | if (buf) { 349 | buf.args += chunk.delta.partial_json; 350 | this._toolCallBuffers.set(idx, buf); 351 | // Try to emit if we have valid JSON 352 | await this.tryEmitBufferedToolCall(idx, progress); 353 | } 354 | } else if (chunk.delta.type === "signature_delta" && chunk.delta.signature) { 355 | // Signature for thinking block - ignore for now 356 | // Could store for verification if needed later 357 | } 358 | } else if (chunk.type === "content_block_stop" || chunk.type === "message_stop") { 359 | // End of message - ensure thinking is ended and flush all tool calls 360 | await this.flushToolCallBuffers(progress, false); 361 | this.reportEndThinking(progress); 362 | } 363 | } 364 | } 365 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🤗 OAI Compatible Provider for Copilot 2 | 3 | [![CI](https://github.com/JohnnyZ93/oai-compatible-copilot/actions/workflows/release.yml/badge.svg)](https://github.com/JohnnyZ93/oai-compatible-copilot/actions) 4 | [![License](https://img.shields.io/github/license/JohnnyZ93/oai-compatible-copilot?color=orange&label=License)](https://github.com/JohnnyZ93/oai-compatible-copilot/blob/main/LICENSE) 5 | 6 | Use frontier open LLMs like Qwen3 Coder, Kimi K2, DeepSeek V3.2, GLM 4.6, and more in VS Code with GitHub Copilot Chat, powered by any OpenAI-compatible provider 🔥 7 | 8 | ## ✨ Features 9 | - Supports almost all OpenAI-compatible providers, such as ModelScope, SiliconFlow, DeepSeek... 10 | - Supports vision models. 11 | - Offers additional configuration options for chat requests. 12 | - Supports controlling whether model thinking and reasoning content is shown in the chat interface. 13 | > ![thinkingPartDemo](./assets/thinkingPartDemo.png) 14 | - Supports configuring models from multiple providers simultaneously, automatically managing API keys so you don't have to switch them repeatedly. 15 | - Supports defining multiple configurations for the same model ID with different settings (e.g. thinking enabled/disabled for GLM-4.6). 16 | - Supports an automatic retry mechanism for handling API errors such as 429, 500, 502, 503, and 504. 17 | - Supports token usage counting and setting provider API keys from the status bar. 18 | > ![statusBar](./assets/statusBar.png) 19 | --- 20 | 21 | ## Requirements 22 | - VS Code 1.104.0 or higher. 23 | - OpenAI-compatible provider API key. 24 | --- 25 | 26 | ## ⚡ Quick Start 27 | 1. Install the OAI Compatible Provider for Copilot extension [here](https://marketplace.visualstudio.com/items?itemName=johnny-zhao.oai-compatible-copilot). 28 | 2. Open VS Code Settings and configure `oaicopilot.baseUrl` and `oaicopilot.models`. 29 | 3. Open the GitHub Copilot Chat interface. 30 | 4. Click the model picker and select "Manage Models...". 31 | 5. Choose the "OAI Compatible" provider. 32 | 6. Enter your API key — it will be saved locally. 33 | 7. Select the models you want to add to the model picker. 34 | 35 | ### Settings Example 36 | 37 | ```json 38 | "oaicopilot.baseUrl": "https://api-inference.modelscope.cn/v1", 39 | "oaicopilot.models": [ 40 | { 41 | "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", 42 | "owned_by": "modelscope", 43 | "context_length": 256000, 44 | "max_tokens": 8192, 45 | "temperature": 0, 46 | "top_p": 1 47 | } 48 | ] 49 | ``` 50 | --- 51 | 52 | ## * Multi API Mode 53 | 54 | The extension supports three different API protocols to work with various model providers.
You can specify which API mode to use for each model via the `apiMode` parameter. 55 | 56 | ### Supported API Modes 57 | 58 | 1. **`openai`** (default) - OpenAI-compatible API 59 | - Endpoint: `/chat/completions` 60 | - Header: `Authorization: Bearer ` 61 | - Use for: Most OpenAI-compatible providers (ModelScope, SiliconFlow, etc.) 62 | 63 | 2. **`ollama`** - Ollama native API 64 | - Endpoint: `/api/chat` 65 | - Header: `Authorization: Bearer ` (or no header for local Ollama) 66 | - Use for: Local Ollama instances 67 | 68 | 3. **`anthropic`** - Anthropic Claude API 69 | - Endpoint: `/v1/messages` 70 | - Header: `x-api-key: ` 71 | - Use for: Anthropic Claude models 72 | 73 | ### Configuration Examples 74 | Mixed configuration with multiple API modes: 75 | 76 | ```json 77 | "oaicopilot.models": [ 78 | { 79 | "id": "GLM-4.6", 80 | "owned_by": "modelscope", 81 | }, 82 | { 83 | "id": "llama3.2", 84 | "owned_by": "ollama", 85 | "baseUrl": "http://localhost:11434", 86 | "apiMode": "ollama" 87 | }, 88 | { 89 | "id": "claude-3-5-sonnet-20241022", 90 | "owned_by": "anthropic", 91 | "baseUrl": "https://api.anthropic.com", 92 | "apiMode": "anthropic" 93 | } 94 | ] 95 | ``` 96 | 97 | ### Important Notes 98 | - The `apiMode` parameter defaults to `"openai"` if not specified. 99 | - When using `ollama` mode, you can omit the API key (`ollama` by default) or set it to any string. 100 | - Each API mode uses different message conversion logic internally to match provider-specific formats (tools, images, thinking). 101 | --- 102 | 103 | ## * Multi-Provider Guide 104 | 105 | > `owned_by` in model config is used for group apiKey. The storage key is `oaicopilot.apiKey.${owned_by}`. 106 | 107 | 1. Open VS Code Settings and configure `oaicopilot.models`. 108 | 2. Open command center ( Ctrl + Shift + P ), and search "OAICopilot: Set OAI Compatible Multi-Provider Apikey" to configure provider-specific API keys. 109 | 3. Open Github Copilot Chat interface. 110 | 4. Click the model picker and select "Manage Models...". 111 | 5. Choose "OAI Compatible" provider. 112 | 6. Select the models you want to add to the model picker. 113 | 114 | ### Settings Example 115 | 116 | ```json 117 | "oaicopilot.baseUrl": "https://api-inference.modelscope.cn/v1", 118 | "oaicopilot.models": [ 119 | { 120 | "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", 121 | "owned_by": "modelscope", 122 | "context_length": 256000, 123 | "max_tokens": 8192, 124 | "temperature": 0, 125 | "top_p": 1 126 | }, 127 | { 128 | "id": "qwen3-coder", 129 | "owned_by": "iflow", 130 | "baseUrl": "https://apis.iflow.cn/v1", 131 | "context_length": 256000, 132 | "max_tokens": 8192, 133 | "temperature": 0, 134 | "top_p": 1 135 | } 136 | ] 137 | ``` 138 | 139 | --- 140 | 141 | ## * Multi-config for the same model 142 | 143 | You can define multiple configurations for the same model ID by using the `configId` field. This allows you to have the same base model with different settings for different use cases. 144 | 145 | To use this feature: 146 | 147 | 1. Add the `configId` field to your model configuration 148 | 2. Each configuration with the same `id` must have a unique `configId` 149 | 3. 
The model will appear as separate entries in the VS Code model picker 150 | 151 | ### Settings Example 152 | 153 | ```json 154 | "oaicopilot.models": [ 155 | { 156 | "id": "glm-4.6", 157 | "configId": "thinking", 158 | "owned_by": "zai", 159 | "temperature": 0.7, 160 | "top_p": 1, 161 | "thinking": { 162 | "type": "enabled" 163 | } 164 | }, 165 | { 166 | "id": "glm-4.6", 167 | "configId": "no-thinking", 168 | "owned_by": "zai", 169 | "temperature": 0, 170 | "top_p": 1, 171 | "thinking": { 172 | "type": "disabled" 173 | } 174 | } 175 | ] 176 | ``` 177 | 178 | In this example, you'll have two different configurations of the glm-4.6 model available in VS Code: 179 | - `glm-4.6::thinking` - use GLM-4.6 with thinking 180 | - `glm-4.6::no-thinking` - use GLM-4.6 without thinking 181 | 182 | --- 183 | 184 | ## * Custom Headers 185 | 186 | You can specify custom HTTP headers that will be sent with every request to a specific model's provider. This is useful for: 187 | 188 | - API versioning headers 189 | - Custom authentication headers (in addition to the standard Authorization header) 190 | - Provider-specific headers required by certain APIs 191 | - Request tracking or debugging headers 192 | 193 | ### Custom Headers Example 194 | 195 | ```json 196 | "oaicopilot.models": [ 197 | { 198 | "id": "custom-model", 199 | "owned_by": "provider", 200 | "baseUrl": "https://api.example.com/v1", 201 | "headers": { 202 | "X-API-Version": "2024-01", 203 | "X-Request-Source": "vscode-copilot", 204 | "Custom-Auth-Token": "additional-token-if-needed" 205 | } 206 | } 207 | ] 208 | ``` 209 | 210 | **Important Notes:** 211 | - Custom headers are merged with the default headers (Authorization, Content-Type, User-Agent) 212 | - If a custom header conflicts with a default header, the custom header takes precedence 213 | - Headers are applied on a per-model basis, allowing different headers for different providers 214 | - Header values must be strings 215 | --- 216 | 217 | ## * Custom Request Body Parameters 218 | 219 | The `extra` field allows you to add arbitrary parameters to the API request body. This is useful for provider-specific features that aren't covered by the standard parameters.
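Conceptually, the merge is a plain key-by-key assignment performed after the standard fields have been set, which is why `extra` values win when names collide. The sketch below illustrates that behavior; it is a simplified stand-in for the extension's internal `prepareRequestBody` logic (the helper name `applyExtra` is made up for illustration), not the exact code:

```ts
// Illustrative sketch of how `extra` parameters end up in the request body.
// `body` already holds the standard fields (model, messages, temperature, ...).
function applyExtra(
	body: Record<string, unknown>,
	extra?: Record<string, unknown>
): Record<string, unknown> {
	if (!extra) {
		return body;
	}
	for (const [key, value] of Object.entries(extra)) {
		if (value !== undefined) {
			// A later assignment overwrites any standard field of the same name.
			body[key] = value;
		}
	}
	return body;
}

// Example: `seed` is added, and the `extra` value overrides the standard temperature.
applyExtra({ model: "custom-model", temperature: 0 }, { seed: 42, temperature: 0.7 });
```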
220 | 221 | ### How it works 222 | - Parameters in `extra` are merged directly into the request body 223 | - Works with all API modes (`openai`, `ollama`, `anthropic`) 224 | - Values can be any valid JSON type (string, number, boolean, object, array) 225 | 226 | ### Common use cases 227 | - **OpenAI-specific parameters**: `seed`, `logprobs`, `top_logprobs`, `suffix`, `presence_penalty` (if not using standard parameter) 228 | - **Provider-specific features**: Custom sampling methods, debugging flags 229 | - **Experimental parameters**: Beta features from API providers 230 | 231 | ### Configuration Example 232 | 233 | ```json 234 | "oaicopilot.models": [ 235 | { 236 | "id": "custom-model", 237 | "owned_by": "openai", 238 | "extra": { 239 | "seed": 42, 240 | "logprobs": true, 241 | "top_logprobs": 5, 242 | "suffix": "###", 243 | "presence_penalty": 0.1 244 | } 245 | }, 246 | { 247 | "id": "local-model", 248 | "owned_by": "ollama", 249 | "baseUrl": "http://localhost:11434", 250 | "apiMode": "ollama", 251 | "extra": { 252 | "keep_alive": "5m", 253 | "raw": true 254 | } 255 | }, 256 | { 257 | "id": "claude-model", 258 | "owned_by": "anthropic", 259 | "baseUrl": "https://api.anthropic.com", 260 | "apiMode": "anthropic", 261 | "extra": { 262 | "service_tier": "standard_only" 263 | } 264 | } 265 | ] 266 | ``` 267 | 268 | ### Important Notes 269 | - Parameters in `extra` are added after standard parameters 270 | - If an `extra` parameter conflicts with a standard parameter, the `extra` value takes precedence 271 | - Use this for provider-specific features only 272 | - Standard parameters (temperature, top_p, etc.) should use their dedicated fields when possible 273 | - API provider must support the parameters you specify 274 | 275 | --- 276 | 277 | ## Model Parameters 278 | All parameters support individual configuration for different models, providing highly flexible model tuning capabilities. 279 | 280 | - `id` (required): Model identifier 281 | - `owned_by` (required): Model provider 282 | - `displayName`: Display name for the model that will be shown in the Copilot interface. 283 | - `configId`: Configuration ID for this model. Allows defining the same model with different settings (e.g. 'glm-4.6::thinking', 'glm-4.6::no-thinking') 284 | - `family`: Model family (e.g., 'gpt-4', 'claude-3', 'gemini'). Enables model-specific optimizations and behaviors. Defaults to 'oai-compatible' if not specified. 285 | - `baseUrl`: Model-specific base URL. If not provided, the global `oaicopilot.baseUrl` will be used 286 | - `context_length`: The context length supported by the model. Default value is 128000 287 | - `max_tokens`: Maximum number of tokens to generate (range: [1, context_length]). Default value is 4096 288 | - `max_completion_tokens`: Maximum number of tokens to generate (OpenAI new standard parameter) 289 | - `vision`: Whether the model supports vision capabilities. Defaults to false 290 | - `temperature`: Sampling temperature (range: [0, 2]). Lower values make the output more deterministic, higher values more creative. Default value is 0 291 | - `top_p`: Top-p sampling value (range: (0, 1]). Optional parameter 292 | - `top_k`: Top-k sampling value (range: [1, ∞)). Optional parameter 293 | - `min_p`: Minimum probability threshold (range: [0, 1]). Optional parameter 294 | - `frequency_penalty`: Frequency penalty (range: [-2, 2]). Optional parameter 295 | - `presence_penalty`: Presence penalty (range: [-2, 2]). Optional parameter 296 | - `repetition_penalty`: Repetition penalty (range: (0, 2]). 
Optional parameter 297 | - `enable_thinking`: Enable model thinking and reasoning content display (for non-OpenRouter providers) 298 | - `thinking_budget`: Maximum token count for thinking chain output. Optional parameter 299 | - `reasoning`: OpenRouter reasoning configuration, includes the following options: 300 | - `enabled`: Enable reasoning functionality (if not specified, will be inferred from effort or max_tokens) 301 | - `effort`: Reasoning effort level (high, medium, low, minimal, auto) 302 | - `exclude`: Exclude reasoning tokens from the final response 303 | - `max_tokens`: Specific token limit for reasoning (Anthropic style, as an alternative to effort) 304 | - `thinking`: Thinking configuration for Zai provider 305 | - `type`: Set to 'enabled' to enable thinking, 'disabled' to disable thinking 306 | - `reasoning_effort`: Reasoning effort level (OpenAI reasoning configuration) 307 | - `headers`: Custom HTTP headers to be sent with every request to this model's provider (e.g., `{"X-API-Version": "v1", "X-Custom-Header": "value"}`). These headers will be merged with the default headers (Authorization, Content-Type, User-Agent) 308 | - `extra`: Extra request body parameters. 309 | - `include_reasoning_in_request`: Whether to include reasoning_content in assistant messages sent to the API. Support deepseek-v3.2 or others. 310 | - `apiMode`: API mode: 'openai' (Default) for API (/v1/chat/completions), 'ollama' for API (/api/chat), 'anthropic' for API (/v1/messages). 311 | --- 312 | 313 | ## Thanks to 314 | 315 | Thanks to all the people who contribute. 316 | 317 | - [Contributors](https://github.com/JohnnyZ93/oai-compatible-copilot/graphs/contributors) 318 | - [Hugging Face Chat Extension](https://github.com/huggingface/huggingface-vscode-chat) 319 | - [VS Code Chat Provider API](https://code.visualstudio.com/api/extension-guides/ai/language-model-chat-provider) 320 | 321 | --- 322 | 323 | ## Support & License 324 | - Open issues: https://github.com/JohnnyZ93/oai-compatible-copilot/issues 325 | - License: MIT License Copyright (c) 2025 Johnny Zhao 326 | -------------------------------------------------------------------------------- /src/provider.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from "vscode"; 2 | import { 3 | CancellationToken, 4 | LanguageModelChatInformation, 5 | LanguageModelChatProvider, 6 | LanguageModelChatRequestMessage, 7 | ProvideLanguageModelChatResponseOptions, 8 | LanguageModelResponsePart2, 9 | Progress, 10 | } from "vscode"; 11 | 12 | import type { HFModelItem } from "./types"; 13 | 14 | import type { OllamaRequestBody } from "./ollama/ollamaTypes"; 15 | 16 | import { parseModelId, createRetryConfig, executeWithRetry } from "./utils"; 17 | 18 | import { prepareLanguageModelChatInformation } from "./provideModel"; 19 | import { prepareTokenCount } from "./provideToken"; 20 | import { updateContextStatusBar } from "./statusBar"; 21 | import { OllamaApi } from "./ollama/ollamaApi"; 22 | import { OpenaiApi } from "./openai/openaiApi"; 23 | import { AnthropicApi } from "./anthropic/anthropicApi"; 24 | import { AnthropicRequestBody } from "./anthropic/anthropicTypes"; 25 | 26 | /** 27 | * VS Code Chat provider backed by Hugging Face Inference Providers. 28 | */ 29 | export class HuggingFaceChatModelProvider implements LanguageModelChatProvider { 30 | /** Track last request completion time for delay calculation. 
*/ 31 | private _lastRequestTime: number | null = null; 32 | 33 | /** 34 | * Create a provider using the given secret storage for the API key. 35 | * @param secrets VS Code secret storage. 36 | */ 37 | constructor( 38 | private readonly secrets: vscode.SecretStorage, 39 | private readonly userAgent: string, 40 | private readonly statusBarItem: vscode.StatusBarItem 41 | ) {} 42 | 43 | /** 44 | * Get the list of available language models contributed by this provider 45 | * @param options Options which specify the calling context of this function 46 | * @param token A cancellation token which signals if the user cancelled the request or not 47 | * @returns A promise that resolves to the list of available language models 48 | */ 49 | async provideLanguageModelChatInformation( 50 | options: { silent: boolean }, 51 | _token: CancellationToken 52 | ): Promise { 53 | return prepareLanguageModelChatInformation( 54 | { silent: options.silent ?? false }, 55 | _token, 56 | this.secrets, 57 | this.userAgent 58 | ); 59 | } 60 | 61 | /** 62 | * Returns the number of tokens for a given text using the model specific tokenizer logic 63 | * @param model The language model to use 64 | * @param text The text to count tokens for 65 | * @param token A cancellation token for the request 66 | * @returns A promise that resolves to the number of tokens 67 | */ 68 | async provideTokenCount( 69 | model: LanguageModelChatInformation, 70 | text: string | LanguageModelChatRequestMessage, 71 | _token: CancellationToken 72 | ): Promise { 73 | return prepareTokenCount(model, text, _token); 74 | } 75 | 76 | /** 77 | * Returns the response for a chat request, passing the results to the progress callback. 78 | * The {@linkcode LanguageModelChatProvider} must emit the response parts to the progress callback as they are received from the language model. 79 | * @param model The language model to use 80 | * @param messages The messages to include in the request 81 | * @param options Options for the request 82 | * @param progress The progress to emit the streamed response chunks to 83 | * @param token A cancellation token for the request 84 | * @returns A promise that resolves when the response is complete. Results are actually passed to the progress callback. 85 | */ 86 | async provideLanguageModelChatResponse( 87 | model: LanguageModelChatInformation, 88 | messages: readonly LanguageModelChatRequestMessage[], 89 | options: ProvideLanguageModelChatResponseOptions, 90 | progress: Progress, 91 | token: CancellationToken 92 | ): Promise { 93 | // Update Token Usage 94 | updateContextStatusBar(messages, model, this.statusBarItem); 95 | 96 | // Apply delay between consecutive requests 97 | const config = vscode.workspace.getConfiguration(); 98 | const delayMs = config.get("oaicopilot.delay", 0); 99 | 100 | if (delayMs > 0 && this._lastRequestTime !== null) { 101 | const elapsed = Date.now() - this._lastRequestTime; 102 | if (elapsed < delayMs) { 103 | const remainingDelay = delayMs - elapsed; 104 | await new Promise((resolve) => { 105 | const timeout = setTimeout(() => { 106 | clearTimeout(timeout); 107 | resolve(); 108 | }, remainingDelay); 109 | }); 110 | } 111 | } 112 | 113 | const trackingProgress: Progress = { 114 | report: (part) => { 115 | try { 116 | progress.report(part); 117 | } catch (e) { 118 | console.error("[OAI Compatible Model Provider] Progress.report failed", { 119 | modelId: model.id, 120 | error: e instanceof Error ? 
{ name: e.name, message: e.message } : String(e), 121 | }); 122 | } 123 | }, 124 | }; 125 | try { 126 | // get model config from user settings 127 | const config = vscode.workspace.getConfiguration(); 128 | const userModels = config.get("oaicopilot.models", []); 129 | 130 | // Parse the model ID to resolve an optional config ID 131 | const parsedModelId = parseModelId(model.id); 132 | 133 | // Find the matching user model configuration 134 | // Prefer a model that matches both the base ID and the config ID 135 | // If there is no config ID, match a model with the same base ID 136 | let um: HFModelItem | undefined = userModels.find( 137 | (um) => 138 | um.id === parsedModelId.baseId && 139 | ((parsedModelId.configId && um.configId === parsedModelId.configId) || 140 | (!parsedModelId.configId && !um.configId)) 141 | ); 142 | 143 | // If still no model is found, fall back to any model matching the base ID (loosest match, for backward compatibility) 144 | if (!um) { 145 | um = userModels.find((um) => um.id === parsedModelId.baseId); 146 | } 147 | 148 | // Prepare model configuration for message conversion 149 | const modelConfig = { 150 | includeReasoningInRequest: um?.include_reasoning_in_request ?? false, 151 | }; 152 | 153 | // Get API key for the model's provider 154 | const provider = um?.owned_by; 155 | const useGenericKey = !um?.baseUrl; 156 | const modelApiKey = await this.ensureApiKey(useGenericKey, provider); 157 | if (!modelApiKey) { 158 | throw new Error("OAI Compatible API key not found"); 159 | } 160 | 161 | // send chat request 162 | const BASE_URL = um?.baseUrl || config.get("oaicopilot.baseUrl", ""); 163 | if (!BASE_URL || !BASE_URL.startsWith("http")) { 164 | throw new Error(`Invalid base URL configuration.`); 165 | } 166 | 167 | // get retry config 168 | const retryConfig = createRetryConfig(); 169 | 170 | // Check if using Ollama native API mode 171 | const apiMode = um?.apiMode ?? "openai"; 172 | 173 | // prepare headers with custom headers if specified 174 | const requestHeaders = this.prepareHeaders(modelApiKey, apiMode, um?.headers); 175 | 176 | // console.debug("[OAI Compatible Model Provider] messages:", JSON.stringify(messages)); 177 | if (apiMode === "ollama") { 178 | // Ollama native API mode 179 | const ollamaApi = new OllamaApi(); 180 | const ollamaMessages = ollamaApi.convertMessages(messages, modelConfig); 181 | 182 | let ollamaRequestBody: OllamaRequestBody = { 183 | model: parsedModelId.baseId, 184 | messages: ollamaMessages, 185 | stream: true, 186 | }; 187 | ollamaRequestBody = ollamaApi.prepareRequestBody(ollamaRequestBody, um, options); 188 | // console.debug("[OAI Compatible Model Provider] RequestBody:", JSON.stringify(ollamaRequestBody)); 189 | 190 | // send Ollama chat request with retry 191 | const response = await executeWithRetry(async () => { 192 | const res = await fetch(`${BASE_URL.replace(/\/+$/, "")}/api/chat`, { 193 | method: "POST", 194 | headers: requestHeaders, 195 | body: JSON.stringify(ollamaRequestBody), 196 | }); 197 | 198 | if (!res.ok) { 199 | const errorText = await res.text(); 200 | console.error("[Ollama Provider] Ollama API error response", errorText); 201 | throw new Error(`Ollama API error: [${res.status}] ${res.statusText}${errorText ?
`\n${errorText}` : ""}`); 202 | } 203 | 204 | return res; 205 | }, retryConfig); 206 | 207 | if (!response.body) { 208 | throw new Error("No response body from Ollama API"); 209 | } 210 | await ollamaApi.processStreamingResponse(response.body, trackingProgress, token); 211 | } else if (apiMode === "anthropic") { 212 | // Anthropic API mode 213 | const anthropicApi = new AnthropicApi(); 214 | const anthropicMessages = anthropicApi.convertMessages(messages, modelConfig); 215 | 216 | // requestBody 217 | let requestBody: AnthropicRequestBody = { 218 | model: parsedModelId.baseId, 219 | messages: anthropicMessages, 220 | stream: true, 221 | }; 222 | requestBody = anthropicApi.prepareRequestBody(requestBody, um, options); 223 | // console.debug("[OAI Compatible Model Provider] RequestBody:", JSON.stringify(requestBody)); 224 | 225 | // send Anthropic chat request with retry 226 | const response = await executeWithRetry(async () => { 227 | const res = await fetch(`${BASE_URL.replace(/\/+$/, "")}/v1/messages`, { 228 | method: "POST", 229 | headers: requestHeaders, 230 | body: JSON.stringify(requestBody), 231 | }); 232 | 233 | if (!res.ok) { 234 | const errorText = await res.text(); 235 | console.error("[Anthropic Provider] Anthropic API error response", errorText); 236 | throw new Error( 237 | `Anthropic API error: [${res.status}] ${res.statusText}${errorText ? `\n${errorText}` : ""}` 238 | ); 239 | } 240 | 241 | return res; 242 | }, retryConfig); 243 | 244 | if (!response.body) { 245 | throw new Error("No response body from Anthropic API"); 246 | } 247 | await anthropicApi.processStreamingResponse(response.body, trackingProgress, token); 248 | } else { 249 | // OpenAI compatible API mode (default) 250 | const openaiApi = new OpenaiApi(); 251 | const openaiMessages = openaiApi.convertMessages(messages, modelConfig); 252 | 253 | // requestBody 254 | let requestBody: Record = { 255 | model: parsedModelId.baseId, 256 | messages: openaiMessages, 257 | stream: true, 258 | stream_options: { include_usage: true }, 259 | }; 260 | requestBody = openaiApi.prepareRequestBody(requestBody, um, options); 261 | // console.debug("[OAI Compatible Model Provider] RequestBody:", JSON.stringify(requestBody)); 262 | 263 | // send chat request with retry 264 | const response = await executeWithRetry(async () => { 265 | const res = await fetch(`${BASE_URL.replace(/\/+$/, "")}/chat/completions`, { 266 | method: "POST", 267 | headers: requestHeaders, 268 | body: JSON.stringify(requestBody), 269 | }); 270 | 271 | if (!res.ok) { 272 | const errorText = await res.text(); 273 | console.error("[OAI Compatible Model Provider] OAI Compatible API error response", errorText); 274 | throw new Error( 275 | `OAI Compatible API error: [${res.status}] ${res.statusText}${errorText ? `\n${errorText}` : ""}` 276 | ); 277 | } 278 | 279 | return res; 280 | }, retryConfig); 281 | 282 | if (!response.body) { 283 | throw new Error("No response body from OAI Compatible API"); 284 | } 285 | await openaiApi.processStreamingResponse(response.body, trackingProgress, token); 286 | } 287 | } catch (err) { 288 | console.error("[OAI Compatible Model Provider] Chat request failed", { 289 | modelId: model.id, 290 | messageCount: messages.length, 291 | error: err instanceof Error ? { name: err.name, message: err.message } : String(err), 292 | }); 293 | throw err; 294 | } finally { 295 | // Update last request time after successful completion 296 | this._lastRequestTime = Date.now(); 297 | } 298 | } 299 | 300 | /** 301 | * Prepare headers for API request. 
302 | * @param apiKey The API key to use. 303 | * @param apiMode The apiMode (affects header format). 304 | * @param customHeaders Optional custom headers from model config. 305 | * @returns Headers object. 306 | */ 307 | private prepareHeaders( 308 | apiKey: string, 309 | apiMode: string, 310 | customHeaders?: Record 311 | ): Record { 312 | const headers: Record = { 313 | "Content-Type": "application/json", 314 | "User-Agent": this.userAgent, 315 | }; 316 | 317 | // Provider-specific header formats 318 | if (apiMode === "anthropic") { 319 | headers["x-api-key"] = apiKey; 320 | } else if (apiMode === "ollama" && apiKey !== "ollama") { 321 | headers["Authorization"] = `Bearer ${apiKey}`; 322 | } else { 323 | headers["Authorization"] = `Bearer ${apiKey}`; 324 | } 325 | 326 | // Merge custom headers 327 | if (customHeaders) { 328 | return { ...headers, ...customHeaders }; 329 | } 330 | 331 | return headers; 332 | } 333 | 334 | /** 335 | * Ensure an API key exists in SecretStorage, optionally prompting the user when not silent. 336 | * @param useGenericKey If true, use generic API key. 337 | * @param provider Optional provider name to get provider-specific API key. 338 | */ 339 | private async ensureApiKey(useGenericKey: boolean, provider?: string): Promise { 340 | // Try to get provider-specific API key first 341 | let apiKey: string | undefined; 342 | if (provider && provider.trim() !== "") { 343 | const normalizedProvider = provider.toLowerCase(); 344 | const providerKey = `oaicopilot.apiKey.${normalizedProvider}`; 345 | apiKey = await this.secrets.get(providerKey); 346 | 347 | if (!apiKey && !useGenericKey) { 348 | const entered = await vscode.window.showInputBox({ 349 | title: `OAI Compatible API Key for ${normalizedProvider}`, 350 | prompt: `Enter your OAI Compatible API key for ${normalizedProvider}`, 351 | ignoreFocusOut: true, 352 | password: true, 353 | }); 354 | if (entered && entered.trim()) { 355 | apiKey = entered.trim(); 356 | await this.secrets.store(providerKey, apiKey); 357 | } 358 | } 359 | } 360 | 361 | // Fall back to generic API key 362 | if (!apiKey) { 363 | apiKey = await this.secrets.get("oaicopilot.apiKey"); 364 | } 365 | 366 | if (!apiKey && useGenericKey) { 367 | const entered = await vscode.window.showInputBox({ 368 | title: "OAI Compatible API Key", 369 | prompt: "Enter your OAI Compatible API key", 370 | ignoreFocusOut: true, 371 | password: true, 372 | }); 373 | if (entered && entered.trim()) { 374 | apiKey = entered.trim(); 375 | await this.secrets.store("oaicopilot.apiKey", apiKey); 376 | } 377 | } 378 | return apiKey; 379 | } 380 | } 381 | -------------------------------------------------------------------------------- /src/openai/openaiApi.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from "vscode"; 2 | import { 3 | CancellationToken, 4 | LanguageModelChatRequestMessage, 5 | ProvideLanguageModelChatResponseOptions, 6 | LanguageModelResponsePart2, 7 | Progress, 8 | } from "vscode"; 9 | 10 | import type { HFModelItem, ReasoningConfig } from "../types"; 11 | 12 | import type { 13 | OpenAIChatMessage, 14 | OpenAIToolCall, 15 | ChatMessageContent, 16 | ReasoningDetail, 17 | ReasoningSummaryDetail, 18 | ReasoningTextDetail, 19 | } from "./openaiTypes"; 20 | 21 | import { 22 | isImageMimeType, 23 | createDataUrl, 24 | isToolResultPart, 25 | collectToolResultText, 26 | convertToolsToOpenAI, 27 | mapRole, 28 | } from "../utils"; 29 | 30 | import { CommonApi } from "../commonApi"; 31 | 32 | export class 
OpenaiApi extends CommonApi { 33 | constructor() { 34 | super(); 35 | } 36 | 37 | /** 38 | * Convert VS Code chat request messages into OpenAI-compatible message objects. 39 | * @param messages The VS Code chat messages to convert. 40 | * @param modelConfig Model configuration that may affect message conversion. 41 | * @returns OpenAI-compatible messages array. 42 | */ 43 | convertMessages( 44 | messages: readonly LanguageModelChatRequestMessage[], 45 | modelConfig: { includeReasoningInRequest: boolean } 46 | ): OpenAIChatMessage[] { 47 | const out: OpenAIChatMessage[] = []; 48 | for (const m of messages) { 49 | const role = mapRole(m); 50 | const textParts: string[] = []; 51 | const imageParts: vscode.LanguageModelDataPart[] = []; 52 | const toolCalls: OpenAIToolCall[] = []; 53 | const toolResults: { callId: string; content: string }[] = []; 54 | const reasoningParts: string[] = []; 55 | 56 | for (const part of m.content ?? []) { 57 | if (part instanceof vscode.LanguageModelTextPart) { 58 | textParts.push(part.value); 59 | } else if (part instanceof vscode.LanguageModelDataPart && isImageMimeType(part.mimeType)) { 60 | imageParts.push(part); 61 | } else if (part instanceof vscode.LanguageModelToolCallPart) { 62 | const id = part.callId || `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`; 63 | let args = "{}"; 64 | try { 65 | args = JSON.stringify(part.input ?? {}); 66 | } catch { 67 | args = "{}"; 68 | } 69 | toolCalls.push({ id, type: "function", function: { name: part.name, arguments: args } }); 70 | } else if (isToolResultPart(part)) { 71 | const callId = (part as { callId?: string }).callId ?? ""; 72 | const content = collectToolResultText(part as { content?: ReadonlyArray }); 73 | toolResults.push({ callId, content }); 74 | } else if (part instanceof vscode.LanguageModelThinkingPart) { 75 | // Handle thinking content 76 | const content = Array.isArray(part.value) ?
part.value.join("") : part.value; 77 | reasoningParts.push(content); 78 | } 79 | } 80 | 81 | // Build the assistant message, including thinking content 82 | if (role === "assistant") { 83 | const assistantMessage: OpenAIChatMessage = { 84 | role: "assistant", 85 | content: textParts.join("\n") || undefined, 86 | }; 87 | 88 | // Add thinking content (whether it is included depends on configuration) 89 | if (modelConfig.includeReasoningInRequest && reasoningParts.length > 0) { 90 | assistantMessage.reasoning_content = reasoningParts.join("\n"); 91 | } 92 | 93 | // Add tool calls 94 | if (toolCalls.length > 0) { 95 | assistantMessage.tool_calls = toolCalls; 96 | } 97 | 98 | // Only add the message when it has content, thinking content, or tool calls 99 | if (assistantMessage.content || assistantMessage.reasoning_content || assistantMessage.tool_calls) { 100 | out.push(assistantMessage); 101 | } 102 | } 103 | 104 | // Handle tool results 105 | for (const tr of toolResults) { 106 | out.push({ role: "tool", tool_call_id: tr.callId, content: tr.content || "" }); 107 | } 108 | 109 | // Handle user and system messages 110 | if (textParts.length > 0 && role !== "assistant") { 111 | if (role === "user") { 112 | if (imageParts.length > 0) { 113 | // Multimodal message: contains images and text 114 | const contentArray: ChatMessageContent[] = []; 115 | contentArray.push({ 116 | type: "text", 117 | text: textParts.join("\n"), 118 | }); 119 | 120 | // Add image content 121 | for (const imagePart of imageParts) { 122 | const dataUrl = createDataUrl(imagePart); 123 | contentArray.push({ 124 | type: "image_url", 125 | image_url: { 126 | url: dataUrl, 127 | }, 128 | }); 129 | } 130 | out.push({ role, content: contentArray }); 131 | } else { 132 | // Plain text message 133 | out.push({ role, content: textParts.join("\n") }); 134 | } 135 | } else if (role === "system") { 136 | out.push({ role, content: textParts.join("\n") }); 137 | } 138 | } 139 | } 140 | return out; 141 | } 142 | 143 | prepareRequestBody( 144 | rb: Record, 145 | um: HFModelItem | undefined, 146 | options: ProvideLanguageModelChatResponseOptions 147 | ): Record { 148 | // temperature 149 | const oTemperature = options.modelOptions?.temperature ?? 0; 150 | const temperature = um?.temperature ??
oTemperature; 151 | rb.temperature = temperature; 152 | if (um && um.temperature === null) { 153 | delete rb.temperature; 154 | } 155 | 156 | // top_p 157 | if (um?.top_p !== undefined && um.top_p !== null) { 158 | rb.top_p = um.top_p; 159 | } 160 | 161 | // max_tokens 162 | if (um?.max_tokens !== undefined) { 163 | rb.max_tokens = um.max_tokens; 164 | } 165 | 166 | // max_completion_tokens (OpenAI new standard parameter) 167 | if (um?.max_completion_tokens !== undefined) { 168 | rb.max_completion_tokens = um.max_completion_tokens; 169 | } 170 | 171 | // OpenAI reasoning configuration 172 | if (um?.reasoning_effort !== undefined) { 173 | rb.reasoning_effort = um.reasoning_effort; 174 | } 175 | 176 | // enable_thinking (non-OpenRouter only) 177 | const enableThinking = um?.enable_thinking; 178 | if (enableThinking !== undefined) { 179 | rb.enable_thinking = enableThinking; 180 | 181 | if (um?.thinking_budget !== undefined) { 182 | rb.thinking_budget = um.thinking_budget; 183 | } 184 | } 185 | 186 | // thinking (Zai provider) 187 | if (um?.thinking?.type !== undefined) { 188 | rb.thinking = { 189 | type: um.thinking.type, 190 | }; 191 | } 192 | 193 | // OpenRouter reasoning configuration 194 | if (um?.reasoning !== undefined) { 195 | const reasoningConfig: ReasoningConfig = um.reasoning as ReasoningConfig; 196 | if (reasoningConfig.enabled !== false) { 197 | const reasoningObj: Record = {}; 198 | const effort = reasoningConfig.effort; 199 | const maxTokensReasoning = reasoningConfig.max_tokens || 2000; // Default 2000 as per docs 200 | if (effort && effort !== "auto") { 201 | reasoningObj.effort = effort; 202 | } else { 203 | // If auto or unspecified, use max_tokens (Anthropic-style fallback) 204 | reasoningObj.max_tokens = maxTokensReasoning; 205 | } 206 | if (reasoningConfig.exclude !== undefined) { 207 | reasoningObj.exclude = reasoningConfig.exclude; 208 | } 209 | rb.reasoning = reasoningObj; 210 | } 211 | } 212 | 213 | // stop 214 | if (options.modelOptions) { 215 | const mo = options.modelOptions as Record; 216 | if (typeof mo.stop === "string" || Array.isArray(mo.stop)) { 217 | rb.stop = mo.stop; 218 | } 219 | } 220 | 221 | // tools 222 | const toolConfig = convertToolsToOpenAI(options); 223 | if (toolConfig.tools) { 224 | rb.tools = toolConfig.tools; 225 | } 226 | if (toolConfig.tool_choice) { 227 | rb.tool_choice = toolConfig.tool_choice; 228 | } 229 | 230 | // Configure user-defined additional parameters 231 | if (um?.top_k !== undefined) { 232 | rb.top_k = um.top_k; 233 | } 234 | if (um?.min_p !== undefined) { 235 | rb.min_p = um.min_p; 236 | } 237 | if (um?.frequency_penalty !== undefined) { 238 | rb.frequency_penalty = um.frequency_penalty; 239 | } 240 | if (um?.presence_penalty !== undefined) { 241 | rb.presence_penalty = um.presence_penalty; 242 | } 243 | if (um?.repetition_penalty !== undefined) { 244 | rb.repetition_penalty = um.repetition_penalty; 245 | } 246 | 247 | // Process extra configuration parameters 248 | if (um?.extra && typeof um.extra === "object") { 249 | // Add all extra parameters directly to the request body 250 | for (const [key, value] of Object.entries(um.extra)) { 251 | if (value !== undefined) { 252 | rb[key] = value; 253 | } 254 | } 255 | } 256 | 257 | return rb; 258 | } 259 | 260 | /** 261 | * Read and parse the HF Router streaming (SSE-like) response and report parts. 262 | * @param responseBody The readable stream body. 263 | * @param progress Progress reporter for streamed parts. 264 | * @param token Cancellation token. 
265 | */ 266 | async processStreamingResponse( 267 | responseBody: ReadableStream, 268 | progress: Progress, 269 | token: CancellationToken 270 | ): Promise { 271 | const reader = responseBody.getReader(); 272 | const decoder = new TextDecoder(); 273 | let buffer = ""; 274 | 275 | try { 276 | while (true) { 277 | if (token.isCancellationRequested) { 278 | break; 279 | } 280 | 281 | const { done, value } = await reader.read(); 282 | if (done) { 283 | break; 284 | } 285 | 286 | buffer += decoder.decode(value, { stream: true }); 287 | const lines = buffer.split("\n"); 288 | buffer = lines.pop() || ""; 289 | 290 | for (const line of lines) { 291 | if (!line.startsWith("data:")) { 292 | continue; 293 | } 294 | const data = line.slice(5).trim(); 295 | if (data === "[DONE]") { 296 | // Do not throw on [DONE]; any incomplete/empty buffers are ignored. 297 | await this.flushToolCallBuffers(progress, /*throwOnInvalid*/ false); 298 | continue; 299 | } 300 | 301 | try { 302 | const parsed = JSON.parse(data); 303 | // console.debug("[OAI Compatible Model Provider] data:", JSON.stringify(parsed)); 304 | 305 | await this.processDelta(parsed, progress); 306 | } catch { 307 | // Silently ignore malformed SSE lines temporarily 308 | } 309 | } 310 | } 311 | } finally { 312 | reader.releaseLock(); 313 | // If there's an active thinking sequence, end it first 314 | this.reportEndThinking(progress); 315 | } 316 | } 317 | 318 | /** 319 | * Handle a single streamed delta chunk, emitting text and tool call parts. 320 | * @param delta Parsed SSE chunk from the Router. 321 | * @param progress Progress reporter for parts. 322 | */ 323 | private async processDelta( 324 | delta: Record, 325 | progress: Progress 326 | ): Promise { 327 | let emitted = false; 328 | const choice = (delta.choices as Record[] | undefined)?.[0]; 329 | if (!choice) { 330 | return false; 331 | } 332 | 333 | const deltaObj = choice.delta as Record | undefined; 334 | 335 | // Process thinking content first (before regular text content) 336 | try { 337 | let maybeThinking = 338 | (choice as Record | undefined)?.thinking ?? 339 | (deltaObj as Record | undefined)?.thinking ?? 340 | (deltaObj as Record | undefined)?.reasoning_content; 341 | 342 | // OpenRouter/Claude reasoning_details array handling (new) 343 | const maybeReasoningDetails = 344 | (deltaObj as Record)?.reasoning_details ?? 345 | (choice as Record)?.reasoning_details; 346 | if (maybeReasoningDetails && Array.isArray(maybeReasoningDetails) && maybeReasoningDetails.length > 0) { 347 | // Prioritize details array over simple reasoning 348 | const details: Array = maybeReasoningDetails as Array; 349 | // Sort by index to preserve order (in case out-of-order chunks) 350 | const sortedDetails = details.sort((a, b) => (a.index ?? 0) - (b.index ?? 
0)); 351 | 352 | for (const detail of sortedDetails) { 353 | let extractedText = ""; 354 | if (detail.type === "reasoning.summary") { 355 | extractedText = (detail as ReasoningSummaryDetail).summary; 356 | } else if (detail.type === "reasoning.text") { 357 | extractedText = (detail as ReasoningTextDetail).text; 358 | } else if (detail.type === "reasoning.encrypted") { 359 | extractedText = "[REDACTED]"; // As per docs 360 | } else { 361 | extractedText = JSON.stringify(detail); // Fallback for unknown 362 | } 363 | 364 | if (extractedText) { 365 | this.bufferThinkingContent(extractedText, progress); 366 | emitted = true; 367 | } 368 | } 369 | maybeThinking = null; // Skip simple thinking if details present 370 | } 371 | 372 | // Fallback to simple thinking if no details 373 | if (maybeThinking !== undefined && maybeThinking !== null) { 374 | let text = ""; 375 | // let metadata: Record | undefined; 376 | if (maybeThinking && typeof maybeThinking === "object") { 377 | const mt = maybeThinking as Record; 378 | text = typeof mt["text"] === "string" ? (mt["text"] as string) : JSON.stringify(mt); 379 | // metadata = mt["metadata"] ? (mt["metadata"] as Record) : undefined; 380 | } else if (typeof maybeThinking === "string") { 381 | text = maybeThinking; 382 | } 383 | if (text) { 384 | this.bufferThinkingContent(text, progress); 385 | emitted = true; 386 | } 387 | } 388 | } catch (e) { 389 | console.error("[OAI Compatible Model Provider] Failed to process thinking/reasoning_details:", e); 390 | } 391 | 392 | if (deltaObj?.content) { 393 | const content = String(deltaObj.content); 394 | 395 | // Process XML think blocks or text content (mutually exclusive) 396 | const xmlRes = this.processXmlThinkBlocks(content, progress); 397 | if (xmlRes.emittedAny) { 398 | emitted = true; 399 | } else { 400 | // If there's an active thinking sequence, end it first 401 | this.reportEndThinking(progress); 402 | 403 | // Only process text content if no XML think blocks were emitted 404 | const res = this.processTextContent(content, progress); 405 | if (res.emittedText) { 406 | this._hasEmittedAssistantText = true; 407 | } 408 | if (res.emittedAny) { 409 | emitted = true; 410 | } 411 | } 412 | } 413 | 414 | if (deltaObj?.tool_calls) { 415 | // If there's an active thinking sequence, end it first 416 | this.reportEndThinking(progress); 417 | 418 | const toolCalls = deltaObj.tool_calls as Array>; 419 | 420 | // SSEProcessor-like: if first tool call appears after text, emit a whitespace 421 | // to ensure any UI buffers/linkifiers are flushed without adding visible noise. 422 | if (!this._emittedBeginToolCallsHint && this._hasEmittedAssistantText && toolCalls.length > 0) { 423 | progress.report(new vscode.LanguageModelTextPart(" ")); 424 | this._emittedBeginToolCallsHint = true; 425 | } 426 | 427 | for (const tc of toolCalls) { 428 | const idx = (tc.index as number) ?? 0; 429 | // Ignore any further deltas for an index we've already completed 430 | if (this._completedToolCallIndices.has(idx)) { 431 | continue; 432 | } 433 | const buf = this._toolCallBuffers.get(idx) ?? 
{ args: "" }; 434 | if (tc.id && typeof tc.id === "string") { 435 | buf.id = tc.id as string; 436 | } 437 | const func = tc.function as Record | undefined; 438 | if (func?.name && typeof func.name === "string") { 439 | buf.name = func.name as string; 440 | } 441 | if (typeof func?.arguments === "string") { 442 | buf.args += func.arguments as string; 443 | } 444 | this._toolCallBuffers.set(idx, buf); 445 | 446 | // Emit immediately once arguments become valid JSON to avoid perceived hanging 447 | await this.tryEmitBufferedToolCall(idx, progress); 448 | } 449 | } 450 | 451 | const finish = (choice.finish_reason as string | undefined) ?? undefined; 452 | if (finish === "tool_calls" || finish === "stop") { 453 | // On both 'tool_calls' and 'stop', emit any buffered calls and throw on invalid JSON 454 | await this.flushToolCallBuffers(progress, /*throwOnInvalid*/ true); 455 | } 456 | return emitted; 457 | } 458 | 459 | /** 460 | * Process streamed text content for inline tool-call control tokens and emit text/tool calls. 461 | * Returns which parts were emitted for logging/flow control. 462 | */ 463 | private processTextContent( 464 | input: string, 465 | progress: Progress 466 | ): { emittedText: boolean; emittedAny: boolean } { 467 | let emittedText = false; 468 | let emittedAny = false; 469 | 470 | // Emit any visible text 471 | const textToEmit = input; 472 | if (textToEmit && textToEmit.length > 0) { 473 | progress.report(new vscode.LanguageModelTextPart(textToEmit)); 474 | emittedText = true; 475 | emittedAny = true; 476 | } 477 | 478 | return { emittedText, emittedAny }; 479 | } 480 | 481 | /** 482 | * Process streamed text content for XML think blocks and emit thinking parts. 483 | * Returns whether any thinking content was emitted. 484 | */ 485 | private processXmlThinkBlocks( 486 | input: string, 487 | progress: Progress 488 | ): { emittedAny: boolean } { 489 | // If we've already attempted detection and found no THINK_START, skip processing 490 | if (this._xmlThinkDetectionAttempted && !this._xmlThinkActive) { 491 | return { emittedAny: false }; 492 | } 493 | 494 | const THINK_START = ""; 495 | const THINK_END = ""; 496 | 497 | let data = input; 498 | let emittedAny = false; 499 | 500 | while (data.length > 0) { 501 | if (!this._xmlThinkActive) { 502 | // Look for think start tag 503 | const startIdx = data.indexOf(THINK_START); 504 | if (startIdx === -1) { 505 | // No think start found, mark detection as attempted and skip future processing 506 | this._xmlThinkDetectionAttempted = true; 507 | data = ""; 508 | break; 509 | } 510 | 511 | // Found think start tag 512 | this._xmlThinkActive = true; 513 | // Generate a new thinking ID for this XML think block 514 | this._currentThinkingId = this.generateThinkingId(); 515 | 516 | // Skip the start tag and continue processing 517 | data = data.slice(startIdx + THINK_START.length); 518 | continue; 519 | } 520 | 521 | // We are inside a think block, look for end tag 522 | const endIdx = data.indexOf(THINK_END); 523 | if (endIdx === -1) { 524 | // No end tag found, emit current chunk content as thinking part 525 | const thinkContent = data.trim(); 526 | if (thinkContent) { 527 | progress.report(new vscode.LanguageModelThinkingPart(thinkContent, this._currentThinkingId || undefined)); 528 | emittedAny = true; 529 | } 530 | data = ""; 531 | break; 532 | } 533 | 534 | // Found end tag, emit final thinking part 535 | const thinkContent = data.slice(0, endIdx); 536 | if (thinkContent) { 537 | progress.report(new 
vscode.LanguageModelThinkingPart(thinkContent, this._currentThinkingId || undefined)); 538 | emittedAny = true; 539 | } 540 | 541 | // Reset state and continue with remaining data 542 | this._xmlThinkActive = false; 543 | this._currentThinkingId = null; 544 | data = data.slice(endIdx + THINK_END.length); 545 | } 546 | 547 | return { emittedAny }; 548 | } 549 | } 550 | --------------------------------------------------------------------------------