├── .npmignore ├── .gitignore ├── src ├── vision-client.ts ├── utils │ ├── helpers.ts │ └── logger.ts ├── config.ts ├── qwen-client.ts ├── siliconflow-client.ts ├── volcengine-client.ts ├── zhipu-client.ts ├── image-processor.ts └── index.ts ├── tsconfig.json ├── LICENSE ├── .env.example ├── package.json ├── .github └── workflows │ └── release.yml ├── test ├── test-data-uri.ts ├── test-qwen.ts ├── test-deepseek-raw.ts └── test-local.ts ├── CHANGELOG.md ├── README.md └── docs └── README_EN.md /.npmignore: -------------------------------------------------------------------------------- 1 | src/ 2 | docs/ 3 | examples/ 4 | *.log 5 | .DS_Store 6 | .env 7 | tsconfig.json 8 | PUBLISHING.md 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | node_modules/ 3 | 4 | # Build output 5 | build/ 6 | 7 | # Logs 8 | *.log 9 | logs/ 10 | .luma-mcp/ 11 | 12 | # Environment variables 13 | .env 14 | .env.local 15 | 16 | # OS files 17 | .DS_Store 18 | Thumbs.db 19 | 20 | # IDE 21 | .vscode/ 22 | .idea/ 23 | *.swp 24 | *.swo 25 | 26 | # Test files 27 | test/image.png 28 | test/*.jpg 29 | test/*.jpeg 30 | 31 | # Temporary files 32 | *.tmp 33 | *.temp 34 | mcp-server 35 | .claude -------------------------------------------------------------------------------- /src/vision-client.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * 视觉模型客户端统一接口 3 | */ 4 | 5 | export interface VisionClient { 6 | /** 7 | * 分析图片 8 | * @param imageDataUrl 图片 Data URL 或 URL 9 | * @param prompt 分析提示词 10 | * @param enableThinking 是否启用思考模式(如果模型支持) 11 | * @returns 分析结果文本 12 | */ 13 | analyzeImage( 14 | imageDataUrl: string, 15 | prompt: string, 16 | enableThinking?: boolean 17 | ): Promise; 18 | 19 | /** 20 | * 获取模型名称 21 | */ 22 | getModelName(): string; 23 | } 24 | 
-------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2022", 4 | "module": "Node16", 5 | "moduleResolution": "Node16", 6 | "outDir": "./build", 7 | "rootDir": "./src", 8 | "strict": true, 9 | "esModuleInterop": true, 10 | "skipLibCheck": true, 11 | "forceConsistentCasingInFileNames": true, 12 | "resolveJsonModule": true, 13 | "declaration": true, 14 | "declarationMap": true, 15 | "sourceMap": true 16 | }, 17 | "include": ["src/**/*"], 18 | "exclude": ["node_modules", "build"] 19 | } 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Jochen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | # 模型提供商选择:zhipu | siliconflow | qwen | volcengine 2 | MODEL_PROVIDER=zhipu 3 | 4 | # ========== Zhipu AI (推荐 - GLM-4.6V) ========== 5 | # 获取API Key: https://open.bigmodel.cn/ 6 | ZHIPU_API_KEY=your-zhipu-api-key-here 7 | MODEL_NAME=glm-4.6v 8 | 9 | # ========== SiliconFlow (免费 - DeepSeek-OCR) ========== 10 | # 获取API Key: https://siliconflow.cn/ 11 | # SILICONFLOW_API_KEY=your-siliconflow-api-key 12 | # MODEL_NAME=deepseek-ai/DeepSeek-OCR 13 | 14 | # ========== 阿里云 Qwen (Qwen3-VL-Flash) ========== 15 | # 获取API Key: https://dashscope.aliyun.com/ 16 | # DASHSCOPE_API_KEY=your-dashscope-api-key 17 | # MODEL_NAME=qwen3-vl-flash 18 | 19 | # ========== 火山方舟 Volcengine (Doubao-Seed-1.6) ========== 20 | # 获取API Key: https://console.volcengine.com/ark 21 | # VOLCENGINE_API_KEY=your-volcengine-api-key 22 | # MODEL_NAME=doubao-seed-1-6-flash-250828 23 | # 可选模型: 24 | # - doubao-seed-1-6-flash-250828 (性价比高,256k上下文) 25 | # - doubao-seed-1-6-vision-250815 (视觉优化,64k输出) 26 | # - doubao-seed-1-6-lite-251015 (轻量级) 27 | # 注意: 使用控制台中的实际模型ID 28 | 29 | # ========== 通用参数 ========== 30 | MAX_TOKENS=16384 31 | TEMPERATURE=0.7 32 | TOP_P=0.7 33 | ENABLE_THINKING=true 34 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "luma-mcp", 3 | "version": "1.2.7", 4 | "description": "Multi-model vision understanding MCP server. 
Supports GLM-4.6V (Zhipu), DeepSeek-OCR (SiliconFlow - Free), Qwen3-VL-Flash (Aliyun), and Doubao-Seed-1.6 (Volcengine)", 5 | "type": "module", 6 | "bin": { 7 | "luma-mcp": "build/index.js" 8 | }, 9 | "main": "./build/index.js", 10 | "scripts": { 11 | "build": "tsc", 12 | "watch": "tsc --watch", 13 | "prepare": "npm run build", 14 | "test:local": "tsx test/test-local.ts" 15 | }, 16 | "keywords": [ 17 | "mcp", 18 | "vision", 19 | "ai", 20 | "glm-4.6v", 21 | "zhipu", 22 | "deepseek-ocr", 23 | "siliconflow", 24 | "qwen3-vl", 25 | "aliyun", 26 | "dashscope", 27 | "doubao", 28 | "volcengine", 29 | "ark", 30 | "ocr", 31 | "free", 32 | "image-understanding", 33 | "multi-model" 34 | ], 35 | "author": "Jochen", 36 | "license": "MIT", 37 | "repository": { 38 | "type": "git", 39 | "url": "git+https://github.com/JochenYang/luma-mcp.git" 40 | }, 41 | "bugs": { 42 | "url": "https://github.com/JochenYang/luma-mcp/issues" 43 | }, 44 | "homepage": "https://github.com/JochenYang/luma-mcp#readme", 45 | "dependencies": { 46 | "@modelcontextprotocol/sdk": "^1.0.4", 47 | "axios": "^1.7.9", 48 | "sharp": "^0.33.5", 49 | "zod": "^3.25.76" 50 | }, 51 | "devDependencies": { 52 | "@types/node": "^22.10.2", 53 | "tsx": "^4.20.6", 54 | "typescript": "^5.7.2" 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Create Release 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*.*.*' 7 | 8 | jobs: 9 | release: 10 | runs-on: ubuntu-latest 11 | permissions: 12 | contents: write 13 | 14 | steps: 15 | - name: Checkout code 16 | uses: actions/checkout@v4 17 | with: 18 | fetch-depth: 0 19 | 20 | - name: Extract version from tag 21 | id: version 22 | run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_OUTPUT 23 | 24 | - name: Extract changelog for this version 25 | id: changelog 26 | run: | 27 | VERSION=${{ steps.version.outputs.VERSION }} 
28 | echo "Extracting changelog for version $VERSION" 29 | 30 | # Extract changelog content between version headers 31 | sed -n "/## \[${VERSION}\]/,/## \[/p" CHANGELOG.md | sed '$d' > release_notes.md 32 | 33 | # If empty, use a default message 34 | if [ ! -s release_notes.md ]; then 35 | echo "Release version ${VERSION}" > release_notes.md 36 | fi 37 | 38 | cat release_notes.md 39 | 40 | - name: Create GitHub Release 41 | uses: softprops/action-gh-release@v1 42 | with: 43 | tag_name: v${{ steps.version.outputs.VERSION }} 44 | name: Release v${{ steps.version.outputs.VERSION }} 45 | body_path: release_notes.md 46 | draft: false 47 | prerelease: false 48 | env: 49 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 50 | -------------------------------------------------------------------------------- /src/utils/helpers.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * 工具函数 3 | */ 4 | 5 | /** 6 | * 带重试机制的异步函数包装器 7 | */ 8 | export function withRetry( 9 | fn: (...args: any[]) => Promise, 10 | maxRetries: number = 2, 11 | initialDelay: number = 1000 12 | ): (...args: any[]) => Promise { 13 | return async (...args: any[]): Promise => { 14 | let lastError: Error; 15 | 16 | for (let attempt = 0; attempt <= maxRetries; attempt++) { 17 | try { 18 | return await fn(...args); 19 | } catch (error) { 20 | lastError = error instanceof Error ? 
error : new Error(String(error)); 21 | 22 | if (attempt === maxRetries) { 23 | throw lastError; 24 | } 25 | 26 | // 指数退避 27 | const delay = initialDelay * Math.pow(2, attempt); 28 | await new Promise(resolve => setTimeout(resolve, delay)); 29 | } 30 | } 31 | 32 | throw lastError!; 33 | }; 34 | } 35 | 36 | /** 37 | * 检查字符串是否为 URL 38 | */ 39 | export function isUrl(source: string): boolean { 40 | try { 41 | const url = new URL(source); 42 | return url.protocol === 'http:' || url.protocol === 'https:'; 43 | } catch { 44 | return false; 45 | } 46 | } 47 | 48 | /** 49 | * 创建成功响应 50 | */ 51 | export function createSuccessResponse(data: string) { 52 | return { 53 | content: [{ type: 'text' as const, text: data }], 54 | }; 55 | } 56 | 57 | /** 58 | * 创建错误响应 59 | */ 60 | export function createErrorResponse(message: string) { 61 | return { 62 | content: [{ type: 'text' as const, text: `错误: ${message}` }], 63 | isError: true, 64 | }; 65 | } 66 | -------------------------------------------------------------------------------- /src/config.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * 配置管理模块 3 | * 从环境变量读取配置 4 | */ 5 | 6 | export type ModelProvider = 'zhipu' | 'siliconflow' | 'qwen' | 'volcengine'; 7 | 8 | export interface LumaConfig { 9 | provider: ModelProvider; 10 | apiKey: string; 11 | model: string; 12 | maxTokens: number; 13 | temperature: number; 14 | topP: number; 15 | enableThinking: boolean; 16 | } 17 | 18 | /** 19 | * 从环境变量加载配置 20 | */ 21 | export function loadConfig(): LumaConfig { 22 | // 确定使用的模型提供商 23 | const provider = (process.env.MODEL_PROVIDER?.toLowerCase() || 'zhipu') as ModelProvider; 24 | 25 | // 根据提供商获取 API Key 26 | let apiKey: string | undefined; 27 | let defaultModel: string; 28 | 29 | if (provider === 'siliconflow') { 30 | apiKey = process.env.SILICONFLOW_API_KEY; 31 | defaultModel = 'deepseek-ai/DeepSeek-OCR'; 32 | 33 | if (!apiKey) { 34 | throw new Error('SILICONFLOW_API_KEY environment variable is 
required when using SiliconFlow provider'); 35 | } 36 | } else if (provider === 'qwen') { 37 | apiKey = process.env.DASHSCOPE_API_KEY; 38 | defaultModel = 'qwen3-vl-flash'; 39 | 40 | if (!apiKey) { 41 | throw new Error('DASHSCOPE_API_KEY environment variable is required when using Qwen provider'); 42 | } 43 | } else if (provider === 'volcengine') { 44 | apiKey = process.env.VOLCENGINE_API_KEY; 45 | defaultModel = 'doubao-seed-1-6-flash-250828'; 46 | 47 | if (!apiKey) { 48 | throw new Error('VOLCENGINE_API_KEY environment variable is required when using Volcengine provider'); 49 | } 50 | } else { 51 | apiKey = process.env.ZHIPU_API_KEY; 52 | defaultModel = 'glm-4.6v'; 53 | 54 | if (!apiKey) { 55 | throw new Error('ZHIPU_API_KEY environment variable is required when using Zhipu provider'); 56 | } 57 | } 58 | 59 | return { 60 | provider, 61 | apiKey, 62 | model: process.env.MODEL_NAME || defaultModel, 63 | maxTokens: parseInt(process.env.MAX_TOKENS || '16384', 10), 64 | temperature: parseFloat(process.env.TEMPERATURE || '0.7'), 65 | topP: parseFloat(process.env.TOP_P || '0.7'), 66 | enableThinking: process.env.ENABLE_THINKING !== 'false', 67 | }; 68 | } 69 | -------------------------------------------------------------------------------- /src/utils/logger.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * 日志工具 3 | * 将日志输出到 stderr,避免污染 MCP 的 stdout JSON 通信 4 | */ 5 | 6 | import { writeFileSync, appendFileSync, mkdirSync } from 'fs'; 7 | import { dirname, join } from 'path'; 8 | import { homedir } from 'os'; 9 | 10 | class Logger { 11 | private logFilePath?: string; 12 | 13 | constructor() { 14 | this.initLogFile(); 15 | } 16 | 17 | private initLogFile() { 18 | try { 19 | const homeDir = homedir(); 20 | const now = new Date(); 21 | const dateStr = now.toISOString().split('T')[0]; // YYYY-MM-DD 22 | const logDir = join(homeDir, '.luma-mcp'); 23 | 24 | mkdirSync(logDir, { recursive: true }); 25 | this.logFilePath = 
join(logDir, `luma-mcp-${dateStr}.log`); 26 | } catch (error) { 27 | // 如果无法创建日志文件,只输出到 stderr 28 | process.stderr.write(`[WARN] Failed to initialize log file: ${error}\n`); 29 | } 30 | } 31 | 32 | private write(level: string, message: string, ...args: any[]) { 33 | const timestamp = new Date().toISOString(); 34 | const argsStr = args.length > 0 ? ` ${JSON.stringify(args)}` : ''; 35 | const logMessage = `[${timestamp}] ${level.toUpperCase()}: ${message}${argsStr}`; 36 | 37 | // 输出到 stderr 38 | process.stderr.write(logMessage + '\n'); 39 | 40 | // 写入日志文件 41 | if (this.logFilePath) { 42 | try { 43 | appendFileSync(this.logFilePath, logMessage + '\n'); 44 | } catch { 45 | // 忽略文件写入错误 46 | } 47 | } 48 | } 49 | 50 | info(message: string, ...args: any[]) { 51 | this.write('info', message, ...args); 52 | } 53 | 54 | error(message: string, ...args: any[]) { 55 | this.write('error', message, ...args); 56 | } 57 | 58 | warn(message: string, ...args: any[]) { 59 | this.write('warn', message, ...args); 60 | } 61 | 62 | debug(message: string, ...args: any[]) { 63 | this.write('debug', message, ...args); 64 | } 65 | } 66 | 67 | export const logger = new Logger(); 68 | 69 | /** 70 | * 重定向 console 到 logger,避免污染 stdout 71 | */ 72 | export function setupConsoleRedirection() { 73 | console.log = logger.info.bind(logger); 74 | console.info = logger.info.bind(logger); 75 | console.error = logger.error.bind(logger); 76 | console.warn = logger.warn.bind(logger); 77 | console.debug = logger.debug.bind(logger); 78 | } 79 | -------------------------------------------------------------------------------- /test/test-data-uri.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * 测试 Data URI 支持 3 | */ 4 | 5 | import { validateImageSource, imageToBase64 } from '../src/image-processor.js'; 6 | 7 | // 一个有效的 1x1 像素 PNG 图片的 Data URI 8 | const validDataUri = 
'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=='; 9 | 10 | // 无效的 Data URI(不支持的格式) 11 | const invalidDataUri = 'data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIxMDAiIGhlaWdodD0iMTAwIj48Y2lyY2xlIGN4PSI1MCIgY3k9IjUwIiByPSI0MCIgc3R5bGU9ImZpbGw6I2ZmZiIgLz48L3N2Zz4='; 12 | 13 | async function testDataUri() { 14 | console.log('🧪 测试 Data URI 支持\n'); 15 | 16 | // 测试 1: 验证有效的 Data URI 17 | try { 18 | console.log('测试 1: 验证有效的 PNG Data URI'); 19 | await validateImageSource(validDataUri); 20 | console.log('✅ 通过:有效的 Data URI 验证成功\n'); 21 | } catch (error) { 22 | console.log(`❌ 失败: ${error instanceof Error ? error.message : String(error)}\n`); 23 | } 24 | 25 | // 测试 2: 验证无效的 Data URI(不支持的格式) 26 | try { 27 | console.log('测试 2: 验证不支持的格式 (SVG)'); 28 | await validateImageSource(invalidDataUri); 29 | console.log('❌ 失败:应该抛出错误\n'); 30 | } catch (error) { 31 | console.log(`✅ 通过:正确拒绝不支持的格式 - ${error instanceof Error ? error.message : String(error)}\n`); 32 | } 33 | 34 | // 测试 3: Data URI 转换(应该直接返回) 35 | try { 36 | console.log('测试 3: Data URI 转换'); 37 | const result = await imageToBase64(validDataUri); 38 | if (result === validDataUri) { 39 | console.log('✅ 通过:Data URI 正确传递(未修改)\n'); 40 | } else { 41 | console.log('❌ 失败:Data URI 被修改了\n'); 42 | } 43 | } catch (error) { 44 | console.log(`❌ 失败: ${error instanceof Error ? error.message : String(error)}\n`); 45 | } 46 | 47 | // 测试 4: 大小验证(创建一个超过10MB的Data URI) 48 | try { 49 | console.log('测试 4: 验证大小限制 (>10MB)'); 50 | // 创建一个约 15MB 的 base64 字符串(20MB * 3/4 = 15MB) 51 | const largeBase64 = 'A'.repeat(20 * 1024 * 1024); 52 | const largeDataUri = `data:image/png;base64,${largeBase64}`; 53 | await validateImageSource(largeDataUri); 54 | console.log('❌ 失败:应该拒绝过大的文件\n'); 55 | } catch (error) { 56 | console.log(`✅ 通过:正确拒绝超大文件 - ${error instanceof Error ? 
error.message : String(error)}\n`); 57 | } 58 | 59 | console.log('=========================================='); 60 | console.log('✅ Data URI 测试完成!'); 61 | console.log('==========================================\n'); 62 | } 63 | 64 | testDataUri().catch(console.error); 65 | -------------------------------------------------------------------------------- /test/test-qwen.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Qwen 客户端测试 3 | * 测试阿里云通义千问VL视觉理解 4 | */ 5 | 6 | import { QwenClient } from '../src/qwen-client.js'; 7 | import { imageToBase64 } from '../src/image-processor.js'; 8 | 9 | async function testQwen() { 10 | const apiKey = process.env.DASHSCOPE_API_KEY; 11 | 12 | if (!apiKey) { 13 | console.error('❌ 错误: 需要设置 DASHSCOPE_API_KEY 环境变量'); 14 | console.log('设置方法:'); 15 | console.log(' macOS/Linux: export DASHSCOPE_API_KEY="your-api-key"'); 16 | console.log(' Windows: $env:DASHSCOPE_API_KEY="your-api-key"'); 17 | process.exit(1); 18 | } 19 | 20 | // 获取图片路径 21 | const imagePath = process.argv[2]; 22 | if (!imagePath) { 23 | console.error('❌ 错误: 请提供图片路径'); 24 | console.log('用法: tsx test/test-qwen.ts <图片路径>'); 25 | console.log('示例: tsx test/test-qwen.ts ./test.png'); 26 | process.exit(1); 27 | } 28 | 29 | console.log('🚀 开始测试 Qwen3-VL-Flash...\n'); 30 | 31 | try { 32 | // 1. 初始化客户端 33 | console.log('1️⃣ 初始化 Qwen 客户端...'); 34 | const client = new QwenClient( 35 | apiKey, 36 | 'qwen3-vl-flash', // 使用高性价比的 Flash 版本 37 | 4096, 38 | 0.7 39 | ); 40 | console.log(`✅ 客户端初始化成功: ${client.getModelName()}\n`); 41 | 42 | // 2. 读取图片 43 | console.log('2️⃣ 读取图片...'); 44 | const imageData = await imageToBase64(imagePath); 45 | console.log(`✅ 图片读取成功 (${imagePath})\n`); 46 | 47 | // 3. 
测试基础分析 48 | console.log('3️⃣ 测试基础分析(不启用思考模式)...'); 49 | const basicResult = await client.analyzeImage( 50 | imageData, 51 | '请详细分析这张图片的内容', 52 | false 53 | ); 54 | console.log('📊 基础分析结果:'); 55 | console.log(basicResult); 56 | console.log('\n'); 57 | 58 | // 4. 测试思考模式 59 | console.log('4️⃣ 测试思考模式(enable_thinking=true)...'); 60 | const thinkingResult = await client.analyzeImage( 61 | imageData, 62 | '请详细分析这张图片的内容,包括所有细节', 63 | true // 启用思考模式 64 | ); 65 | console.log('🧠 思考模式分析结果:'); 66 | console.log(thinkingResult); 67 | console.log('\n'); 68 | 69 | // 5. 测试 OCR 70 | console.log('5️⃣ 测试 OCR 能力...'); 71 | const ocrResult = await client.analyzeImage( 72 | imageData, 73 | '识别图片中的所有文字', 74 | false 75 | ); 76 | console.log('📝 OCR 结果:'); 77 | console.log(ocrResult); 78 | console.log('\n'); 79 | 80 | console.log('✅ 所有测试完成!'); 81 | 82 | } catch (error) { 83 | console.error('❌ 测试失败:', error instanceof Error ? error.message : error); 84 | process.exit(1); 85 | } 86 | } 87 | 88 | testQwen(); 89 | -------------------------------------------------------------------------------- /src/qwen-client.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * 阿里云通义千问VL客户端 3 | * 使用 OpenAI 兼容接口 4 | * API 文档: https://help.aliyun.com/zh/model-studio/vision 5 | */ 6 | 7 | import axios, { AxiosInstance } from 'axios'; 8 | import { VisionClient } from './vision-client.js'; 9 | import type { LumaConfig } from './config.js'; 10 | 11 | export class QwenClient implements VisionClient { 12 | private client: AxiosInstance; 13 | private apiKey: string; 14 | private model: string; 15 | private maxTokens: number; 16 | private temperature: number; 17 | 18 | constructor(config: LumaConfig) { 19 | this.apiKey = config.apiKey; 20 | this.model = config.model; 21 | this.maxTokens = config.maxTokens; 22 | this.temperature = config.temperature; 23 | 24 | // 使用阿里云百炼的 OpenAI 兼容接口 25 | this.client = axios.create({ 26 | baseURL: 
'https://dashscope.aliyuncs.com/compatible-mode/v1', 27 | headers: { 28 | 'Authorization': `Bearer ${config.apiKey}`, 29 | 'Content-Type': 'application/json', 30 | }, 31 | timeout: 180000, // 180秒超时 32 | }); 33 | } 34 | 35 | async analyzeImage(imageDataUrl: string, prompt: string, enableThinking?: boolean): Promise { 36 | try { 37 | // Qwen3-VL 支持思考模式,使用 extra_body 传递非标准参数 38 | const requestBody: any = { 39 | model: this.model, 40 | messages: [ 41 | { 42 | role: 'user', 43 | content: [ 44 | { 45 | type: 'image_url', 46 | image_url: { 47 | url: imageDataUrl 48 | } 49 | }, 50 | { 51 | type: 'text', 52 | text: prompt 53 | } 54 | ] 55 | } 56 | ], 57 | max_tokens: this.maxTokens, 58 | temperature: this.temperature, 59 | stream: false 60 | }; 61 | 62 | // 如果启用思考模式,添加 extra_body 参数 63 | if (enableThinking) { 64 | requestBody.extra_body = { 65 | enable_thinking: true, 66 | thinking_budget: 81920 // 最大思考 Token 数 67 | }; 68 | } 69 | 70 | const response = await this.client.post('/chat/completions', requestBody); 71 | 72 | if (!response.data?.choices?.[0]?.message?.content) { 73 | throw new Error('Invalid response format from Qwen API'); 74 | } 75 | 76 | return response.data.choices[0].message.content; 77 | 78 | } catch (error) { 79 | if (axios.isAxiosError(error)) { 80 | const errorMessage = error.response?.data?.error?.message || error.message; 81 | throw new Error(`Qwen API error: ${errorMessage}`); 82 | } 83 | throw error; 84 | } 85 | } 86 | 87 | getModelName(): string { 88 | return `Qwen (${this.model})`; 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /test/test-deepseek-raw.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * 直接测试 DeepSeek-OCR API(无任何包装) 3 | */ 4 | 5 | import axios from 'axios'; 6 | import * as fs from 'fs'; 7 | import * as path from 'path'; 8 | 9 | async function testDeepSeekOCR(imagePath: string) { 10 | console.log('\n🧪 测试 DeepSeek-OCR API(原始调用)\n'); 11 | 
12 | const apiKey = process.env.SILICONFLOW_API_KEY; 13 | 14 | if (!apiKey) { 15 | console.error('❌ 错误: 需要设置 SILICONFLOW_API_KEY 环境变量'); 16 | console.error('示例: $env:SILICONFLOW_API_KEY="your-api-key"'); 17 | process.exit(1); 18 | } 19 | 20 | // 读取图片并转为 base64 21 | const imageBuffer = fs.readFileSync(imagePath); 22 | const base64Image = imageBuffer.toString('base64'); 23 | const mimeType = imagePath.endsWith('.png') ? 'image/png' : 'image/jpeg'; 24 | const imageDataUrl = `data:${mimeType};base64,${base64Image}`; 25 | 26 | console.log(`📸 图片: ${imagePath}`); 27 | console.log(`📦 大小: ${(imageBuffer.length / 1024).toFixed(2)} KB\n`); 28 | 29 | // 测试不同的 prompt 30 | const prompts = [ 31 | '识别图片中的所有文字', 32 | 'OCR', 33 | 'Extract all text from this image', 34 | 'What do you see in this image?', 35 | '请详细描述这张图片' 36 | ]; 37 | 38 | for (const prompt of prompts) { 39 | console.log(`\n🔍 测试 Prompt: "${prompt}"`); 40 | console.log('─'.repeat(50)); 41 | 42 | try { 43 | const response = await axios.post( 44 | 'https://api.siliconflow.cn/v1/chat/completions', 45 | { 46 | model: 'deepseek-ai/DeepSeek-OCR', 47 | messages: [ 48 | { 49 | role: 'user', 50 | content: [ 51 | { 52 | type: 'image_url', 53 | image_url: { 54 | url: imageDataUrl, 55 | }, 56 | }, 57 | { 58 | type: 'text', 59 | text: prompt, 60 | }, 61 | ], 62 | }, 63 | ], 64 | temperature: 0.7, 65 | max_tokens: 4096, 66 | }, 67 | { 68 | headers: { 69 | 'Authorization': `Bearer ${apiKey}`, 70 | 'Content-Type': 'application/json', 71 | }, 72 | timeout: 60000, 73 | } 74 | ); 75 | 76 | const result = response.data.choices[0].message.content; 77 | const usage = response.data.usage; 78 | 79 | console.log(`✅ Tokens: ${usage.total_tokens} (prompt: ${usage.prompt_tokens}, completion: ${usage.completion_tokens})`); 80 | console.log(`📝 响应长度: ${result?.length || 0} 字符`); 81 | 82 | if (result && result.trim().length > 0) { 83 | console.log('\n📊 结果:'); 84 | console.log('─'.repeat(50)); 85 | console.log(result); 86 | 
console.log('─'.repeat(50)); 87 | console.log('\n✅ 找到有效响应!'); 88 | break; 89 | } else { 90 | console.log('❌ 空响应'); 91 | } 92 | } catch (error: any) { 93 | console.log(`❌ 错误: ${error.message}`); 94 | } 95 | } 96 | } 97 | 98 | // 运行测试 99 | const imagePath = path.join(process.cwd(), 'test.png'); 100 | testDeepSeekOCR(imagePath).catch(console.error); 101 | -------------------------------------------------------------------------------- /test/test-local.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Luma MCP 本地测试脚本 3 | * 直接测试图片分析功能,不需要MCP客户端 4 | */ 5 | 6 | import { loadConfig } from '../src/config.js'; 7 | import type { VisionClient } from '../src/vision-client.js'; 8 | import { ZhipuClient } from '../src/zhipu-client.js'; 9 | import { SiliconFlowClient } from '../src/siliconflow-client.js'; 10 | import { imageToBase64, validateImageSource } from '../src/image-processor.js'; 11 | import { buildAnalysisPrompt } from '../src/prompts.js'; 12 | import { logger } from '../src/utils/logger.js'; 13 | 14 | async function testImageAnalysis(imagePath: string, question?: string) { 15 | console.log('\n=========================================='); 16 | console.log('🧪 测试 Luma MCP 图片分析'); 17 | console.log('==========================================\n'); 18 | 19 | try { 20 | // 1. 加载配置 21 | console.log('📝 加载配置...'); 22 | const config = loadConfig(); 23 | console.log(`✅ 配置加载成功: 提供商 ${config.provider}, 模型 ${config.model}\n`); 24 | 25 | // 2. 验证图片 26 | console.log('🔍 验证图片来源...'); 27 | await validateImageSource(imagePath); 28 | console.log(`✅ 图片验证通过: ${imagePath}\n`); 29 | 30 | // 3. 处理图片 31 | console.log('🖼️ 处理图片...'); 32 | const imageDataUrl = await imageToBase64(imagePath); 33 | const isUrl = imagePath.startsWith('http'); 34 | console.log(`✅ 图片处理完成: ${isUrl ? 'URL' : 'Base64编码'}\n`); 35 | 36 | // 4. 构建提示词 37 | console.log('💬 构建提示词...'); 38 | // DeepSeek-OCR 需要简洁 prompt 39 | const prompt = config.provider === 'siliconflow' 40 | ? 
(question || '请详细分析这张图片的内容') 41 | : buildAnalysisPrompt(question); 42 | console.log(`✅ 提示词: ${question || '通用描述'}\n`); 43 | 44 | // 5. 创建客户端并调用API 45 | const client: VisionClient = config.provider === 'siliconflow' 46 | ? new SiliconFlowClient(config) 47 | : new ZhipuClient(config); 48 | 49 | const modelName = config.provider === 'siliconflow' ? 'DeepSeek-OCR' : 'GLM-4.5V'; 50 | console.log(`🤖 调用 ${modelName} API...`); 51 | const result = await client.analyzeImage(imageDataUrl, prompt); 52 | 53 | // 6. 显示结果 54 | console.log('\n=========================================='); 55 | console.log('📊 分析结果'); 56 | console.log('==========================================\n'); 57 | console.log(result); 58 | console.log('\n=========================================='); 59 | console.log('✅ 测试完成!'); 60 | console.log('==========================================\n'); 61 | 62 | } catch (error) { 63 | console.error('\n❌ 测试失败:'); 64 | console.error(error instanceof Error ? error.message : String(error)); 65 | process.exit(1); 66 | } 67 | } 68 | 69 | // 解析命令行参数 70 | const args = process.argv.slice(2); 71 | 72 | if (args.length === 0) { 73 | console.log(` 74 | 使用方法: 75 | npm run test:local <图片路径或URL> [问题] 76 | 77 | 示例: 78 | # 分析本地图片 79 | npm run test:local ./test.png 80 | 81 | # 分析本地图片并提问 82 | npm run test:local ./code-error.png "这段代码为什么报错?" 
83 | 84 | # 分析远程图片 85 | npm run test:local https://example.com/image.jpg 86 | 87 | 环境变量: 88 | # 使用智谱 GLM-4.5V 89 | ZHIPU_API_KEY=your-api-key 90 | 91 | # 使用硅基流动 DeepSeek-OCR 92 | MODEL_PROVIDER=siliconflow 93 | SILICONFLOW_API_KEY=your-api-key 94 | `); 95 | process.exit(1); 96 | } 97 | 98 | const imagePath = args[0]; 99 | const question = args.slice(1).join(' ') || undefined; 100 | 101 | testImageAnalysis(imagePath, question); 102 | -------------------------------------------------------------------------------- /src/siliconflow-client.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * 硅基流动 DeepSeek-OCR API 客户端 3 | * 基于 OpenAI 兼容 API 4 | */ 5 | 6 | import axios from 'axios'; 7 | import type { VisionClient } from './vision-client.js'; 8 | import type { LumaConfig } from './config.js'; 9 | import { logger } from './utils/logger.js'; 10 | 11 | interface SiliconFlowMessage { 12 | role: string; 13 | content: Array<{ 14 | type: string; 15 | text?: string; 16 | image_url?: { 17 | url: string; 18 | }; 19 | }>; 20 | } 21 | 22 | interface SiliconFlowRequest { 23 | model: string; 24 | messages: SiliconFlowMessage[]; 25 | temperature?: number; 26 | max_tokens?: number; 27 | top_p?: number; 28 | stream?: boolean; 29 | } 30 | 31 | interface SiliconFlowResponse { 32 | id: string; 33 | object: string; 34 | created: number; 35 | model: string; 36 | choices: Array<{ 37 | index: number; 38 | message: { 39 | role: string; 40 | content: string; 41 | }; 42 | finish_reason: string; 43 | }>; 44 | usage: { 45 | prompt_tokens: number; 46 | completion_tokens: number; 47 | total_tokens: number; 48 | }; 49 | } 50 | 51 | /** 52 | * 硅基流动 API 客户端 53 | */ 54 | export class SiliconFlowClient implements VisionClient { 55 | private apiKey: string; 56 | private model: string; 57 | private maxTokens: number; 58 | private temperature: number; 59 | private apiEndpoint = 'https://api.siliconflow.cn/v1/chat/completions'; 60 | 61 | constructor(config: 
LumaConfig) { 62 | this.apiKey = config.apiKey; 63 | this.model = config.model; 64 | this.maxTokens = config.maxTokens; 65 | this.temperature = config.temperature; 66 | } 67 | 68 | /** 69 | * 分析图片 70 | */ 71 | async analyzeImage(imageDataUrl: string, prompt: string, enableThinking?: boolean): Promise { 72 | const requestBody: SiliconFlowRequest = { 73 | model: this.model, 74 | messages: [ 75 | { 76 | role: 'user', 77 | content: [ 78 | { 79 | type: 'image_url', 80 | image_url: { 81 | url: imageDataUrl, 82 | }, 83 | }, 84 | { 85 | type: 'text', 86 | text: prompt, 87 | }, 88 | ], 89 | }, 90 | ], 91 | temperature: this.temperature, 92 | max_tokens: this.maxTokens, 93 | stream: false, 94 | }; 95 | 96 | logger.info('Calling SiliconFlow DeepSeek-OCR API', { 97 | model: this.model, 98 | }); 99 | 100 | try { 101 | const response = await axios.post( 102 | this.apiEndpoint, 103 | requestBody, 104 | { 105 | headers: { 106 | 'Authorization': `Bearer ${this.apiKey}`, 107 | 'Content-Type': 'application/json', 108 | }, 109 | timeout: 60000, // 60秒超时 110 | } 111 | ); 112 | 113 | if (!response.data.choices || response.data.choices.length === 0) { 114 | throw new Error('No response from DeepSeek-OCR'); 115 | } 116 | 117 | const result = response.data.choices[0].message.content; 118 | const usage = response.data.usage; 119 | 120 | logger.info('SiliconFlow API call successful', { 121 | tokens: usage?.total_tokens || 0, 122 | model: response.data.model 123 | }); 124 | 125 | return result; 126 | } catch (error) { 127 | logger.error('SiliconFlow API call failed', { 128 | error: error instanceof Error ? 
error.message : String(error) 129 | }); 130 | 131 | if (axios.isAxiosError(error)) { 132 | const message = error.response?.data?.error?.message || error.message; 133 | const status = error.response?.status; 134 | throw new Error(`SiliconFlow API error (${status || 'unknown'}): ${message}`); 135 | } 136 | throw error; 137 | } 138 | } 139 | 140 | /** 141 | * 获取模型名称 142 | */ 143 | getModelName(): string { 144 | return `DeepSeek (${this.model})`; 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /src/volcengine-client.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * 火山方舟 Volcengine Doubao 视觉模型客户端 3 | * 支持 Doubao-Seed-1.6 系列模型(flash、vision、lite) 4 | * 使用 Chat Completions API 格式 5 | */ 6 | 7 | import axios from "axios"; 8 | import type { VisionClient } from "./vision-client.js"; 9 | import type { LumaConfig } from "./config.js"; 10 | import { logger } from "./utils/logger.js"; 11 | 12 | interface VolcengineMessage { 13 | role: string; 14 | content: Array<{ 15 | type: string; 16 | text?: string; 17 | image_url?: { 18 | url: string; 19 | }; 20 | }>; 21 | } 22 | 23 | interface VolcengineRequest { 24 | model: string; 25 | messages: VolcengineMessage[]; 26 | temperature?: number; 27 | max_tokens?: number; 28 | top_p?: number; 29 | stream?: boolean; 30 | } 31 | 32 | interface VolcengineResponse { 33 | id: string; 34 | object: string; 35 | created: number; 36 | model: string; 37 | choices: Array<{ 38 | index: number; 39 | message: { 40 | role: string; 41 | content: string; 42 | }; 43 | finish_reason: string; 44 | }>; 45 | usage: { 46 | prompt_tokens: number; 47 | completion_tokens: number; 48 | total_tokens: number; 49 | }; 50 | } 51 | 52 | /** 53 | * 火山方舟客户端 54 | */ 55 | export class VolcengineClient implements VisionClient { 56 | private apiKey: string; 57 | private model: string; 58 | private maxTokens: number; 59 | private temperature: number; 60 | private apiEndpoint = 61 
| "https://ark.cn-beijing.volces.com/api/v3/chat/completions"; 62 | 63 | constructor(config: LumaConfig) { 64 | this.apiKey = config.apiKey; 65 | this.model = config.model; 66 | this.maxTokens = config.maxTokens; 67 | this.temperature = config.temperature; 68 | } 69 | 70 | /** 71 | * 分析图片 72 | */ 73 | async analyzeImage( 74 | imageDataUrl: string, 75 | prompt: string, 76 | enableThinking?: boolean 77 | ): Promise { 78 | const requestBody: VolcengineRequest = { 79 | model: this.model, 80 | messages: [ 81 | { 82 | role: "user", 83 | content: [ 84 | { 85 | type: "image_url", 86 | image_url: { 87 | url: imageDataUrl, 88 | }, 89 | }, 90 | { 91 | type: "text", 92 | text: prompt, 93 | }, 94 | ], 95 | }, 96 | ], 97 | temperature: this.temperature, 98 | max_tokens: this.maxTokens, 99 | stream: false, 100 | }; 101 | 102 | logger.info("Calling Volcengine Doubao API", { 103 | model: this.model, 104 | thinking: !!enableThinking, 105 | }); 106 | 107 | try { 108 | const response = await axios.post( 109 | this.apiEndpoint, 110 | requestBody, 111 | { 112 | headers: { 113 | Authorization: `Bearer ${this.apiKey}`, 114 | "Content-Type": "application/json", 115 | }, 116 | timeout: 120000, // 120秒超时 117 | } 118 | ); 119 | 120 | if (!response.data.choices || response.data.choices.length === 0) { 121 | throw new Error("No response from Volcengine Doubao"); 122 | } 123 | 124 | const result = response.data.choices[0].message.content; 125 | const usage = response.data.usage; 126 | 127 | logger.info("Volcengine Doubao API call successful", { 128 | tokens: usage?.total_tokens || 0, 129 | model: response.data.model, 130 | }); 131 | 132 | return result; 133 | } catch (error) { 134 | logger.error("Volcengine Doubao API call failed", { 135 | error: error instanceof Error ? 
error.message : String(error), 136 | }); 137 | 138 | if (axios.isAxiosError(error)) { 139 | const message = error.response?.data?.error?.message || error.message; 140 | const status = error.response?.status; 141 | throw new Error( 142 | `Volcengine Doubao API error (${status || "unknown"}): ${message}` 143 | ); 144 | } 145 | throw error; 146 | } 147 | } 148 | 149 | /** 150 | * 获取模型名称 151 | */ 152 | getModelName(): string { 153 | return `Doubao (${this.model})`; 154 | } 155 | } 156 | -------------------------------------------------------------------------------- /src/zhipu-client.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * 智谱 GLM-4.6V API 客户端 3 | */ 4 | 5 | import axios from "axios"; 6 | import type { VisionClient } from "./vision-client.js"; 7 | import type { LumaConfig } from "./config.js"; 8 | import { logger } from "./utils/logger.js"; 9 | 10 | interface ZhipuMessage { 11 | role: string; 12 | content: Array<{ 13 | type: string; 14 | text?: string; 15 | image_url?: { 16 | url: string; 17 | }; 18 | }>; 19 | } 20 | 21 | interface ZhipuRequest { 22 | model: string; 23 | messages: ZhipuMessage[]; 24 | temperature: number; 25 | max_tokens: number; 26 | top_p: number; 27 | thinking?: { 28 | type: string; 29 | }; 30 | } 31 | 32 | interface ZhipuResponse { 33 | id: string; 34 | created: number; 35 | model: string; 36 | choices: Array<{ 37 | index: number; 38 | message: { 39 | role: string; 40 | content: string; 41 | }; 42 | finish_reason: string; 43 | }>; 44 | usage: { 45 | prompt_tokens: number; 46 | completion_tokens: number; 47 | total_tokens: number; 48 | }; 49 | } 50 | 51 | /** 52 | * 智谱 API 客户端 53 | */ 54 | export class ZhipuClient implements VisionClient { 55 | private apiKey: string; 56 | private model: string; 57 | private maxTokens: number; 58 | private temperature: number; 59 | private topP: number; 60 | private apiEndpoint = "https://open.bigmodel.cn/api/paas/v4/chat/completions"; 61 | 62 | 
constructor(config: LumaConfig) { 63 | this.apiKey = config.apiKey; 64 | this.model = config.model; 65 | this.maxTokens = config.maxTokens; 66 | this.temperature = config.temperature; 67 | this.topP = config.topP; 68 | } 69 | 70 | /** 71 | * 分析图片 72 | */ 73 | async analyzeImage( 74 | imageDataUrl: string, 75 | prompt: string, 76 | enableThinking?: boolean 77 | ): Promise { 78 | const requestBody: ZhipuRequest = { 79 | model: this.model, 80 | messages: [ 81 | { 82 | role: "user", 83 | content: [ 84 | { 85 | type: "image_url", 86 | image_url: { 87 | url: imageDataUrl, 88 | }, 89 | }, 90 | { 91 | type: "text", 92 | text: prompt, 93 | }, 94 | ], 95 | }, 96 | ], 97 | temperature: this.temperature, 98 | max_tokens: this.maxTokens, 99 | top_p: this.topP, 100 | thinking: { type: "enabled" }, // 默认启用思考模式,提高分析准确性 101 | }; 102 | 103 | // 允许显式禁用 thinking(如需要更快速度) 104 | if (enableThinking === false) { 105 | delete requestBody.thinking; 106 | } 107 | 108 | logger.info("Calling GLM-4.6V API", { 109 | model: this.model, 110 | thinking: !!requestBody.thinking, 111 | }); 112 | 113 | try { 114 | const response = await axios.post( 115 | this.apiEndpoint, 116 | requestBody, 117 | { 118 | headers: { 119 | Authorization: `Bearer ${this.apiKey}`, 120 | "Content-Type": "application/json", 121 | }, 122 | timeout: 60000, // 60秒超时 123 | } 124 | ); 125 | 126 | if (!response.data.choices || response.data.choices.length === 0) { 127 | throw new Error("No response from GLM-4.6V"); 128 | } 129 | 130 | const result = response.data.choices[0].message.content; 131 | const usage = response.data.usage; 132 | 133 | logger.info("GLM-4.6V API call successful", { 134 | tokens: usage?.total_tokens || 0, 135 | model: response.data.model, 136 | }); 137 | 138 | return result; 139 | } catch (error) { 140 | logger.error("GLM-4.6V API call failed", { 141 | error: error instanceof Error ? 
error.message : String(error), 142 | }); 143 | 144 | if (axios.isAxiosError(error)) { 145 | const message = error.response?.data?.error?.message || error.message; 146 | const status = error.response?.status; 147 | throw new Error( 148 | `GLM-4.6V API error (${status || "unknown"}): ${message}` 149 | ); 150 | } 151 | throw error; 152 | } 153 | } 154 | 155 | /** 156 | * 获取模型名称 157 | */ 158 | getModelName(): string { 159 | return `GLM (${this.model})`; 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /src/image-processor.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * 图片处理工具 3 | * 负责读取、压缩和编码图片(支持本地文件和远程 URL) 4 | */ 5 | 6 | import { readFile, stat } from "fs/promises"; 7 | import sharp from "sharp"; 8 | import { isUrl } from "./utils/helpers.js"; 9 | import { logger } from "./utils/logger.js"; 10 | 11 | // 判断是否为 Data URI(data:image/png;base64,....) 12 | function isDataUri(input: string): boolean { 13 | return ( 14 | typeof input === "string" && 15 | input.startsWith("data:") && 16 | /;base64,/.test(input) 17 | ); 18 | } 19 | 20 | // 从 Data URI 获取 mimeType 21 | function getMimeFromDataUri(input: string): string | null { 22 | const match = input.match(/^data:([^;]+);base64,/i); 23 | return match ? 
match[1].toLowerCase() : null; 24 | } 25 | 26 | // 估算 Data URI 的原始字节大小(不含头部) 27 | function estimateBytesFromDataUri(input: string): number { 28 | try { 29 | const base64 = input.split(",")[1] || ""; 30 | // base64 长度 * 3/4,忽略 padding 近似即可 31 | return Math.floor((base64.length * 3) / 4); 32 | } catch { 33 | return 0; 34 | } 35 | } 36 | 37 | /** 38 | * 规范化本地图像路径(例如去掉前缀符号) 39 | * 某些客户端会使用 "@path/to/file" 作为文件引用,这里统一转换为真实路径 40 | */ 41 | function normalizeImageSourcePath(source: string): string { 42 | if (typeof source === "string" && source.startsWith("@")) { 43 | const normalized = source.slice(1); 44 | logger.debug("Normalized @-prefixed image path", { 45 | original: source, 46 | normalized, 47 | }); 48 | return normalized; 49 | } 50 | return source; 51 | } 52 | 53 | /** 54 | * 验证图片来源(文件或URL) 55 | */ 56 | export async function validateImageSource( 57 | imageSource: string, 58 | maxSizeMB: number = 10 59 | ): Promise { 60 | // 先规范化可能带有前缀符号的本地路径(如 "@image.png") 61 | const normalizedSource = normalizeImageSourcePath(imageSource); 62 | 63 | // 如果是 URL,直接返回 64 | if (isUrl(normalizedSource)) { 65 | logger.debug("Image source is URL, skipping validation"); 66 | return; 67 | } 68 | 69 | // 验证本地文件 70 | try { 71 | const stats = await stat(normalizedSource); 72 | const fileSizeMB = stats.size / (1024 * 1024); 73 | 74 | if (fileSizeMB > maxSizeMB) { 75 | throw new Error( 76 | `Image file too large: ${fileSizeMB.toFixed(2)}MB (max: ${maxSizeMB}MB)` 77 | ); 78 | } 79 | 80 | // 验证文件格式 81 | const ext = normalizedSource.toLowerCase().split(".").pop(); 82 | const supportedFormats = ["jpg", "jpeg", "png", "webp", "gif"]; 83 | 84 | if (!ext || !supportedFormats.includes(ext)) { 85 | throw new Error( 86 | `Unsupported image format: ${ext}. 
Supported: ${supportedFormats.join( 87 | ", " 88 | )}` 89 | ); 90 | } 91 | } catch (error) { 92 | if ((error as any).code === "ENOENT") { 93 | throw new Error(`Image file not found: ${normalizedSource}`); 94 | } 95 | throw error; 96 | } 97 | } 98 | 99 | /** 100 | * 将图片转换为 base64 data URL 或返回URL 101 | */ 102 | export async function imageToBase64(imagePath: string): Promise { 103 | try { 104 | // 规范化本地路径(处理可能的前缀符号) 105 | const normalizedPath = normalizeImageSourcePath(imagePath); 106 | 107 | // 如果是 URL,直接返回 108 | if (isUrl(normalizedPath)) { 109 | logger.info("Using remote image URL", { url: normalizedPath }); 110 | return normalizedPath; 111 | } 112 | 113 | // 本地文件:读取并编码 114 | let imageBuffer: Buffer = await readFile(normalizedPath); 115 | 116 | // 检查文件大小,如果超过 2MB 则压缩 117 | if (imageBuffer.length > 2 * 1024 * 1024) { 118 | logger.info("Compressing large image", { 119 | originalSize: `${(imageBuffer.length / (1024 * 1024)).toFixed(2)}MB`, 120 | }); 121 | imageBuffer = Buffer.from(await compressImage(imageBuffer)); 122 | } 123 | 124 | // 转换为 base64 125 | const base64 = imageBuffer.toString("base64"); 126 | const mimeType = getMimeType(normalizedPath); 127 | 128 | return `data:${mimeType};base64,${base64}`; 129 | } catch (error) { 130 | throw new Error( 131 | `Failed to process image: ${ 132 | error instanceof Error ? 
error.message : "Unknown error" 133 | }` 134 | ); 135 | } 136 | } 137 | 138 | /** 139 | * 压缩图片 140 | */ 141 | async function compressImage(imageBuffer: Buffer): Promise { 142 | return sharp(imageBuffer) 143 | .resize(2048, 2048, { 144 | fit: "inside", 145 | withoutEnlargement: true, 146 | }) 147 | .jpeg({ quality: 85 }) 148 | .toBuffer(); 149 | } 150 | 151 | /** 152 | * 根据文件扩展名获取 MIME 类型 153 | */ 154 | function getMimeType(filePath: string): string { 155 | const ext = filePath.toLowerCase().split(".").pop(); 156 | 157 | switch (ext) { 158 | case "jpg": 159 | case "jpeg": 160 | return "image/jpeg"; 161 | case "png": 162 | return "image/png"; 163 | case "webp": 164 | return "image/webp"; 165 | case "gif": 166 | return "image/gif"; 167 | default: 168 | return "image/jpeg"; // 默认使用 jpeg 169 | } 170 | } 171 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | /** 4 | * Luma MCP Server 5 | * 通用图像理解 MCP 服务器,支持多家视觉模型提供商 6 | */ 7 | 8 | // 第一件事:重定向console到stderr,避免污染MCP的stdout 9 | import { setupConsoleRedirection, logger } from "./utils/logger.js"; 10 | setupConsoleRedirection(); 11 | 12 | import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; 13 | import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; 14 | import { z } from "zod"; 15 | 16 | import { loadConfig } from "./config.js"; 17 | import type { VisionClient } from "./vision-client.js"; 18 | import { ZhipuClient } from "./zhipu-client.js"; 19 | import { SiliconFlowClient } from "./siliconflow-client.js"; 20 | import { QwenClient } from "./qwen-client.js"; 21 | import { VolcengineClient } from "./volcengine-client.js"; 22 | import { imageToBase64, validateImageSource } from "./image-processor.js"; 23 | import { 24 | withRetry, 25 | createSuccessResponse, 26 | createErrorResponse, 27 | } from "./utils/helpers.js"; 28 | 29 | 
/** 30 | * 创建 MCP 服务器 31 | */ 32 | async function createServer() { 33 | logger.info("Initializing Luma MCP Server"); 34 | 35 | // 加载配置 36 | const config = loadConfig(); 37 | 38 | // 根据配置选择模型客户端 39 | let visionClient: VisionClient; 40 | 41 | if (config.provider === "siliconflow") { 42 | visionClient = new SiliconFlowClient(config); 43 | } else if (config.provider === "qwen") { 44 | visionClient = new QwenClient(config); 45 | } else if (config.provider === "volcengine") { 46 | visionClient = new VolcengineClient(config); 47 | } else { 48 | visionClient = new ZhipuClient(config); 49 | } 50 | 51 | logger.info("Vision client initialized", { 52 | provider: config.provider, 53 | model: visionClient.getModelName(), 54 | }); 55 | 56 | // 创建服务器 - 使用 McpServer 57 | const server = new McpServer( 58 | { 59 | name: "luma-mcp", 60 | version: "1.0.0", 61 | }, 62 | { 63 | capabilities: { 64 | tools: {}, 65 | }, 66 | } 67 | ); 68 | 69 | // 创建带重试的分析函数 70 | const analyzeWithRetry = withRetry( 71 | async (imageSource: string, prompt: string) => { 72 | // 1. 验证图片来源 73 | await validateImageSource(imageSource); 74 | 75 | // 2. 处理图片(读取或返回URL) 76 | const imageDataUrl = await imageToBase64(imageSource); 77 | 78 | // 3. 直接使用原始提示词(不进行包装或增强) 79 | const fullPrompt = prompt; 80 | 81 | // 4. 
调用视觉模型分析图片 82 | return await visionClient.analyzeImage(imageDataUrl, fullPrompt); 83 | }, 84 | 2, // 最多重试2次 85 | 1000 // 初始延补1秒 86 | ); 87 | 88 | // 注册工具 - 使用 McpServer.tool() API 89 | server.tool( 90 | "analyze_image", 91 | "图像分析工具:支持三种使用方式:1) 用户粘贴图片时直接调用,无需手动指定路径 2) 指定本地图片路径,如./screenshot.png 3) 指定图片URL,如https://example.com/image.png。AI应根据用户问题生成专业的分析提示词(如用户问'网站布局有什么问题',应生成'请详细分析这个网站界面的布局问题,包括视觉层次、对齐方式、间距、响应式设计等方面的问题'),然后传递提示词和图片进行调用。", 92 | { 93 | image_source: z 94 | .string() 95 | .describe( 96 | "要分析的图片来源:支持三种方式 1) 用户粘贴图片时由Claude Desktop自动提供路径 2) 本地文件路径,如./screenshot.png 3) HTTP(S)图片URL,如https://example.com/image.png(支持 PNG、JPG、JPEG、WebP、GIF,最大 10MB)" 97 | ), 98 | prompt: z 99 | .string() 100 | .describe( 101 | '分析提示词:AI根据用户问题生成的专业分析提示词。应该包含具体的分析要求和期望的输出格式。' 102 | ), 103 | }, 104 | async (params) => { 105 | try { 106 | // AI应该已经根据用户问题生成了合适的prompt 107 | const prompt = params.prompt; 108 | 109 | logger.info("Analyzing image", { 110 | source: params.image_source, 111 | prompt, 112 | }); 113 | 114 | // 执行分析(带重试) 115 | const result = await analyzeWithRetry(params.image_source, prompt); 116 | 117 | logger.info("Image analysis completed successfully"); 118 | return createSuccessResponse(result); 119 | } catch (error) { 120 | logger.error("Image analysis failed", { 121 | error: error instanceof Error ? error.message : String(error), 122 | }); 123 | 124 | return createErrorResponse( 125 | error instanceof Error ? error.message : "Unknown error" 126 | ); 127 | } 128 | } 129 | ); 130 | 131 | return server; 132 | } 133 | 134 | /** 135 | * 主函数 136 | */ 137 | async function main() { 138 | try { 139 | const server = await createServer(); 140 | const transport = new StdioServerTransport(); 141 | await server.connect(transport); 142 | 143 | logger.info("Luma MCP server started successfully on stdio"); 144 | } catch (error) { 145 | logger.error("Failed to start Luma MCP server", { 146 | error: error instanceof Error ? 
error.message : String(error), 147 | }); 148 | process.exit(1); 149 | } 150 | } 151 | 152 | // 全局错误处理 153 | process.on("uncaughtException", (error) => { 154 | logger.error("Uncaught exception", { 155 | error: error.message, 156 | stack: error.stack, 157 | }); 158 | process.exit(1); 159 | }); 160 | 161 | process.on("unhandledRejection", (reason) => { 162 | logger.error("Unhandled rejection", { reason }); 163 | process.exit(1); 164 | }); 165 | 166 | process.on("SIGINT", () => { 167 | logger.info("Received SIGINT, shutting down gracefully"); 168 | process.exit(0); 169 | }); 170 | 171 | process.on("SIGTERM", () => { 172 | logger.info("Received SIGTERM, shutting down gracefully"); 173 | process.exit(0); 174 | }); 175 | 176 | main(); 177 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | 本项目的所有重大变更都将记录在此文件中。 4 | 5 | ## [1.2.7] - 2025-12-17 6 | 7 | ### Added 8 | 9 | - 🆕 **火山方舟 Provider**: 新增第四个视觉模型提供商 - 火山方舟 Volcengine 10 | - 🎯 **Doubao-Seed-1.6 系列**: 支持 flash、vision、lite 多种版本 11 | - 🔧 **统一配置架构**: 客户端构造函数改为接受 LumaConfig 对象,实现配置集中管理 12 | - 🖼️ **完整图片格式支持**: 火山方舟支持 base64 数据、URL 链接和本地文件 13 | 14 | ### Changed 15 | 16 | - 🏗️ **架构重构**: 三个现有客户端(Zhipu、SiliconFlow、Qwen)重构为统一配置对象模式 17 | - 🗃️ **客户端优化**: 移除硬编码默认值,所有配置统一从环境变量读取 18 | - 📝 **API 格式统一**: 火山方舟客户端改为使用 Chat Completions API 格式,与其他 provider 保持一致 19 | - 📚 **文档完善**: 更新中英文 README,添加火山方舟配置示例和模型对比 20 | 21 | ### Technical Details 22 | 23 | - `src/config.ts`: 新增 volcengine provider 支持,添加 VOLCENGINE_API_KEY 环境变量 24 | - `src/volcengine-client.ts`: 新文件,完整实现 VolcengineClient 类,支持 Chat Completions API 25 | - `src/zhipu-client.ts`: 重构构造函数,移除硬编码参数,支持 LumaConfig 26 | - `src/siliconflow-client.ts`: 重构构造函数,支持统一配置对象 27 | - `src/qwen-client.ts`: 重构构造函数,支持统一配置对象 28 | - `src/index.ts`: 添加 VolcengineClient 导入和实例化逻辑 29 | - `.env.example`: 添加火山方舟配置示例和说明 30 | - `README.md` & `docs/README_EN.md`: 
新增火山方舟特性说明和配置示例 31 | 32 | ### Provider Summary 33 | 34 | 现在支持 4 个视觉模型提供商: 35 | 36 | 1. **智谱 GLM-4.6V** (默认): 中文理解优秀,16384 tokens 37 | 2. **硅基流动 DeepSeek-OCR**: 免费使用,OCR 能力强 38 | 3. **阿里云 Qwen3-VL-Flash**: 速度快成本低,支持思考模式 39 | 4. **火山方舟 Doubao-Seed-1.6**: 性价比高,256k 上下文,支持多种版本 40 | 41 | ## [1.2.6] - 2025-12-16 42 | 43 | ### Changed 44 | 45 | - 🚀 **模型升级**: 更新智谱模型从 GLM-4.5V 升级至 GLM-4.6V,性能和理解能力提升 46 | - 📈 **Token 限制提升**: 默认 maxTokens 从 8192 提升至 16384,支持更详细的分析输出 47 | - 💡 **思考模式默认开启**: ENABLE_THINKING 默认为 true,提供更准确的分析结果 48 | - 🧹 **代码清理**: 移除 prompts.ts 提示词模板文件,简化架构 49 | - 🔧 **TypeScript 优化**: 清理未使用的类型导入,修复 TS6133 警告 50 | - 📝 **文档完善**: 更新中英文 README,强化三种使用方式说明(粘贴图片、本地路径、URL) 51 | 52 | ### Technical Details 53 | 54 | - `src/config.ts`: 更新默认模型为 glm-4.6v,默认 maxTokens 改为 16384,enableThinking 默认为 true 55 | - `src/zhipu-client.ts`: 更新模型引用,清理未使用导入 56 | - `src/siliconflow-client.ts`: 清理未使用的类型导入 57 | - `src/index.ts`: 简化 prompt 处理逻辑,直接使用原始提示词 58 | - 删除 `src/prompts.ts`: 移除 buildAnalysisPrompt 函数 59 | - README 更新: 模型信息、Token 配置、项目结构、思考模式配置 60 | 61 | ## [1.2.4] - 2025-12-16 (Reverted) 62 | 63 | ### Note 64 | 65 | 此版本因代码回滚问题被回退,所有优化内容已整合至 v1.2.6 66 | 67 | ## [1.2.3] - 2025-11-21 68 | 69 | ### Changed 70 | 71 | - 🧹 **代码清理**: 移除 Claude 特定调试注释和实验性代码 72 | - 📝 **工具描述优化**: 简化和专业化工具说明,提升 AI 模型调用成功率 73 | - 🔧 **路径处理通用化**: 重构 @ 前缀路径处理,移除平台特定命名 74 | 75 | ### Technical Details 76 | 77 | - 移除 Claude 资源读取相关的实验性代码 78 | - 重命名 `stripAtPrefix()` 为 `normalizeImageSourcePath()` 79 | - 清理所有客户端适配器中的调试日志和注释 80 | - 统一代码风格和注释规范 81 | 82 | ## [1.2.2] - 2025-11-20 83 | 84 | ### Added 85 | 86 | - ✨ **@ 路径支持**: 自动处理 Claude Code 的 @ 文件引用前缀,修复第一次调用失败的问题 87 | - 📝 **智能 Prompt**: 通用请求自动添加详细指引,保证全面分析 88 | 89 | ### Changed 90 | 91 | - 🔧 **Prompt 统一**: 简化为单一通用 prompt,智能处理不同场景 92 | - ✨ **表述优化**: 融合 Minimax 的经典表述,强调“不遗漏细节”和“完整提取” 93 | - 📚 **文档更新**: 更新项目结构,添加 qwen-client.ts 和测试文件 94 | 95 | ### Fixed 96 | 97 | - 🐛 **@ 路径问题**: 修复 Claude Code 中 `@folder/image.png` 导致的路径错误 98 | - 🐛 **编译错误**: 修复 image-processor.ts 中重复声明的变量 99 | 
100 | ### Technical Details 101 | 102 | - 新增 `stripAtPrefix()` 函数处理 Claude Code 的文件引用语法 103 | - 简化 `buildAnalysisPrompt()` 从两套逻辑到单一逻辑 104 | - 添加智能请求检测,自动补充详细分析指引 105 | 106 | ## [1.2.1] - 2025-11-18 107 | 108 | ### Changed 109 | 110 | - 📝 **文档优化**: 精简 README,移除冲余配置文件路径说明 111 | - 📝 **更新日志简化**: 将 README 中的详细更新日志替换为 CHANGELOG.md 链接 112 | - ✨ **Qwen 测试示例**: 添加 Qwen3-VL-Flash 本地测试命令 113 | - 💰 **定价信息**: 添加阿里云通义千问定价参考链接 114 | - 📋 **模型对比**: 更新模型选择表,完善 Qwen3-VL-Flash 信息 115 | - 🔗 **API Key 获取**: 添加阿里云百炼 API Key 获取指南 116 | - 📚 **相关链接**: 新增阿里云百炼平台和 Qwen3-VL 文档链接 117 | - 🐛 **错误信息**: 优化 API 调用失败排查提示,包含阿里云账户 118 | 119 | ### Fixed 120 | 121 | - 🐛 **描述修正**: 修正 package.json 中模型名称为 qwen3-vl-flash 122 | - 📝 **注释精简**: 简化 prompts.ts 注释头 123 | 124 | ## [1.2.0] - 2025-11-17 125 | 126 | ### Added 127 | 128 | - 🎉 **第三个视觉模型**: 新增阿里云通义千问 Qwen3-VL-Flash 支持 129 | - 💡 **思考模式**: Qwen3-VL-Flash 支持深度思考模式(enable_thinking),提升复杂场景分析准确性 130 | - ⚡ **高性价比**: Flash 版本速度更快、成本更低,适合大量使用 131 | - 🔌 **OpenAI 兼容**: 使用阿里云百炼的 OpenAI 兼容 API,统一接口设计 132 | - 🌐 **多地域支持**: 默认使用北京地域,支持新加坡地域配置 133 | 134 | ### Changed 135 | 136 | - ⚙️ 新增 `MODEL_PROVIDER=qwen` 和 `DASHSCOPE_API_KEY` 环境变量配置 137 | - 📝 更新所有文档(中英文),添加 Qwen3-VL-Flash 配置示例 138 | - 💰 默认使用 qwen3-vl-flash 模型,兹顾性能与成本 139 | - 🏗️ 重构客户端构造函数,统一参数传递方式 140 | 141 | ### Technical Details 142 | 143 | - 新增文件: 144 | - `src/qwen-client.ts` - 阿里云通义千问 VL API 客户端实现 145 | - 修改文件: 146 | - `src/config.ts` - 添加 'qwen' 提供商支持 147 | - `src/zhipu-client.ts` - 重构构造函数,支持独立参数 148 | - `src/siliconflow-client.ts` - 重构构造函数,支持独立参数 149 | - `src/index.ts` - 添加 Qwen 客户端初始化逻辑 150 | - `package.json` - 更新版本至 1.2.0,添加 qwen/aliyun/dashscope 关键词 151 | 152 | ## [1.1.1] - 2025-11-13 153 | 154 | ### Added 155 | 156 | - 🖼️ **Data URI 支持**: 支持接收 base64 编码的图片数据 (data:image/png;base64,...) 
157 | - 🚀 **为未来做准备**: 当 MCP 客户端支持时,可直接传递用户粘贴的图片 158 | 159 | ### Changed 160 | 161 | - 📝 更新工具描述,说明支持三种输入格式:本地路径、URL、Data URI 162 | - ✅ 新增 Data URI 格式验证(MIME 类型、大小限制) 163 | 164 | ## [1.1.0] - 2025-11-13 165 | 166 | ### Added 167 | 168 | - 🎉 **多模型支持**: 新增硅基流动 DeepSeek-OCR 支持 169 | - 🆓 **免费选项**: DeepSeek-OCR 通过硅基流动提供完全免费的 OCR 服务 170 | - 📐 **统一接口**: 创建 VisionClient 接口,支持灵活扩展更多视觉模型 171 | - ⚙️ **灵活配置**: 通过 `MODEL_PROVIDER` 环境变量轻松切换模型 172 | 173 | ### Changed 174 | 175 | - 🔧 环境变量命名优化,支持通用配置(`MODEL_NAME`、`MAX_TOKENS` 等) 176 | - 📝 更新文档,提供双模型配置说明和选择建议 177 | - 🏗️ 重构代码结构,提升可维护性 178 | 179 | ### Technical Details 180 | 181 | - 新增文件: 182 | - `src/vision-client.ts` - 视觉模型客户端统一接口 183 | - `src/siliconflow-client.ts` - 硅基流动 API 客户端实现 184 | - `.env.example` - 配置示例文件 185 | - 修改文件: 186 | - `src/config.ts` - 支持多提供商配置 187 | - `src/zhipu-client.ts` - 实现 VisionClient 接口 188 | - `src/index.ts` - 根据配置动态选择客户端 189 | - `README.md` - 完整的双模型使用文档 190 | 191 | ## [1.0.3] - 2025-11-12 192 | 193 | ### Features 194 | 195 | - 基于智谱 GLM-4.5V 的视觉理解能力 196 | - 支持本地文件和远程 URL 197 | - 内置重试机制 198 | - 思考模式支持 199 | 200 | --- 201 | 202 | **模型对比**: 203 | 204 | || 特性 | GLM-4.5V | DeepSeek-OCR | Qwen3-VL-Flash | 205 | ||----------|----------|--------------|----------------| 206 | || 提供商 | 智谱清言 | 硅基流动 | 阿里云百炼 | 207 | || 费用 | 收费 | **免费** | 收费 | 208 | || 中文理解 | 优秀 | 良好 | **优秀** | 209 | || OCR 能力 | 良好 | **优秀** | 优秀 | 210 | || 思考模式 | ✅ | ❌ | ✅ | 211 | || 速度/成本 | 中等 | 免费 | **快/低** | 212 | || 综合能力 | 良好 | OCR 专精 | **优秀** | 213 | || 3D 定位 | ❌ | ❌ | ✅ | 214 | 215 | **推荐使用场景**: 216 | 217 | - 需要 OCR/文字识别 → **DeepSeek-OCR** (免费) 218 | - 需要深度图片理解 → **Qwen3-VL-Flash** 或 **GLM-4.5V** 219 | - 需要思考模式 → **Qwen3-VL-Flash** 或 **GLM-4.5V** 220 | - 需要高性价比 → **Qwen3-VL-Flash** (速度快、成本低) 221 | - 需要 3D 定位/复杂分析 → **Qwen3-VL-Flash** 222 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Luma MCP 2 | 3 | 多模型视觉理解 MCP 
服务器,为不支持图片理解的 AI 助手提供视觉能力。 4 | 5 | [English](./docs/README_EN.md) | 中文 6 | 7 | ## 特性 8 | 9 | - **多模型支持**: 支持四个视觉模型 10 | - GLM-4.6V(智谱清言)- 付费,中文理解优秀 11 | - DeepSeek-OCR(硅基流动)- **免费使用**,OCR 能力强 12 | - Qwen3-VL-Flash(阿里云通义千问)- 付费,速度快成本低,支持思考模式 13 | - Doubao-Seed-1.6(火山方舟)- 付费,性价比高,支持多种版本 14 | - **简单设计**: 单一 `analyze_image` 工具处理所有图片分析任务 15 | - **智能理解**: 自动识别代码、UI、错误等不同场景 16 | - **全面支持**: 代码截图、界面设计、错误诊断、OCR 文字识别 17 | - **标准 MCP 协议**: 无缝集成 Claude Desktop、Cline 等 MCP 客户端 18 | - **URL 支持**: 支持本地文件和远程图片 URL 19 | - **重试机制**: 内置指数退避重试,提高可靠性 20 | 21 | ## 快速开始 22 | 23 | ### 前置要求 24 | 25 | - Node.js >= 18.0.0 26 | - **选择一种模型**: 27 | - **方案 A**: 智谱 AI API Key ([获取地址](https://open.bigmodel.cn/)) - 中文理解优秀 28 | - **方案 B**: 硅基流动 API Key ([获取地址](https://cloud.siliconflow.cn/)) - **免费使用**,OCR 能力强 29 | - **方案 C**: 阿里云百炼 API Key ([获取地址](https://bailian.console.aliyun.com/)) - 速度快成本低,支持思考模式 30 | - **方案 D**: 火山方舟 API Key ([获取地址](https://console.volcengine.com/ark)) - 性价比高,支持多种版本 31 | 32 | ### 安装 33 | 34 | #### 方式 1: 本地开发(推荐用于测试) 35 | 36 | ```bash 37 | git clone https://github.com/JochenYang/luma-mcp.git 38 | cd luma-mcp 39 | npm install 40 | npm run build 41 | ``` 42 | 43 | #### 方式 2: 使用 npx(需要先发布到 npm) 44 | 45 | ```bash 46 | npx luma-mcp 47 | ``` 48 | 49 | ### 配置 50 | 51 | #### Claude Desktop 52 | 53 | **方案 A: 使用智谱 GLM-4.6V**: 54 | 55 | ```json 56 | { 57 | "mcpServers": { 58 | "luma": { 59 | "command": "npx", 60 | "args": ["-y", "luma-mcp"], 61 | "env": { 62 | "ZHIPU_API_KEY": "your-zhipu-api-key" 63 | } 64 | } 65 | } 66 | } 67 | ``` 68 | 69 | **方案 B: 使用硅基流动 DeepSeek-OCR(免费)**: 70 | 71 | ```json 72 | { 73 | "mcpServers": { 74 | "luma": { 75 | "command": "npx", 76 | "args": ["-y", "luma-mcp"], 77 | "env": { 78 | "MODEL_PROVIDER": "siliconflow", 79 | "SILICONFLOW_API_KEY": "your-siliconflow-api-key" 80 | } 81 | } 82 | } 83 | } 84 | ``` 85 | 86 | **方案 C: 使用阿里云通义千问 Qwen3-VL-Flash**: 87 | 88 | ```json 89 | { 90 | "mcpServers": { 91 | "luma": { 92 | "command": "npx", 93 | "args": ["-y", 
"luma-mcp"], 94 | "env": { 95 | "MODEL_PROVIDER": "qwen", 96 | "DASHSCOPE_API_KEY": "your-dashscope-api-key" 97 | } 98 | } 99 | } 100 | } 101 | ``` 102 | 103 | **方案 D: 使用火山方舟 Doubao-Seed-1.6**: 104 | 105 | ```json 106 | { 107 | "mcpServers": { 108 | "luma": { 109 | "command": "npx", 110 | "args": ["-y", "luma-mcp"], 111 | "env": { 112 | "MODEL_PROVIDER": "volcengine", 113 | "VOLCENGINE_API_KEY": "your-volcengine-api-key", 114 | "MODEL_NAME": "doubao-seed-1-6-flash-250828" 115 | } 116 | } 117 | } 118 | } 119 | ``` 120 | 121 | **本地开发(智谱)**: 122 | 123 | ```json 124 | { 125 | "mcpServers": { 126 | "luma": { 127 | "command": "node", 128 | "args": ["D:\\codes\\Luma_mcp\\build\\index.js"], 129 | "env": { 130 | "ZHIPU_API_KEY": "your-zhipu-api-key" 131 | } 132 | } 133 | } 134 | } 135 | ``` 136 | 137 | **本地开发(硅基流动)**: 138 | 139 | ```json 140 | { 141 | "mcpServers": { 142 | "luma": { 143 | "command": "node", 144 | "args": ["D:\\codes\\Luma_mcp\\build\\index.js"], 145 | "env": { 146 | "MODEL_PROVIDER": "siliconflow", 147 | "SILICONFLOW_API_KEY": "your-siliconflow-api-key" 148 | } 149 | } 150 | } 151 | } 152 | ``` 153 | 154 | 配置完成后重启 Claude Desktop。 155 | 156 | #### Cline (VSCode) 157 | 158 | 在项目根目录或 `.vscode/` 目录下创建 `mcp.json` 159 | 160 | **方案 A: 使用智谱 GLM-4.6V**: 161 | 162 | ```json 163 | { 164 | "mcpServers": { 165 | "luma": { 166 | "command": "npx", 167 | "args": ["-y", "luma-mcp"], 168 | "env": { 169 | "ZHIPU_API_KEY": "your-zhipu-api-key" 170 | } 171 | } 172 | } 173 | } 174 | ``` 175 | 176 | **方案 B: 使用硅基流动 DeepSeek-OCR(免费)**: 177 | 178 | ```json 179 | { 180 | "mcpServers": { 181 | "luma": { 182 | "command": "npx", 183 | "args": ["-y", "luma-mcp"], 184 | "env": { 185 | "MODEL_PROVIDER": "siliconflow", 186 | "SILICONFLOW_API_KEY": "your-siliconflow-api-key" 187 | } 188 | } 189 | } 190 | } 191 | ``` 192 | 193 | **方案 C: 使用阿里云通义千问 Qwen3-VL-Flash**: 194 | 195 | ```json 196 | { 197 | "mcpServers": { 198 | "luma": { 199 | "command": "npx", 200 | "args": ["-y", "luma-mcp"], 201 | 
"env": { 202 | "MODEL_PROVIDER": "qwen", 203 | "DASHSCOPE_API_KEY": "your-dashscope-api-key" 204 | } 205 | } 206 | } 207 | } 208 | ``` 209 | 210 | #### Claude Code (命令行) 211 | 212 | **使用智谱 GLM-4.6V**: 213 | 214 | ```bash 215 | claude mcp add -s user luma-mcp --env ZHIPU_API_KEY=your-api-key -- npx -y luma-mcp 216 | ``` 217 | 218 | **使用硅基流动 DeepSeek-OCR(免费)**: 219 | 220 | ```bash 221 | claude mcp add -s user luma-mcp --env MODEL_PROVIDER=siliconflow --env SILICONFLOW_API_KEY=your-api-key -- npx -y luma-mcp 222 | ``` 223 | 224 | **使用阿里云通义千问 Qwen3-VL-Flash**: 225 | 226 | ```bash 227 | claude mcp add -s user luma-mcp --env MODEL_PROVIDER=qwen --env DASHSCOPE_API_KEY=your-api-key -- npx -y luma-mcp 228 | ``` 229 | 230 | #### 其他工具 231 | 232 | 更多 MCP 客户端配置方法请参考[智谱官方文档](https://docs.bigmodel.cn/cn/coding-plan/mcp/vision-mcp-server#claude-code) 233 | 234 | ## 使用方法 235 | 236 | ### 重要提示 237 | 238 | **MCP 工具调用机制**: 239 | 240 | - MCP 工具需要 AI 模型**主动调用**才会执行 241 | - 如果使用的 AI 模型本身支持视觉(如 Claude 4.5 Sonnet),它会优先使用自己的视觉能力 242 | - Luma MCP 主要服务于**不支持视觉的模型**(如 GPT-4、Claude Opus 等文本模型) 243 | 244 | **如何确保工具被调用**: 245 | 246 | 1. 使用完整工具名:`使用 mcp__luma-mcp__analyze_image 工具分析这张图片` 247 | 2. 使用简化名称:`用 analyze_image 工具查看 ./screenshot.png` 248 | 3. 提供图片路径:`请用图片分析工具查看 ./screenshot.png 中的代码错误` 249 | 4. 明确提及服务器:`通过 luma-mcp 服务器分析这张图片` 250 | 251 | **注意**: 直接在聊天框粘贴图片,非视觉模型不会自动调用 Luma,需要明确指示。 252 | 253 | ### 在 Claude code 中使用 254 | 255 | 配置完成后,在 Claude 对话中可以这样使用: 256 | 257 | **推荐用法(明确指示)**: 258 | 259 | ``` 260 | 用户: 使用 Luma 分析 ./code-error.png,这段代码为什么报错? 
261 | Claude: [调用 Luma 分析图片,返回详细分析] 262 | ``` 263 | 264 | **或提供图片路径**: 265 | 266 | ``` 267 | 用户: 请分析 https://example.com/screenshot.jpg 中的界面问题 268 | Claude: [自动调用 analyze_image 工具] 269 | ``` 270 | 271 | ### 本地测试 272 | 273 | 不需要 MCP 客户端即可测试: 274 | 275 | **测试智谱 GLM-4.6V**: 276 | 277 | ```bash 278 | # 设置 API Key 279 | export ZHIPU_API_KEY="your-api-key" # macOS/Linux 280 | $env:ZHIPU_API_KEY="your-api-key" # Windows PowerShell 281 | 282 | # 测试本地图片 283 | npm run test:local ./test.png 284 | ``` 285 | 286 | **测试硅基流动 DeepSeek-OCR**: 287 | 288 | ```bash 289 | # 设置 API Key 和提供商 290 | export MODEL_PROVIDER=siliconflow 291 | export SILICONFLOW_API_KEY="your-api-key" # macOS/Linux 292 | 293 | $env:MODEL_PROVIDER="siliconflow" 294 | $env:SILICONFLOW_API_KEY="your-api-key" # Windows PowerShell 295 | 296 | # 测试本地图片 297 | npm run test:local ./test.png 298 | ``` 299 | 300 | **测试阿里云通义千问 Qwen3-VL-Flash**: 301 | 302 | ```bash 303 | # 设置 API Key 和提供商 304 | export MODEL_PROVIDER=qwen 305 | export DASHSCOPE_API_KEY="your-api-key" # macOS/Linux 306 | 307 | $env:MODEL_PROVIDER="qwen" 308 | $env:DASHSCOPE_API_KEY="your-api-key" # Windows PowerShell 309 | 310 | # 测试本地图片 311 | npm run test:local ./test.png 312 | ``` 313 | 314 | **其他测试命令**: 315 | 316 | ```bash 317 | # 测试并提问 318 | npm run test:local ./code-error.png "这段代码有什么问题?" 

# 测试远程URL
npm run test:local https://example.com/image.jpg
```

## 工具说明

### analyze_image

分析图片内容的通用工具。

**参数**:

- `image_source` (必需): 图片来源,支持三种格式
  - **本地文件**: 绝对路径或相对路径(例:`./image.png`, `C:\Users\...\image.jpg`)
  - **远程 URL**: https:// 开头的 URL(例:`https://example.com/pic.jpg`)
  - **Data URI**: Base64 编码的图片数据(例:`data:image/png;base64,iVBORw0KGg...`)
  - 支持格式: JPG, PNG, WebP, GIF
- `prompt` (必需): 分析指令或问题

**示例**:

```typescript
// 通用分析
analyze_image({
  image_source: "./screenshot.png",
  prompt: "请详细分析这张图片的内容",
});

// 代码分析
analyze_image({
  image_source: "./code-error.png",
  prompt: "这段代码为什么报错?请提供修复建议",
});

// UI 分析
analyze_image({
  image_source: "https://example.com/ui.png",
  prompt: "分析这个界面的布局和可用性问题",
});

// Data URI (当客户端支持时)
analyze_image({
  image_source: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA...",
  prompt: "识别图片中的所有文字",
});
```

## 环境变量

### 通用配置

| 变量名 | 必需 | 默认值 | 说明 |
| ----------------- | ---- | ------- | ---------------------------------------------- |
| `MODEL_PROVIDER` | 否 | `zhipu` | 模型提供商:`zhipu`、`siliconflow`、`qwen` 或 `volcengine` |
| `MODEL_NAME` | 否 | 见下文 | 模型名称(自动根据提供商选择) |
| `MAX_TOKENS` | 否 | `16384` | 最大生成 tokens |
| `TEMPERATURE` | 否 | `0.7` | 温度参数 (0-1) |
| `TOP_P` | 否 | `0.7` | Top-p 参数 (0-1) |
| `ENABLE_THINKING` | 否 | `true` | 是否启用思考模式(GLM-4.6V 和 Qwen3-VL-Flash) |

### 智谱 GLM-4.6V 专用

| 变量名 | 必需 | 默认值 | 说明 |
| --------------- | ---------------- | ------ | ------------------- |
| `ZHIPU_API_KEY` | 是(使用智谱时) | - | 智谱 AI 的 API 密钥 |

默认模型:`glm-4.6v`

### 硅基流动 DeepSeek-OCR 专用

| 变量名 | 必需 | 默认值 | 说明 |
| --------------------- | 
-------------------- | ------ | ------------------- | 392 | | `SILICONFLOW_API_KEY` | 是(使用硅基流动时) | - | 硅基流动的 API 密钥 | 393 | 394 | 默认模型:`deepseek-ai/DeepSeek-OCR` 395 | 396 | ### 阿里云通义千问 Qwen3-VL-Flash 专用 397 | 398 | | 变量名 | 必需 | 默认值 | 说明 | 399 | | ------------------- | ---------------- | ------ | --------------------- | 400 | | `DASHSCOPE_API_KEY` | 是(使用千问时) | - | 阿里云百炼的 API 密钥 | 401 | 402 | 默认模型:`qwen3-vl-flash` 403 | 404 | **思考模式说明**: 405 | 406 | - 默认开启,提高图片分析的准确性和详细程度 407 | - 如需关闭(提高速度、降低成本),请在配置文件中设置: 408 | ```json 409 | { 410 | "mcpServers": { 411 | "luma": { 412 | "command": "npx", 413 | "args": ["-y", "luma-mcp"], 414 | "env": { 415 | "ZHIPU_API_KEY": "your-api-key", 416 | "ENABLE_THINKING": "false" 417 | } 418 | } 419 | } 420 | } 421 | ``` 422 | - 关闭后可节省 20-30% tokens 消耗,响应速度提升约 30% 423 | 424 | ## 开发 425 | 426 | ```bash 427 | # 开发模式(监听文件变化) 428 | npm run watch 429 | 430 | # 构建 431 | npm run build 432 | 433 | # 本地测试 434 | npm run test:local <图片路径> [问题] 435 | ``` 436 | 437 | ## 项目结构 438 | 439 | ``` 440 | luma-mcp/ 441 | ├── src/ 442 | │ ├── index.ts # MCP 服务器入口 443 | │ ├── config.ts # 配置管理(支持多模型) 444 | │ ├── vision-client.ts # 视觉模型客户端接口 445 | │ ├── zhipu-client.ts # GLM-4.6V API 客户端 446 | │ ├── siliconflow-client.ts # DeepSeek-OCR API 客户端 447 | │ ├── qwen-client.ts # Qwen3-VL API 客户端 448 | │ ├── volcengine-client.ts # Doubao-Seed-1.6 API 客户端 449 | │ ├── image-processor.ts # 图片处理 450 | │ └── utils/ 451 | │ ├── logger.ts # 日志工具 452 | │ └── helpers.ts # 工具函数 453 | ├── test/ 454 | │ ├── test-local.ts # 本地测试脚本 455 | │ ├── test-qwen.ts # Qwen 测试脚本 456 | │ ├── test-deepseek-raw.ts # DeepSeek 原始测试脚本 457 | │ └── test-data-uri.ts # Data URI 测试脚本 458 | ├── docs/ 459 | │ ├── design.md # 设计文档 460 | │ ├── installation.md # 安装指南 461 | │ └── README_EN.md # 英文文档 462 | ├── build/ # 编译输出 463 | └── package.json 464 | ``` 465 | 466 | ## 常见问题 467 | 468 | ### 如何获取 API Key? 469 | 470 | **智谱 GLM-4.6V**: 471 | 472 | 1. 访问 [智谱开放平台](https://open.bigmodel.cn/) 473 | 2. 注册/登录账号 474 | 3. 
进入控制台创建 API Key 475 | 4. 复制 API Key 到配置文件 476 | 477 | **硅基流动 DeepSeek-OCR(免费)**: 478 | 479 | 1. 访问 [硅基流动平台](https://cloud.siliconflow.cn/) 480 | 2. 注册/登录账号 481 | 3. 进入 API 管理创建 API Key 482 | 4. 复制 API Key 到配置文件 483 | 484 | **阿里云通义千问 Qwen3-VL-Flash**: 485 | 486 | 1. 访问 [阿里云百炼平台](https://bailian.console.aliyun.com/) 487 | 2. 注册/登录账号 488 | 3. 进入 API-KEY 管理创建 API Key 489 | 4. 复制 API Key 到配置文件 490 | 491 | ### 支持哪些图片格式? 492 | 493 | 支持 JPG、PNG、WebP、GIF 格式。建议使用 JPG 格式以获得更好的压缩率。 494 | 495 | ### 什么是 Data URI? 496 | 497 | Data URI 是一种将图片数据嵌入字符串的方式,格式为: 498 | 499 | ``` 500 | data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA... 501 | ``` 502 | 503 | **使用场景**: 504 | 505 | - 当 MCP 客户端(如 Claude Desktop)支持时,可以直接传递用户粘贴的图片 506 | - 无需保存为临时文件,更加高效 507 | - 当前支持状态:**服务器已支持**,等待客户端实现 508 | 509 | ### 图片大小限制? 510 | 511 | - 最大文件大小: 10MB 512 | - 超过 2MB 的图片会自动压缩 513 | - 推荐分辨率: 800-2048 像素 514 | 515 | ### 如何查看日志? 516 | 517 | 日志文件位置: `~/.luma-mcp/luma-mcp-YYYY-MM-DD.log` 518 | 519 | ### API 调用失败怎么办? 520 | 521 | 1. 检查 API Key 是否正确 522 | 2. 确认账户余额充足(智谱/阿里云) 523 | 3. 检查网络连接 524 | 4. 查看日志文件了解详细错误信息 525 | 526 | ### 成本如何? 527 | 528 | **硅基流动 DeepSeek-OCR**: **完全免费**,无需付费! 529 | 530 | **智谱 GLM-4.6V**: 定价请参考[智谱官方定价](https://open.bigmodel.cn/pricing)。 531 | 532 | **阿里云通义千问 Qwen3-VL-Flash**: 定价请参考[阿里云百炼定价](https://help.aliyun.com/zh/model-studio/getting-started/models)。 533 | 534 | 典型场景估算(已启用思考模式): 535 | 536 | - 简单图片理解: 500-1000 tokens 537 | - 代码截图分析: 1500-2500 tokens 538 | - 详细 UI 分析: 2000-3000 tokens 539 | 540 | 关闭思考模式可节省约 20-30% tokens。如需关闭,请设置 `ENABLE_THINKING=false`。 541 | 542 | ### 如何选择模型? 
543 | 544 | | 特性 | GLM-4.6V(智谱) | DeepSeek-OCR(硅基流动) | Qwen3-VL-Flash(阿里云) | 545 | | ------------- | ---------------- | ------------------------ | ------------------------ | 546 | | **费用** | 收费 | **完全免费** | 收费 | 547 | | **中文理解** | 优秀 | 良好 | **优秀** | 548 | | **OCR 能力** | 良好 | **优秀** | 优秀 | 549 | | **思考模式** | 支持 | 不支持 | 支持 | 550 | | **速度/成本** | 中等 | 免费 | **快速/低成本** | 551 | | **适用场景** | 通用图片分析 | OCR、文字识别 | 快速分析、3D 定位 | 552 | 553 | **推荐**: 554 | 555 | - 需要 OCR 或文字识别:选择 **DeepSeek-OCR**(免费) 556 | - 需要快速低成本分析:选择 **Qwen3-VL-Flash** 557 | - 需要深度图片理解:选择 **GLM-4.6V** 558 | 559 | ## 贡献 560 | 561 | 欢迎提交 Issue 和 Pull Request! 562 | 563 | ## 许可证 564 | 565 | MIT License 566 | 567 | ## 相关链接 568 | 569 | - [智谱 AI 开放平台](https://open.bigmodel.cn/) 570 | - [GLM-4.6V 文档](https://docs.bigmodel.cn/cn/guide/models/vlm/glm-4.6v) 571 | - [硅基流动平台](https://cloud.siliconflow.cn/) 572 | - [DeepSeek-OCR 文档](https://docs.siliconflow.cn/cn/api-reference/chat-completions/chat-completions) 573 | - [阿里云百炼平台](https://bailian.console.aliyun.com/) 574 | - [Qwen3-VL 文档](https://help.aliyun.com/zh/model-studio/getting-started/models) 575 | - [MCP 协议文档](https://modelcontextprotocol.io/) 576 | 577 | ## 更新日志 578 | 579 | 更多更新历史请查看 [CHANGELOG.md](./CHANGELOG.md) 580 | 581 | ## 作者 582 | 583 | Jochen 584 | 585 | --- 586 | 587 | **注意**: 请勿在公开仓库中提交包含真实 API Key 的配置文件。 588 | -------------------------------------------------------------------------------- /docs/README_EN.md: -------------------------------------------------------------------------------- 1 | # Luma MCP 2 | 3 | Multi-model vision understanding MCP server, providing visual capabilities to AI assistants that don't natively support image understanding. 
4 | 5 | English | [中文](../README.md) 6 | 7 | ## Features 8 | 9 | - **Multi-Model Support**: Supports four vision models 10 | - GLM-4.6V (Zhipu) - Paid, excellent Chinese understanding 11 | - DeepSeek-OCR (SiliconFlow) - **Free to use**, strong OCR capability 12 | - Qwen3-VL-Flash (Aliyun) - Paid, fast and cost-effective, supports thinking mode 13 | - Doubao-Seed-1.6 (Volcengine) - Paid, cost-effective, supports multiple versions 14 | - **Simple Design**: Single `analyze_image` tool handles all image analysis tasks 15 | - **Smart Understanding**: Automatically recognizes different scenarios (code, UI, errors, etc.) 16 | - **Comprehensive Support**: Code screenshots, UI design, error diagnosis, OCR text recognition 17 | - **Standard MCP Protocol**: Seamless integration with Claude Desktop, Cline, and other MCP clients 18 | - **URL Support**: Handles both local files and remote image URLs 19 | - **Retry Mechanism**: Built-in exponential backoff retry for reliability 20 | 21 | ## Quick Start 22 | 23 | ### Prerequisites 24 | 25 | - Node.js >= 18.0.0 26 | - **Choose one model**: 27 | - **Option A**: Zhipu AI API Key ([Get it here](https://open.bigmodel.cn/)) - Excellent Chinese understanding 28 | - **Option B**: SiliconFlow API Key ([Get it here](https://cloud.siliconflow.cn/)) - **Free to use**, Strong OCR capability 29 | - **Option C**: Aliyun Bailian API Key ([Get it here](https://bailian.console.aliyun.com/)) - Fast and cost-effective, supports thinking mode 30 | - **Option D**: Volcengine API Key ([Get it here](https://console.volcengine.com/ark)) - Cost-effective, supports multiple versions 31 | 32 | ### Installation 33 | 34 | #### Method 1: Local Development (Recommended for testing) 35 | 36 | ```bash 37 | git clone https://github.com/yourusername/luma-mcp.git 38 | cd luma-mcp 39 | npm install 40 | npm run build 41 | ``` 42 | 43 | #### Method 2: Using npx (After publishing to npm) 44 | 45 | ```bash 46 | npx luma-mcp 47 | ``` 48 | 49 | ### Configuration 50 | 51 | 
#### Claude Desktop 52 | 53 | **Option A: Using Zhipu GLM-4.6V**: 54 | 55 | ```json 56 | { 57 | "mcpServers": { 58 | "luma": { 59 | "command": "npx", 60 | "args": ["-y", "luma-mcp"], 61 | "env": { 62 | "ZHIPU_API_KEY": "your-zhipu-api-key" 63 | } 64 | } 65 | } 66 | } 67 | ``` 68 | 69 | **Option B: Using SiliconFlow DeepSeek-OCR (Free)**: 70 | 71 | ```json 72 | { 73 | "mcpServers": { 74 | "luma": { 75 | "command": "npx", 76 | "args": ["-y", "luma-mcp"], 77 | "env": { 78 | "MODEL_PROVIDER": "siliconflow", 79 | "SILICONFLOW_API_KEY": "your-siliconflow-api-key" 80 | } 81 | } 82 | } 83 | } 84 | ``` 85 | 86 | **Option C: Using Aliyun Qwen3-VL-Flash**: 87 | 88 | ```json 89 | { 90 | "mcpServers": { 91 | "luma": { 92 | "command": "npx", 93 | "args": ["-y", "luma-mcp"], 94 | "env": { 95 | "MODEL_PROVIDER": "qwen", 96 | "DASHSCOPE_API_KEY": "your-dashscope-api-key" 97 | } 98 | } 99 | } 100 | } 101 | ``` 102 | 103 | **Option D: Using Volcengine Doubao-Seed-1.6**: 104 | 105 | ```json 106 | { 107 | "mcpServers": { 108 | "luma": { 109 | "command": "npx", 110 | "args": ["-y", "luma-mcp"], 111 | "env": { 112 | "MODEL_PROVIDER": "volcengine", 113 | "VOLCENGINE_API_KEY": "your-volcengine-api-key", 114 | "MODEL_NAME": "doubao-seed-1-6-flash-250828" 115 | } 116 | } 117 | } 118 | } 119 | ``` 120 | 121 | **Local Development (Zhipu)**: 122 | 123 | ```json 124 | { 125 | "mcpServers": { 126 | "luma": { 127 | "command": "node", 128 | "args": ["D:\\codes\\Luma_mcp\\build\\index.js"], 129 | "env": { 130 | "ZHIPU_API_KEY": "your-zhipu-api-key" 131 | } 132 | } 133 | } 134 | } 135 | ``` 136 | 137 | **Local Development (SiliconFlow)**: 138 | 139 | ```json 140 | { 141 | "mcpServers": { 142 | "luma": { 143 | "command": "node", 144 | "args": ["D:\\codes\\Luma_mcp\\build\\index.js"], 145 | "env": { 146 | "MODEL_PROVIDER": "siliconflow", 147 | "SILICONFLOW_API_KEY": "your-siliconflow-api-key" 148 | } 149 | } 150 | } 151 | } 152 | ``` 153 | 154 | Restart Claude Desktop after configuration. 
155 | 156 | #### Cline (VSCode) 157 | 158 | Create `mcp.json` in project root or `.vscode/` directory 159 | 160 | **Option A: Using Zhipu GLM-4.6V**: 161 | 162 | ```json 163 | { 164 | "mcpServers": { 165 | "luma": { 166 | "command": "npx", 167 | "args": ["-y", "luma-mcp"], 168 | "env": { 169 | "ZHIPU_API_KEY": "your-zhipu-api-key" 170 | } 171 | } 172 | } 173 | } 174 | ``` 175 | 176 | **Option B: Using SiliconFlow DeepSeek-OCR (Free)**: 177 | 178 | ```json 179 | { 180 | "mcpServers": { 181 | "luma": { 182 | "command": "npx", 183 | "args": ["-y", "luma-mcp"], 184 | "env": { 185 | "MODEL_PROVIDER": "siliconflow", 186 | "SILICONFLOW_API_KEY": "your-siliconflow-api-key" 187 | } 188 | } 189 | } 190 | } 191 | ``` 192 | 193 | **Option C: Using Aliyun Qwen3-VL-Flash**: 194 | 195 | ```json 196 | { 197 | "mcpServers": { 198 | "luma": { 199 | "command": "npx", 200 | "args": ["-y", "luma-mcp"], 201 | "env": { 202 | "MODEL_PROVIDER": "qwen", 203 | "DASHSCOPE_API_KEY": "your-dashscope-api-key" 204 | } 205 | } 206 | } 207 | } 208 | ``` 209 | 210 | #### Claude Code (CLI) 211 | 212 | **Using Zhipu GLM-4.6V**: 213 | ```bash 214 | claude mcp add -s user luma-mcp --env ZHIPU_API_KEY=your-api-key -- npx -y luma-mcp 215 | ``` 216 | 217 | **Using SiliconFlow DeepSeek-OCR (Free)**: 218 | ```bash 219 | claude mcp add -s user luma-mcp --env MODEL_PROVIDER=siliconflow --env SILICONFLOW_API_KEY=your-api-key -- npx -y luma-mcp 220 | ``` 221 | 222 | **Using Aliyun Qwen3-VL-Flash**: 223 | ```bash 224 | claude mcp add -s user luma-mcp --env MODEL_PROVIDER=qwen --env DASHSCOPE_API_KEY=your-api-key -- npx -y luma-mcp 225 | ``` 226 | 227 | #### Other Tools 228 | 229 | For more MCP client configuration methods, refer to [Zhipu Official Documentation](https://docs.bigmodel.cn/cn/coding-plan/mcp/vision-mcp-server#claude-code) 230 | 231 | ## Usage 232 | 233 | ### Important Notes 234 | 235 | **MCP Tool Invocation Mechanism**: 236 | - MCP tools require the AI model to **actively call** them to execute 237 
| - If the AI model itself supports vision (like Claude 4.5 Sonnet), it will prioritize its native vision capabilities 238 | - Luma MCP primarily serves **non-vision models** (like GPT-4, Claude Opus, etc.) 239 | 240 | **How to Ensure Tool Invocation**: 241 | 1. Use full tool name: `Use mcp__luma-mcp__analyze_image tool to analyze this image` 242 | 2. Use simplified name: `Use analyze_image tool to view ./screenshot.png` 243 | 3. Provide image path: `Use image analysis tool to check ./screenshot.png for code errors` 244 | 4. Mention server explicitly: `Analyze this image via luma-mcp server` 245 | 246 | **Note**: Simply pasting an image in the chat box won't automatically trigger Luma for non-vision models - explicit instruction is required. 247 | 248 | ### Using in Claude Desktop 249 | 250 | After configuration, use it in Claude conversations like this: 251 | 252 | **Recommended Usage (Explicit Instruction)**: 253 | ``` 254 | User: Use Luma to analyze ./code-error.png, why is this code throwing an error? 
255 | Claude: [Calls Luma to analyze the image and returns detailed analysis] 256 | ``` 257 | 258 | **Or Provide Image Path**: 259 | ``` 260 | User: Please analyze the interface issues in https://example.com/screenshot.jpg 261 | Claude: [Automatically calls analyze_image tool] 262 | ``` 263 | 264 | ### Local Testing 265 | 266 | Test without MCP clients: 267 | 268 | **Test Zhipu GLM-4.6V**: 269 | ```bash 270 | # Set API Key 271 | export ZHIPU_API_KEY="your-api-key" # macOS/Linux 272 | $env:ZHIPU_API_KEY="your-api-key" # Windows PowerShell 273 | 274 | # Test local image 275 | npm run test:local ./test.png 276 | ``` 277 | 278 | **Test SiliconFlow DeepSeek-OCR**: 279 | ```bash 280 | # Set API Key and provider 281 | export MODEL_PROVIDER=siliconflow 282 | export SILICONFLOW_API_KEY="your-api-key" # macOS/Linux 283 | 284 | $env:MODEL_PROVIDER="siliconflow" 285 | $env:SILICONFLOW_API_KEY="your-api-key" # Windows PowerShell 286 | 287 | # Test local image 288 | npm run test:local ./test.png 289 | ``` 290 | 291 | **Test Aliyun Qwen3-VL-Flash**: 292 | ```bash 293 | # Set API Key and provider 294 | export MODEL_PROVIDER=qwen 295 | export DASHSCOPE_API_KEY="your-api-key" # macOS/Linux 296 | 297 | $env:MODEL_PROVIDER="qwen" 298 | $env:DASHSCOPE_API_KEY="your-api-key" # Windows PowerShell 299 | 300 | # Test local image 301 | npm run test:local ./test.png 302 | ``` 303 | 304 | **Other test commands**: 305 | ```bash 306 | # Test with question 307 | npm run test:local ./code-error.png "What's wrong with this code?" 308 | 309 | # Test remote URL 310 | npm run test:local https://example.com/image.jpg 311 | ``` 312 | 313 | ## Tool Reference 314 | 315 | ### analyze_image 316 | 317 | Universal tool for analyzing image content. 
318 | 319 | **Parameters**: 320 | 321 | - `image_source` (required): Image source, supports three formats 322 | - **Local file**: Absolute or relative path (e.g., `./image.png`, `C:\Users\...\image.jpg`) 323 | - **Remote URL**: URL starting with https:// (e.g., `https://example.com/pic.jpg`) 324 | - **Data URI**: Base64-encoded image data (e.g., `data:image/png;base64,iVBORw0KGg...`) 325 | - Supported formats: JPG, PNG, WebP, GIF 326 | - `prompt` (required): Analysis instruction or question about the image 327 | 328 | **Examples**: 329 | 330 | ```typescript 331 | // General analysis 332 | analyze_image({ 333 | image_source: "./screenshot.png", 334 | prompt: "Please analyze this image in detail" 335 | }) 336 | 337 | // Code analysis 338 | analyze_image({ 339 | image_source: "./code-error.png", 340 | prompt: "Why is this code throwing an error? Provide fix suggestions" 341 | }) 342 | 343 | // UI analysis 344 | analyze_image({ 345 | image_source: "https://example.com/ui.png", 346 | prompt: "Analyze the layout and usability issues of this interface" 347 | }) 348 | 349 | // Data URI (when client supports it) 350 | analyze_image({ 351 | image_source: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA...", 352 | prompt: "Extract all text from the image" 353 | }) 354 | ``` 355 | 356 | ## Environment Variables 357 | 358 | ### General Configuration 359 | 360 | | Variable | Required | Default | Description | 361 | |-------------------|----------|-----------|--------------------------------------------------------------| 362 | | `MODEL_PROVIDER` | No | `zhipu` | Model provider: `zhipu`, `siliconflow`, `qwen`, or `volcengine` | 363 | | `MODEL_NAME` | No | See below | Model name (auto-selected based on provider) | 364 | | `MAX_TOKENS` | No | `16384` | Maximum tokens to generate | 365 | | `TEMPERATURE` | No | `0.7` | Temperature (0-1) | 366 | | `TOP_P` | No | `0.7` | Top-p parameter (0-1) | 367 | | `ENABLE_THINKING` | No | `true` | Enable thinking mode (GLM-4.6V and Qwen3-VL-Flash) | 368 | 
369 | ### Zhipu GLM-4.6V Specific 370 | 371 | | Variable | Required | Default | Description | 372 | |------------------|----------------------|------------|-------------------| 373 | | `ZHIPU_API_KEY` | Yes (when using Zhipu) | - | Zhipu AI API key | 374 | 375 | Default model: `glm-4.6v` 376 | 377 | ### SiliconFlow DeepSeek-OCR Specific 378 | 379 | | Variable | Required | Default | Description | 380 | |------------------------|-------------------------------|------------------------------|------------------------| 381 | | `SILICONFLOW_API_KEY` | Yes (when using SiliconFlow) | - | SiliconFlow API key | 382 | 383 | Default model: `deepseek-ai/DeepSeek-OCR` 384 | 385 | ### Aliyun Qwen3-VL-Flash Specific 386 | 387 | | Variable | Required | Default | Description | 388 | |-----------------------|----------------------------|----------|------------------------------| 389 | | `DASHSCOPE_API_KEY` | Yes (when using Qwen) | - | Aliyun Bailian API key | 390 | 391 | Default model: `qwen3-vl-flash` 392 | 393 | **Thinking Mode**: 394 | - Enabled by default for better accuracy and detailed analysis 395 | - To disable (faster speed, lower cost), set in config: 396 | ```json 397 | { 398 | "mcpServers": { 399 | "luma": { 400 | "command": "npx", 401 | "args": ["-y", "luma-mcp"], 402 | "env": { 403 | "ZHIPU_API_KEY": "your-api-key", 404 | "ENABLE_THINKING": "false" 405 | } 406 | } 407 | } 408 | } 409 | ``` 410 | - Disabling saves ~20-30% tokens and improves speed by ~30% 411 | 412 | ## Development 413 | 414 | ```bash 415 | # Development mode (watch for changes) 416 | npm run watch 417 | 418 | # Build 419 | npm run build 420 | 421 | # Local test 422 | npm run test:local <image-path> [question] 423 | ``` 424 | 425 | ## Project Structure 426 | 427 | ``` 428 | luma-mcp/ 429 | ├── src/ 430 | │ ├── index.ts # MCP server entry 431 | │ ├── config.ts # Configuration management (multi-model) 432 | │ ├── vision-client.ts # Vision model client interface 433 | │ ├── zhipu-client.ts # GLM-4.6V API client 
434 | │ ├── siliconflow-client.ts # DeepSeek-OCR API client 435 | │ ├── qwen-client.ts # Qwen3-VL API client 436 | │ ├── volcengine-client.ts # Doubao-Seed-1.6 API client 437 | │ ├── image-processor.ts # Image processing 438 | │ └── utils/ 439 | │ ├── logger.ts # Logging utilities 440 | │ └── helpers.ts # Helper functions 441 | ├── test/ 442 | │ ├── test-local.ts # Local testing script 443 | │ ├── test-qwen.ts # Qwen testing script 444 | │ ├── test-deepseek-raw.ts # DeepSeek raw testing script 445 | │ └── test-data-uri.ts # Data URI testing script 446 | ├── docs/ 447 | │ ├── design.md # Design documentation 448 | │ ├── installation.md # Installation guide 449 | │ └── README_EN.md # English documentation 450 | ├── build/ # Build output 451 | └── package.json 452 | ``` 453 | 454 | ## FAQ 455 | 456 | ### How to get API Key? 457 | 458 | **Zhipu GLM-4.6V**: 459 | 1. Visit [Zhipu Open Platform](https://open.bigmodel.cn/) 460 | 2. Register/Login 461 | 3. Go to console and create API Key 462 | 4. Copy API Key to configuration file 463 | 464 | **SiliconFlow DeepSeek-OCR (Free)**: 465 | 1. Visit [SiliconFlow Platform](https://cloud.siliconflow.cn/) 466 | 2. Register/Login 467 | 3. Go to API management and create API Key 468 | 4. Copy API Key to configuration file 469 | 470 | **Aliyun Qwen3-VL-Flash**: 471 | 1. Visit [Aliyun Bailian Platform](https://bailian.console.aliyun.com/) 472 | 2. Register/Login 473 | 3. Go to API-KEY management and create API Key 474 | 4. Copy API Key to configuration file 475 | 476 | ### What image formats are supported? 477 | 478 | Supports JPG, PNG, WebP, GIF. JPG format is recommended for better compression. 479 | 480 | ### What is a Data URI? 481 | 482 | A Data URI is a way to embed image data into a string, formatted as: 483 | ``` 484 | data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA... 
485 | ``` 486 | 487 | **Use cases**: 488 | - When MCP clients (like Claude Desktop) support it, can directly pass user-pasted images 489 | - No need to save as temporary files, more efficient 490 | - Current status: **Server supports**, waiting for client implementation 491 | 492 | ### Image size limits? 493 | 494 | - Maximum file size: 10MB 495 | - Images over 2MB will be automatically compressed 496 | - Recommended resolution: 800-2048 pixels 497 | 498 | ### How to view logs? 499 | 500 | Log file location: `~/.luma-mcp/luma-mcp-YYYY-MM-DD.log` 501 | 502 | ### What if API call fails? 503 | 504 | 1. Check if API Key is correct 505 | 2. Confirm sufficient balance in your account (Zhipu/Aliyun) 506 | 3. Check network connection 507 | 4. View log file for detailed error information 508 | 509 | ### What's the cost? 510 | 511 | **SiliconFlow DeepSeek-OCR**: **Completely free**, no charges! 512 | 513 | **Zhipu GLM-4.6V**: For pricing, refer to [Zhipu Official Pricing](https://open.bigmodel.cn/pricing). 514 | 515 | **Aliyun Qwen3-VL-Flash**: For pricing, refer to [Aliyun Bailian Pricing](https://help.aliyun.com/zh/model-studio/getting-started/models). 516 | 517 | Typical scenario estimates: 518 | - Simple image understanding: 500-1000 tokens 519 | - Code screenshot analysis: 1500-2500 tokens 520 | - Detailed UI analysis: 2000-3000 tokens 521 | 522 | Enabling thinking mode (GLM-4.6V/Qwen3-VL-Flash) increases tokens by approximately 20-30%. 523 | 524 | ### How to choose a model? 
525 | 526 | | Feature | GLM-4.6V (Zhipu) | DeepSeek-OCR (SiliconFlow) | Qwen3-VL-Flash (Aliyun) | 527 | |------------------|-------------------|----------------------------|-------------------------| 528 | | **Cost** | Paid | **Completely Free** | Paid | 529 | | **Chinese** | Excellent | Good | **Excellent** | 530 | | **OCR** | Good | **Excellent** | Excellent | 531 | | **Thinking Mode**| Supported | Not supported | Supported | 532 | | **Speed/Cost** | Medium | Free | **Fast/Low Cost** | 533 | | **Use Cases** | General analysis | OCR, Text recognition | Fast analysis, 3D positioning | 534 | 535 | **Recommendations**: 536 | - Need OCR/text recognition → **DeepSeek-OCR** (free) 537 | - Need fast and cost-effective analysis → **Qwen3-VL-Flash** 538 | - Need deep image understanding → **GLM-4.6V** 539 | 540 | ## Contributing 541 | 542 | Issues and Pull Requests are welcome! 543 | 544 | ## License 545 | 546 | MIT License 547 | 548 | ## Related Links 549 | 550 | - [Zhipu AI Open Platform](https://open.bigmodel.cn/) 551 | - [GLM-4.6V Documentation](https://docs.bigmodel.cn/cn/guide/models/vlm/glm-4.6v) 552 | - [SiliconFlow Platform](https://cloud.siliconflow.cn/) 553 | - [DeepSeek-OCR Documentation](https://docs.siliconflow.cn/cn/api-reference/chat-completions/chat-completions) 554 | - [Aliyun Bailian Platform](https://bailian.console.aliyun.com/) 555 | - [Qwen3-VL Documentation](https://help.aliyun.com/zh/model-studio/getting-started/models) 556 | - [MCP Protocol Documentation](https://modelcontextprotocol.io/) 557 | 558 | ## Changelog 559 | 560 | For more update history, see [CHANGELOG.md](../CHANGELOG.md) 561 | 562 | ## Author 563 | 564 | Jochen 565 | 566 | --- 567 | 568 | **Note**: Do not commit configuration files containing real API Keys to public repositories. 569 | --------------------------------------------------------------------------------