├── spectrumlab ├── cli │ ├── py.typed │ ├── __init__.py │ ├── api.py │ └── main.py ├── config │ ├── __init__.py │ └── base_config.py ├── benchmark │ ├── signal_group.py │ ├── semantic_group.py │ ├── generation_group.py │ ├── perception_group.py │ ├── __init__.py.bak │ ├── __init__.py │ └── base.py ├── evaluator │ ├── __init__.py │ ├── choice_evaluator.py │ └── open_evaluator.py ├── utils │ ├── __init__.py │ └── image_utils.py └── models │ ├── base.py │ ├── __init__.py │ ├── base_api.py │ ├── deepseek_api.py │ ├── deepseek_vl.py │ ├── gpt4o_api.py │ ├── grok_api.py │ ├── internvl_api.py │ ├── README.md │ ├── llama_api.py │ ├── gpt4_v_api.py │ ├── doubao_api.py │ └── qwen_vl_api.py ├── leaderboard ├── gradio │ └── requirements.txt ├── vue │ ├── env.d.ts │ ├── .vscode │ │ └── extensions.json │ ├── public │ │ └── favicon.ico │ ├── tsconfig.json │ ├── src │ │ ├── views │ │ │ ├── HomeView.vue │ │ │ └── AboutView.vue │ │ ├── assets │ │ │ ├── logo.svg │ │ │ ├── main.css │ │ │ └── base.css │ │ ├── main.ts │ │ ├── components │ │ │ ├── icons │ │ │ │ ├── IconSupport.vue │ │ │ │ ├── IconTooling.vue │ │ │ │ ├── IconCommunity.vue │ │ │ │ ├── IconDocumentation.vue │ │ │ │ └── IconEcosystem.vue │ │ │ ├── HelloWorld.vue │ │ │ ├── WelcomeItem.vue │ │ │ └── TheWelcome.vue │ │ ├── stores │ │ │ └── counter.ts │ │ ├── router │ │ │ └── index.ts │ │ └── App.vue │ ├── index.html │ ├── tsconfig.app.json │ ├── .gitignore │ ├── vite.config.ts │ ├── tsconfig.node.json │ ├── package.json │ └── README.md ├── batch_import_models.py └── manage_leaderboard.py ├── docs ├── .gitignore ├── package.json ├── .vitepress │ ├── theme │ │ ├── index.ts │ │ └── custom.css │ ├── config.mts │ ├── en.ts │ └── zh.ts ├── index.md ├── zh │ ├── index.md │ ├── benchmark.md │ ├── tutorial.md │ └── api.md ├── en │ ├── index.md │ ├── benchmark.md │ ├── tutorial.md │ └── api.md └── README.md ├── .gitattributes ├── .vscode ├── extensions.json ├── python.code-snippets └── settings.json ├── tests └── models │ ├── test_deepseek.py │ ├── test_gpt_4_1.py │ ├── test_claude_opus_4.py │ ├── test_claude_sonnet_4.py │ ├── test_qwen_vl_max.py │ ├── test_gpt4o.py │ ├── test_gpt_4_v.py │ ├── test_claude_haiku_3_5.py │ ├── test_claude_sonnet_3_5.py │ ├── test_deepseek_vl_2.py │ ├── test_internvl.py │ ├── test_grok_2_v.py │ ├── test_qwen_vl_2_5_32b.py │ ├── test_qwen_vl_2_5_72b.py │ ├── test_llama_3_2_vision_11b.py │ ├── test_llama_3_2_vision_90b.py │ ├── test_doubao_1_5_vision_pro.py │ └── test_doubao_1_5_vision_pro_thinking.py ├── .pre-commit-config.yaml ├── .gitignore ├── scripts └── start_leaderboard.sh ├── pyproject.toml ├── .github └── workflows │ └── docs-deploy.yml ├── CONTRIBUTING.md ├── run_evaluation.py ├── README.md ├── run_ablation_experiments.py └── .env.example /spectrumlab/cli/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /leaderboard/gradio/requirements.txt: -------------------------------------------------------------------------------- 1 | gradio==5.35.0 -------------------------------------------------------------------------------- /leaderboard/vue/env.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | /.vitepress/dist 3 | /.vitepress/cache 4 | 
-------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /leaderboard/vue/.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": ["Vue.volar"] 3 | } 4 | -------------------------------------------------------------------------------- /spectrumlab/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_config import Config 2 | 3 | 4 | __all__ = ["Config"] 5 | -------------------------------------------------------------------------------- /leaderboard/vue/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/little1d/SpectrumLab/HEAD/leaderboard/vue/public/favicon.ico -------------------------------------------------------------------------------- /spectrumlab/cli/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.0.1" 2 | 3 | 4 | def hello() -> str: 5 | return "Hello from SpectrumLab!" 6 | -------------------------------------------------------------------------------- /spectrumlab/benchmark/signal_group.py: -------------------------------------------------------------------------------- 1 | from .base import BaseGroup 2 | 3 | 4 | class SignalGroup(BaseGroup): 5 | def __init__(self, path: str = "./data"): 6 | super().__init__(level="Signal", path=path) 7 | -------------------------------------------------------------------------------- /leaderboard/vue/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "files": [], 3 | "references": [ 4 | { 5 | "path": "./tsconfig.node.json" 6 | }, 7 | { 8 | "path": "./tsconfig.app.json" 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /spectrumlab/benchmark/semantic_group.py: -------------------------------------------------------------------------------- 1 | from .base import BaseGroup 2 | 3 | 4 | class SemanticGroup(BaseGroup): 5 | def __init__(self, path: str = "./data"): 6 | super().__init__(level="Semantic", path=path) 7 | -------------------------------------------------------------------------------- /spectrumlab/benchmark/generation_group.py: -------------------------------------------------------------------------------- 1 | from .base import BaseGroup 2 | 3 | 4 | class GenerationGroup(BaseGroup): 5 | def __init__(self, path: str = "./data"): 6 | super().__init__(level="Generation", path=path) 7 | -------------------------------------------------------------------------------- /spectrumlab/benchmark/perception_group.py: -------------------------------------------------------------------------------- 1 | from .base import BaseGroup 2 | 3 | 4 | class PerceptionGroup(BaseGroup): 5 | def __init__(self, path: str = "./data"): 6 | super().__init__(level="Perception", path=path) 7 | -------------------------------------------------------------------------------- /leaderboard/vue/src/views/HomeView.vue: -------------------------------------------------------------------------------- 1 | 4 | 5 | 10 | -------------------------------------------------------------------------------- 
/.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": [ 3 | "ms-python.black-formatter", 4 | "editorconfig.editorconfig", 5 | "eamodio.gitlens", 6 | // Code tags such as TODO, FIXME 7 | "gruntfuggly.todo-tree", 8 | ] 9 | } -------------------------------------------------------------------------------- /spectrumlab/evaluator/__init__.py: -------------------------------------------------------------------------------- 1 | from .choice_evaluator import ChoiceEvaluator 2 | 3 | 4 | def get_evaluator(level: str): 5 | """ 6 | Get the evaluator for the given level. 7 | Currently every level uses ChoiceEvaluator; later this can return a different evaluator per level. 8 | """ 9 | return ChoiceEvaluator() 10 | -------------------------------------------------------------------------------- /docs/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "devDependencies": { 3 | "markdown-it-mathjax3": "^4.3.2", 4 | "vitepress": "^1.6.3" 5 | }, 6 | "scripts": { 7 | "docs:dev": "vitepress dev .", 8 | "docs:build": "vitepress build .", 9 | "docs:preview": "vitepress preview ." 10 | } 11 | } -------------------------------------------------------------------------------- /spectrumlab/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .image_utils import ( 2 | encode_image_to_base64, 3 | get_image_mime_type, 4 | prepare_images_for_prompt, 5 | ) 6 | 7 | __all__ = [ 8 | "encode_image_to_base64", 9 | "get_image_mime_type", 10 | "prepare_images_for_prompt", 11 | ] 12 | -------------------------------------------------------------------------------- /leaderboard/vue/src/assets/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /spectrumlab/benchmark/__init__.py.bak: -------------------------------------------------------------------------------- 1 | from .signal_group import SignalGroup 2 | from .perception_group import PerceptionGroup 3 | from .generation_group import GenerationGroup 4 | from .semantic_group import SemanticGroup 5 | 6 | __all__ = ["SignalGroup", "PerceptionGroup", "GenerationGroup", "SemanticGroup"] 7 | -------------------------------------------------------------------------------- /tests/models/test_deepseek.py: -------------------------------------------------------------------------------- 1 | from spectrumlab.models import DeepSeek 2 | 3 | 4 | def test_deepseek_text_generation(): 5 | model = DeepSeek() 6 | prompt = "What is spectroscopy?"
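    # NOTE: this exercises the live DeepSeek endpoint; DEEPSEEK_API_KEY must be
    # set in .env (read by spectrumlab.config.Config), otherwise DeepSeek()
    # raises at construction.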
7 | response = model.generate(prompt) 8 | assert isinstance(response, str) 9 | assert len(response) > 0 10 | -------------------------------------------------------------------------------- /leaderboard/vue/src/views/AboutView.vue: -------------------------------------------------------------------------------- 1 | 6 | 7 | 16 | -------------------------------------------------------------------------------- /leaderboard/vue/src/main.ts: -------------------------------------------------------------------------------- 1 | import './assets/main.css' 2 | 3 | import { createApp } from 'vue' 4 | import { createPinia } from 'pinia' 5 | 6 | import App from './App.vue' 7 | import router from './router' 8 | 9 | const app = createApp(App) 10 | 11 | app.use(createPinia()) 12 | app.use(router) 13 | 14 | app.mount('#app') 15 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # .pre-commit-config.yaml 2 | repos: 3 | - repo: https://github.com/astral-sh/ruff-pre-commit 4 | rev: v0.7.4 5 | hooks: 6 | - id: ruff 7 | # auto fix 8 | args: [--fix, --exit-non-zero-on-fix, --show-fixes] 9 | exclude: ^docs 10 | - id: ruff-format 11 | exclude: ^docs -------------------------------------------------------------------------------- /leaderboard/vue/src/components/icons/IconSupport.vue: -------------------------------------------------------------------------------- 1 | 8 | -------------------------------------------------------------------------------- /leaderboard/vue/src/stores/counter.ts: -------------------------------------------------------------------------------- 1 | import { ref, computed } from 'vue' 2 | import { defineStore } from 'pinia' 3 | 4 | export const useCounterStore = defineStore('counter', () => { 5 | const count = ref(0) 6 | const doubleCount = computed(() => count.value * 2) 7 | function increment() { 8 | count.value++ 9 | } 10 | 11 | return { count, doubleCount, increment } 12 | }) 13 | -------------------------------------------------------------------------------- /leaderboard/vue/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Vite App 8 | 9 | 10 |
11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /leaderboard/vue/tsconfig.app.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "@vue/tsconfig/tsconfig.dom.json", 3 | "include": [ 4 | "env.d.ts", 5 | "src/**/*", 6 | "src/**/*.vue" 7 | ], 8 | "exclude": [ 9 | "src/**/__tests__/*" 10 | ], 11 | "compilerOptions": { 12 | "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo", 13 | "paths": { 14 | "@/*": [ 15 | "./src/*" 16 | ] 17 | } 18 | } 19 | } -------------------------------------------------------------------------------- /docs/.vitepress/theme/index.ts: -------------------------------------------------------------------------------- 1 | import type { Theme } from 'vitepress' 2 | import DefaultTheme from 'vitepress/theme' 3 | import './custom.css' 4 | 5 | export default { 6 | extends: DefaultTheme, 7 | enhanceApp({ app, router, siteData }) { 8 | // Register global components 9 | // app.component('MyGlobalComponent', MyGlobalComponent) 10 | 11 | // Global properties 12 | // app.config.globalProperties.$myGlobalProperty = () => {} 13 | } 14 | } satisfies Theme -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Build artifacts 2 | /dist/ 3 | 4 | # Virtual environments & Python cache 5 | *.venv/ 6 | *__pycache__/ 7 | .ruff_cache/ 8 | 9 | # System files 10 | .DS_Store 11 | 12 | # Environment configuration 13 | *.gradio/ 14 | .python-version 15 | 16 | # Local data & playground 17 | data/ 18 | data_test/ 19 | playground/ 20 | swanlog/ 21 | log 22 | /ablation_internvl3_78b_baselines_evaluation_results 23 | /ablation_internvl3_78b_temp_0.5_evaluation_results 24 | 25 | 26 | .env -------------------------------------------------------------------------------- /leaderboard/vue/.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | pnpm-debug.log* 8 | lerna-debug.log* 9 | 10 | node_modules 11 | .DS_Store 12 | dist 13 | dist-ssr 14 | coverage 15 | *.local 16 | 17 | /cypress/videos/ 18 | /cypress/screenshots/ 19 | 20 | # Editor directories and files 21 | .vscode/* 22 | !.vscode/extensions.json 23 | .idea 24 | *.suo 25 | *.ntvs* 26 | *.njsproj 27 | *.sln 28 | *.sw?
29 | 30 | *.tsbuildinfo 31 | -------------------------------------------------------------------------------- /leaderboard/vue/vite.config.ts: -------------------------------------------------------------------------------- 1 | import { fileURLToPath, URL } from 'node:url' 2 | 3 | import { defineConfig } from 'vite' 4 | import vue from '@vitejs/plugin-vue' 5 | import vueDevTools from 'vite-plugin-vue-devtools' 6 | 7 | // https://vite.dev/config/ 8 | export default defineConfig({ 9 | plugins: [ 10 | vue(), 11 | vueDevTools(), 12 | ], 13 | resolve: { 14 | alias: { 15 | '@': fileURLToPath(new URL('./src', import.meta.url)) 16 | }, 17 | }, 18 | }) 19 | -------------------------------------------------------------------------------- /leaderboard/vue/tsconfig.node.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "@tsconfig/node22/tsconfig.json", 3 | "include": [ 4 | "vite.config.*", 5 | "vitest.config.*", 6 | "cypress.config.*", 7 | "nightwatch.conf.*", 8 | "playwright.config.*", 9 | "eslint.config.*" 10 | ], 11 | "compilerOptions": { 12 | "noEmit": true, 13 | "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo", 14 | 15 | "module": "ESNext", 16 | "moduleResolution": "Bundler", 17 | "types": ["node"] 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /scripts/start_leaderboard.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 6 | PROJECT_ROOT="$(dirname "$SCRIPT_DIR")" 7 | 8 | echo "🚀 Starting Spectral Hub Leaderboard..." 9 | echo "📁 Project root: $PROJECT_ROOT" 10 | 11 | cd "$PROJECT_ROOT" 12 | 13 | echo "📦 Checking Gradio..." 14 | if ! python -c "import gradio" 2>/dev/null; then 15 | echo "❌ Gradio not found. Installing..." 
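    # Installs unpinned gradio/pandas into whichever Python is active on PATH;
    # leaderboard/gradio/requirements.txt pins gradio==5.35.0 if you need a
    # reproducible install.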
16 | pip install gradio pandas 17 | echo "✅ Dependencies installed" 18 | else 19 | echo "✅ Gradio found" 20 | fi 21 | 22 | cd leaderboard/gradio 23 | python app.py -------------------------------------------------------------------------------- /docs/.vitepress/theme/custom.css: -------------------------------------------------------------------------------- 1 | /* Custom CSS variables */ 2 | :root { 3 | --vp-c-brand-1: #646cff; 4 | --vp-c-brand-2: #747bff; 5 | --vp-c-brand-3: #535bf2; 6 | } 7 | 8 | /* Brand colors in dark mode */ 9 | .dark { 10 | --vp-c-brand-1: #c9cbff; 11 | --vp-c-brand-2: #a6a9ff; 12 | --vp-c-brand-3: #8285f4; 13 | } 14 | 15 | /* Custom styles */ 16 | .VPHero .name { 17 | background: linear-gradient(120deg, #bd34fe 30%, #41d1ff); 18 | background-clip: text; 19 | -webkit-background-clip: text; 20 | -webkit-text-fill-color: transparent; 21 | } 22 | 23 | .VPNavBarTitle .VPImage { 24 | width: 40px !important; 25 | height: 40px !important; 26 | } -------------------------------------------------------------------------------- /leaderboard/vue/src/assets/main.css: -------------------------------------------------------------------------------- 1 | @import './base.css'; 2 | 3 | #app { 4 | max-width: 1280px; 5 | margin: 0 auto; 6 | padding: 2rem; 7 | font-weight: normal; 8 | } 9 | 10 | a, 11 | .green { 12 | text-decoration: none; 13 | color: hsla(160, 100%, 37%, 1); 14 | transition: 0.4s; 15 | padding: 3px; 16 | } 17 | 18 | @media (hover: hover) { 19 | a:hover { 20 | background-color: hsla(160, 100%, 37%, 0.2); 21 | } 22 | } 23 | 24 | @media (min-width: 1024px) { 25 | body { 26 | display: flex; 27 | place-items: center; 28 | } 29 | 30 | #app { 31 | display: grid; 32 | grid-template-columns: 1fr 1fr; 33 | padding: 0 2rem; 34 | } 35 | } -------------------------------------------------------------------------------- /leaderboard/vue/src/router/index.ts: -------------------------------------------------------------------------------- 1 | import { createRouter, createWebHistory } from 'vue-router' 2 | import HomeView from '../views/HomeView.vue' 3 | 4 | const router = createRouter({ 5 | history: createWebHistory(import.meta.env.BASE_URL), 6 | routes: [ 7 | { 8 | path: '/', 9 | name: 'home', 10 | component: HomeView, 11 | }, 12 | { 13 | path: '/about', 14 | name: 'about', 15 | // route level code-splitting 16 | // this generates a separate chunk (About.[hash].js) for this route 17 | // which is lazy-loaded when the route is visited.
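// (HomeView above is imported eagerly, so it ships in the entry chunk.)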
18 | component: () => import('../views/AboutView.vue'), 19 | }, 20 | ], 21 | }) 22 | 23 | export default router 24 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: home 3 | 4 | hero: 5 | name: "SpectrumLab" 6 | text: "" 7 | tagline: "开创性的统一平台,助力系统化与高效的光谱深度学习研究" 8 | actions: 9 | - theme: brand 10 | text: 快速开始 11 | link: /zh/tutorial 12 | - theme: alt 13 | text: 查看源码 14 | link: https://github.com/little1d/SpectrumLab 15 | 16 | features: 17 | - title: 🔬 多模态评估 18 | details: 支持图像+文本的多模态光谱数据评估,兼容多种深度学习模型 19 | - title: 🤖 模型集成 20 | details: 集成 GPT-4o、Claude、DeepSeek、Qwen-VL 等先进模型的 API 接口 21 | - title: 📊 基准测试 22 | details: 提供标准化的评估流程和指标,支持多种光谱学任务类型 23 | - title: 🏆 排行榜 24 | details: 实时更新的模型性能排行榜,支持多维度对比分析 25 | - title: 🚀 命令行工具 26 | details: 简洁的命令行界面,支持批量评估和结果管理 27 | - title: 🔧 易于扩展 28 | details: 模块化设计,支持自定义评估器和模型的快速集成 29 | --- -------------------------------------------------------------------------------- /docs/zh/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: home 3 | 4 | hero: 5 | name: "SpectrumLab" 6 | text: "" 7 | tagline: "开创性的统一平台,助力系统化与高效的光谱深度学习研究" 8 | actions: 9 | - theme: brand 10 | text: 快速开始 11 | link: /tutorial 12 | - theme: alt 13 | text: 查看源码 14 | link: https://github.com/little1d/SpectrumLab 15 | 16 | features: 17 | - title: 🔬 多模态评估 18 | details: 支持图像+文本的多模态光谱数据评估,兼容多种深度学习模型 19 | - title: 🤖 模型集成 20 | details: 集成 GPT-4o、Claude、DeepSeek、Qwen-VL 等先进模型的 API 接口 21 | - title: 📊 基准测试 22 | details: 提供标准化的评估流程和指标,支持多种光谱学任务类型 23 | - title: 🏆 排行榜 24 | details: 实时更新的模型性能排行榜,支持多维度对比分析 25 | - title: 🚀 命令行工具 26 | details: 简洁的命令行界面,支持批量评估和结果管理 27 | - title: 🔧 易于扩展 28 | details: 模块化设计,支持自定义评估器和模型的快速集成 29 | --- -------------------------------------------------------------------------------- /leaderboard/vue/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "leaderboard", 3 | "version": "0.0.0", 4 | "private": true, 5 | "type": "module", 6 | "scripts": { 7 | "dev": "vite", 8 | "build": "run-p type-check \"build-only {@}\" --", 9 | "preview": "vite preview", 10 | "build-only": "vite build", 11 | "type-check": "vue-tsc --build" 12 | }, 13 | "dependencies": { 14 | "pinia": "^3.0.3", 15 | "vue": "^3.5.17", 16 | "vue-router": "^4.5.1" 17 | }, 18 | "devDependencies": { 19 | "@tsconfig/node22": "^22.0.2", 20 | "@types/node": "^22.15.32", 21 | "@vitejs/plugin-vue": "^6.0.0", 22 | "@vue/tsconfig": "^0.7.0", 23 | "npm-run-all2": "^8.0.4", 24 | "typescript": "~5.8.0", 25 | "vite": "^5.4.10", 26 | "vite-plugin-vue-devtools": "^7.7.7", 27 | "vue-tsc": "^2.2.10" 28 | } 29 | } -------------------------------------------------------------------------------- /spectrumlab/benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | from .signal_group import SignalGroup 2 | from .perception_group import PerceptionGroup 3 | from .generation_group import GenerationGroup 4 | from .semantic_group import SemanticGroup 5 | 6 | __all__ = [ 7 | "SignalGroup", 8 | "PerceptionGroup", 9 | "GenerationGroup", 10 | "SemanticGroup", 11 | "get_benchmark_group", 12 | ] 13 | 14 | 15 | def get_benchmark_group(level: str, path: str = "./data"): 16 | level_map = { 17 | "signal": SignalGroup, 18 | "perception": PerceptionGroup, 19 | "semantic": SemanticGroup, 20 | "generation": GenerationGroup, 21 | } 22 | 23 | level_lower 
= level.lower() 24 | if level_lower not in level_map: 25 | raise ValueError(f"Unsupported evaluation level: {level}. Valid options: {list(level_map.keys())}") 26 | 27 | return level_map[level_lower](path=path) 28 | -------------------------------------------------------------------------------- /leaderboard/vue/src/components/HelloWorld.vue: -------------------------------------------------------------------------------- 1 | 6 | 7 | 17 | 18 | 42 | -------------------------------------------------------------------------------- /.vscode/python.code-snippets: -------------------------------------------------------------------------------- 1 | { 2 | "python-comment": { 3 | "prefix": "comm", 4 | "scope": "python,notebook", 5 | "description": "Block comment for a section of a Python file", 6 | "body": [ 7 | "# =============================================", 8 | "# ${1:SECTION TITLE} ", 9 | "# =============================================", 10 | ] 11 | }, 12 | "adapted-from": { 13 | "prefix": "adapted", 14 | "scope": "python,notebook", 15 | "description": "Add a comment noting the source the code was adapted from", 16 | "body": [ 17 | "# Adapted from: ${1:source_description}", 18 | "# Source: ${2:https://github.com/example/repo}", 19 | ] 20 | }, 21 | "model-doc-link": { // <<< new snippet 22 | "prefix": "doclink", // trigger keyword 23 | "scope": "python,notebook", 24 | "description": "Insert a documentation link for an external model/library", 25 | "body": [ 26 | "# Link: ${1:https://example.com}" 27 | ] 28 | } 29 | } -------------------------------------------------------------------------------- /leaderboard/vue/README.md: -------------------------------------------------------------------------------- 1 | # This template should help get you started developing with Vue 3 in Vite 2 | 3 | ## Recommended IDE Setup 4 | 5 | [VSCode](https://code.visualstudio.com/) + [Volar](https://marketplace.visualstudio.com/items?itemName=Vue.volar) (and disable Vetur). 6 | 7 | ## Type Support for `.vue` Imports in TS 8 | 9 | TypeScript cannot handle type information for `.vue` imports by default, so we replace the `tsc` CLI with `vue-tsc` for type checking. In editors, we need [Volar](https://marketplace.visualstudio.com/items?itemName=Vue.volar) to make the TypeScript language service aware of `.vue` types. 10 | 11 | ## Customize configuration 12 | 13 | See [Vite Configuration Reference](https://vite.dev/config/). 14 | 15 | ## Project Setup 16 | 17 | ```sh 18 | npm install 19 | ``` 20 | 21 | ### Compile and Hot-Reload for Development 22 | 23 | ```sh 24 | npm run dev 25 | ``` 26 | 27 | ### Type-Check, Compile and Minify for Production 28 | 29 | ```sh 30 | npm run build 31 | ``` 32 | -------------------------------------------------------------------------------- /leaderboard/vue/src/components/icons/IconTooling.vue: -------------------------------------------------------------------------------- 1 | 2 | 20 | -------------------------------------------------------------------------------- /spectrumlab/models/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Union, Any, Dict 3 | 4 | 5 | class BaseModel(ABC): 6 | is_api: bool = False 7 | 8 | def __init__(self, path: str, max_seq_len: int = 2048): 9 | self.path = path 10 | self.max_seq_len = max_seq_len 11 | 12 | @abstractmethod 13 | def generate( 14 | self, prompt: Union[str, Dict[str, Any]], max_out_len: int = 512 15 | ) -> str: 16 | """ 17 | Generate response for a single prompt.
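        Example (illustrative sketch; ``model`` stands for any concrete subclass,
        and the base64 payload is elided):
            >>> model.generate("What is spectroscopy?")
            >>> model.generate({
            ...     "text": "Describe this spectrum.",
            ...     "images": [{"type": "image_url",
            ...                 "image_url": {"url": "data:image/png;base64,..."}}],
            ... })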
18 | 19 | Args: 20 | prompt: Input prompt, can be: 21 | - str: Simple text prompt 22 | - Dict: Multimodal prompt with format: 23 | { 24 | "text": "question text", 25 | "images": [{"type": "image_url", "image_url": {"url": "data:..."}}] 26 | } 27 | max_out_len: Maximum output length 28 | 29 | Returns: 30 | Generated response string 31 | """ 32 | pass 33 | -------------------------------------------------------------------------------- /leaderboard/vue/src/components/icons/IconCommunity.vue: -------------------------------------------------------------------------------- 1 | 8 | -------------------------------------------------------------------------------- /spectrumlab/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .deepseek_api import DeepSeek 2 | from .gpt4o_api import GPT4o 3 | from .internvl_api import InternVL 4 | from .claude_api import ( 5 | Claude_Sonnet_3_5, 6 | Claude_Opus_4, 7 | Claude_Haiku_3_5, 8 | Claude_Sonnet_4, 9 | ) 10 | from .gpt4_v_api import GPT4_1, GPT4_Vision 11 | from .grok_api import Grok_2_Vision 12 | from .deepseek_vl import DeepSeek_VL2 13 | from .qwen_vl_api import Qwen_VL_Max, Qwen_2_5_VL_32B, Qwen_2_5_VL_72B 14 | from .llama_api import Llama_Vision_11B, Llama_Vision_90B 15 | from .doubao_api import Doubao_1_5_Vision_Pro, Doubao_1_5_Vision_Pro_Thinking 16 | 17 | __all__ = [ 18 | "DeepSeek", 19 | "GPT4o", 20 | "InternVL", 21 | "Claude_Sonnet_3_5", 22 | "Claude_Opus_4", 23 | "Claude_Haiku_3_5", 24 | "Claude_Sonnet_4", 25 | "GPT4_1", 26 | "GPT4_Vision", 27 | "Grok_2_Vision", 28 | "Qwen_VL_Max", 29 | "DeepSeek_VL2", 30 | "Qwen_2_5_VL_32B", 31 | "Qwen_2_5_VL_72B", 32 | "Llama_Vision_11B", 33 | "Llama_Vision_90B", 34 | "Doubao_1_5_Vision_Pro", 35 | "Doubao_1_5_Vision_Pro_Thinking", 36 | ] 37 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "spectrumlab" 7 | version = "0.0.1" 8 | description = "A pioneering unified platform designed to systematize and accelerate deep learning research in spectroscopy." 
9 | readme = "README.md" 10 | requires-python = ">=3.10" 11 | authors = [ 12 | { name = "Zhuo Yang", email = "yzachary1551@gmail.com" }, 13 | { name = "Tianfan Fu", email = "futianfan@gmail.com" }, 14 | ] 15 | keywords = ["benchmark", "chemistry", "spectroscopy", "evaluation"] 16 | 17 | dependencies = ["dotenv>=0.9.9", "openai>=1.93.0"] 18 | 19 | [project.optional-dependencies] 20 | dev = ["pytest>=7.4.0", "black>=23.0.0", "ruff>=0.1.0", "pre-commit>=4.2.0"] 21 | test = ["pytest>=8.4.1", "pytest-asyncio>=1.1.0"] 22 | 23 | [project.scripts] 24 | spectrumlab = "spectrumlab.cli.main:main" 25 | 26 | [tool.hatch.build.targets.wheel] 27 | packages = ["spectrumlab"] 28 | 29 | [tool.hatch.metadata] 30 | allow-direct-references = true 31 | 32 | [tool.black] 33 | line-length = 120 34 | skip-string-normalization = true 35 | 36 | [tool.pytest.ini_options] 37 | testpaths = ["tests"] 38 | python_files = ["test_*.py"] 39 | python_classes = ["Test*"] 40 | python_functions = ["test_*"] 41 | addopts = "-v --tb=short" 42 | -------------------------------------------------------------------------------- /leaderboard/vue/src/components/icons/IconDocumentation.vue: -------------------------------------------------------------------------------- 1 | 8 | -------------------------------------------------------------------------------- /spectrumlab/models/base_api.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | from typing import Dict, Any, Union 3 | from .base import BaseModel 4 | 5 | 6 | class BaseAPIModel(BaseModel): 7 | is_api: bool = True 8 | 9 | def __init__(self, model_name: str = "api_model", max_seq_len: int = 2048): 10 | """ 11 | Initialize API model. 12 | 13 | Args: 14 | model_name: Name of the model 15 | max_seq_len: Maximum sequence length 16 | """ 17 | super().__init__(path=model_name, max_seq_len=max_seq_len) 18 | 19 | @abstractmethod 20 | def generate( 21 | self, prompt: Union[str, Dict[str, Any]], max_out_len: int = 512 22 | ) -> str: 23 | """ 24 | Generate response for a single prompt. 25 | 26 | Args: 27 | prompt: Input prompt, can be: 28 | - str: Simple text prompt 29 | - Dict: Multimodal prompt with format: 30 | { 31 | "text": "question text", 32 | "images": [{"type": "image_url", "image_url": {"url": "data:..."}}] 33 | } 34 | max_out_len: Maximum output length 35 | 36 | Returns: 37 | Generated response string 38 | """ 39 | pass 40 | -------------------------------------------------------------------------------- /docs/en/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: home 3 | 4 | hero: 5 | name: "SpectrumLab" 6 | text: "" 7 | tagline: "A pioneering unified platform designed to systematize and accelerate deep learning research in spectroscopy" 8 | actions: 9 | - theme: brand 10 | text: Quick Start 11 | link: /en/tutorial 12 | - theme: alt 13 | text: View Source Code 14 | link: https://github.com/little1d/SpectrumLab 15 | 16 | features: 17 | - title: 🔬 Multimodal evaluation 18 | details: It supports multimodal spectral data evaluation combining images and text, and is compatible with various deep learning models. 19 | - title: 🤖 Model Integration 20 | details: Integrates API interfaces of advanced models such as GPT-4o, Claude, DeepSeek, and Qwen-VL 21 | - title: 📊 Benchmark 22 | details: Provides standardized evaluation processes and metrics, supporting multiple types of spectroscopy tasks. 
23 | - title: 🏆 Leaderboard 24 | details: A real-time updated model performance leaderboard that supports multi-dimensional comparative analysis. 25 | - title: 🚀 Command-line tool 26 | details: A concise command-line interface that supports batch evaluation and result management. 27 | - title: 🔧 Easy to extend 28 | details: Modular design that supports the rapid integration of custom evaluators and models. 29 | --- -------------------------------------------------------------------------------- /spectrumlab/cli/api.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict, Optional, Any 2 | from ..benchmark import get_benchmark_group 3 | from ..evaluator import get_evaluator 4 | 5 | 6 | def run_evaluation( 7 | model, 8 | level: str, 9 | subcategories: Optional[List[str]] = None, 10 | data_path: str = "./data", 11 | save_path: str = "./results", 12 | max_out_len: int = 512, 13 | ) -> Dict[str, Any]: 14 | print("🚀 Starting evaluation") 15 | print(f"📊 Model: {model.__class__.__name__}") 16 | print(f"📁 Level: {level}") 17 | print(f"📂 Data path: {data_path}") 18 | print(f"💾 Save path: {save_path}") 19 | 20 | print("\n📥 Loading benchmark data...") 21 | benchmark = get_benchmark_group(level, data_path) 22 | 23 | if subcategories: 24 | data = benchmark.get_data_by_subcategories(subcategories) 25 | print(f"📋 Subcategories: {subcategories}") 26 | else: 27 | data = benchmark.get_data_by_subcategories("all") 28 | print("📋 Subcategories: all") 29 | 30 | print(f"📊 Total data items: {len(data)}") 31 | 32 | print("\n⚙️ Getting evaluator...") 33 | evaluator = get_evaluator(level) 34 | 35 | print("\n🔄 Running evaluation...") 36 | results = evaluator.evaluate( 37 | data_items=data, 38 | model=model, 39 | max_out_len=max_out_len, 40 | save_path=save_path, 41 | ) 42 | 43 | return results 44 | -------------------------------------------------------------------------------- /docs/zh/benchmark.md: -------------------------------------------------------------------------------- 1 | # 基准测试 2 | 3 | ## Benchmark 概述 4 | 5 | SpectrumLab 的 Benchmark 采用分层架构设计,从信号处理到高级语义理解,全面评估模型在光谱学任务上的能力。基准测试包含四个主要层级,每个层级包含多个子任务,适用于不同类型的谱图分析。 6 | 7 | ## Benchmark 详情 8 | 9 | ### 1. 信号层(Signal Level) 10 | 11 | 基础的谱图信号处理和分析,包括以下子任务: 12 | 13 | - **谱图类型分类(Spectrum Type Classification)**:识别不同类型的谱图(红外、核磁、拉曼等)。 14 | - **谱图质量评估(Spectrum Quality Assessment)**:识别谱图是否清晰、完整、以及是否存在明显噪声。 15 | - **基础特征提取(Basic Feature Extraction)**:识别谱图中的基线、峰、峰位、峰强等基本特征。 16 | - **杂质峰检测(Impurity Peak Detection)**:识别谱图中的杂质峰和异常信号。 17 | 18 | ### 2. 感知层(Perception Level) 19 | 20 | 进一步的谱图视觉理解和模式识别,涵盖: 21 | 22 | - **基本化学性质预测(Basic Property Prediction)**:基于谱图特征预测分子离子峰、溶解性、酸碱性等直接关联的性质。 23 | - **元素组成预测(Elemental Compositional Prediction)**:从质谱等中识别元素组成和同位素模式。 24 | - **官能团识别(Functional Group Recognition)**:根据谱图特征(特别是特征峰位)预测分子可能存在的官能团。 25 | - **谱峰归属(Peak Assignment)**:对谱图中的主要峰进行初步的化学归属。 26 | 27 | ### 3. 语义层(Semantic Level) 28 | 29 | 深层的谱图语义理解和化学知识推理,包括: 30 | 31 | - **多模态谱图融合(Fusing Spectroscopic Modalities)**:结合多种光谱或分子信息进行综合判断。 32 | - **分子结构解析(Molecular Structure Elucidation)**:根据光谱信息,从多个候选项中匹配正确的分子结构。 33 | - **多模态推理/问答(Multimodal Molecular Reasoning)**:基于光谱、文本信息,进行复杂的化学推理问答。 34 | 35 | ### 4. 
生成层(Generation Level) 36 | 37 | 创造性地生成新化学信息,主要任务有: 38 | 39 | - **前向问题(Forward Problems)**:谱图、SMILES 或两者结合,推断分子结构。 40 | - **逆向问题(Inverse Problems)**:分子结构生成谱图、SMILES 等。 41 | - **无条件生成(De Novo Generation)**:根据特定目标(如特定性质的分子、特定靶点的配体)从头生成新颖、多样且合理的分子结构(SMILES、2D图)及/或预测的多模态信息(谱图、性质)。 42 | -------------------------------------------------------------------------------- /leaderboard/batch_import_models.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Batch import models to leaderboard 4 | Usage: python batch_import_models.py models_data.json 5 | """ 6 | 7 | import json 8 | import sys 9 | from pathlib import Path 10 | 11 | # Add the leaderboard directory to Python path 12 | sys.path.insert(0, str(Path(__file__).parent)) 13 | 14 | from manage_leaderboard import LeaderboardManager 15 | 16 | 17 | def batch_import(models_file: str): 18 | """Batch import models from JSON file""" 19 | 20 | with open(models_file, "r", encoding="utf-8") as f: 21 | models_data = json.load(f) 22 | 23 | manager = LeaderboardManager() 24 | 25 | # Clear existing models if specified 26 | if models_data.get("clear_existing", False): 27 | print("🗑️ Clearing existing models...") 28 | manager.data["models"] = [] 29 | 30 | # Import models 31 | imported_count = 0 32 | for model_data in models_data["models"]: 33 | model_info = model_data["model_info"] 34 | subcategory_scores = model_data["scores"] 35 | 36 | success = manager.add_model(model_info, subcategory_scores) 37 | if success: 38 | imported_count += 1 39 | 40 | print( 41 | f"\n✅ Successfully imported {imported_count}/{len(models_data['models'])} models" 42 | ) 43 | 44 | 45 | if __name__ == "__main__": 46 | if len(sys.argv) != 2: 47 | print("Usage: python batch_import_models.py models_data.json") 48 | sys.exit(1) 49 | 50 | batch_import(sys.argv[1]) 51 | -------------------------------------------------------------------------------- /.github/workflows/docs-deploy.yml: -------------------------------------------------------------------------------- 1 | name: Deploy VitePress Docs Site to Pages 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | paths: 7 | - 'docs/**' 8 | - '.github/workflows/docs-deploy.yml' 9 | workflow_dispatch: 10 | 11 | permissions: 12 | contents: read 13 | pages: write 14 | id-token: write 15 | 16 | concurrency: 17 | group: pages 18 | cancel-in-progress: false 19 | 20 | jobs: 21 | build: 22 | runs-on: ubuntu-latest 23 | steps: 24 | - name: Checkout 25 | uses: actions/checkout@v4 26 | with: 27 | fetch-depth: 0 # not needed unless lastUpdated is enabled 28 | 29 | - name: Setup Node 30 | uses: actions/setup-node@v4 31 | with: 32 | node-version: 22 33 | cache: npm 34 | cache-dependency-path: docs/package-lock.json 35 | 36 | - name: Setup Pages 37 | uses: actions/configure-pages@v4 38 | 39 | - name: Install dependencies 40 | working-directory: docs 41 | run: npm ci 42 | 43 | - name: Build with VitePress 44 | working-directory: docs 45 | run: npm run docs:build 46 | 47 | - name: Upload artifact 48 | uses: actions/upload-pages-artifact@v3 49 | with: 50 | path: docs/.vitepress/dist 51 | 52 | deploy: 53 | environment: 54 | name: github-pages 55 | url: ${{ steps.deployment.outputs.page_url }} 56 | needs: build 57 | runs-on: ubuntu-latest 58 | steps: 59 | - name: Deploy to GitHub Pages 60 | id: deployment 61 | uses: actions/deploy-pages@v4 -------------------------------------------------------------------------------- /leaderboard/vue/src/App.vue:
-------------------------------------------------------------------------------- 1 | 5 | 6 | 22 | 23 | 86 | -------------------------------------------------------------------------------- /leaderboard/vue/src/components/WelcomeItem.vue: -------------------------------------------------------------------------------- 1 | 14 | 15 | 88 | -------------------------------------------------------------------------------- /tests/models/test_gpt_4_1.py: -------------------------------------------------------------------------------- 1 | from spectrumlab.models import GPT4_1 2 | from spectrumlab.utils.image_utils import encode_image_to_base64 3 | from spectrumlab.benchmark.signal_group import SignalGroup 4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator 5 | 6 | 7 | def test_gpt_4_1_text_generation(): 8 | model = GPT4_1() 9 | prompt = "What is spectroscopy?" 10 | response = model.generate(prompt) 11 | assert isinstance(response, str) 12 | assert len(response) > 0 13 | 14 | 15 | def test_gpt_4_1_multimodal_generation(): 16 | model = GPT4_1() 17 | image_path = "playground/models/test.jpg" 18 | image_base64 = encode_image_to_base64(image_path) 19 | prompt = { 20 | "text": "Please explain this spectroscopy image.", 21 | "images": [ 22 | { 23 | "type": "image_url", 24 | "image_url": {"url": f"data:image/jpg;base64,{image_base64}"}, 25 | } 26 | ], 27 | } 28 | response = model.generate(prompt) 29 | assert isinstance(response, str) 30 | assert len(response) > 0 31 | 32 | 33 | def test_gpt_4_1_signalgroup_evaluation(): 34 | model = GPT4_1() 35 | signal_group = SignalGroup("data") 36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 37 | evaluator = ChoiceEvaluator() 38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None) 39 | assert "metrics" in results 40 | assert "overall" in results["metrics"] 41 | 42 | 43 | def test_gpt_4_1_signalgroup_evaluation_parallel(): 44 | model = GPT4_1() 45 | signal_group = SignalGroup("data") 46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 47 | evaluator = ChoiceEvaluator() 48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None) 49 | assert "metrics" in results 50 | assert "overall" in results["metrics"] 51 | -------------------------------------------------------------------------------- /tests/models/test_claude_opus_4.py: -------------------------------------------------------------------------------- 1 | from spectrumlab.models import Claude_Opus_4 2 | from spectrumlab.utils.image_utils import encode_image_to_base64 3 | from spectrumlab.benchmark.signal_group import SignalGroup 4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator 5 | 6 | 7 | def test_claude_text_generation(): 8 | model = Claude_Opus_4() 9 | prompt = "What is spectroscopy?" 
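    # The Claude wrappers come from spectrumlab/models/claude_api.py (re-exported
    # in models/__init__.py); as with the other API-backed tests, valid
    # credentials are assumed to be configured via .env, and a live request is made.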
10 | response = model.generate(prompt) 11 | assert isinstance(response, str) 12 | assert len(response) > 0 13 | 14 | 15 | def test_claude_multimodal_generation(): 16 | model = Claude_Opus_4() 17 | image_path = "playground/models/test.jpg" 18 | image_base64 = encode_image_to_base64(image_path) 19 | prompt = { 20 | "text": "Please explain this spectroscopy image.", 21 | "images": [ 22 | { 23 | "type": "image_url", 24 | "image_url": {"url": f"data:image/jpg;base64,{image_base64}"}, 25 | } 26 | ], 27 | } 28 | response = model.generate(prompt) 29 | assert isinstance(response, str) 30 | assert len(response) > 0 31 | 32 | 33 | def test_claude_signalgroup_evaluation(): 34 | model = Claude_Opus_4() 35 | signal_group = SignalGroup("data") 36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 37 | evaluator = ChoiceEvaluator() 38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None) 39 | assert "metrics" in results 40 | assert "overall" in results["metrics"] 41 | 42 | 43 | def test_claude_signalgroup_evaluation_parallel(): 44 | model = Claude_Opus_4() 45 | signal_group = SignalGroup("data") 46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 47 | evaluator = ChoiceEvaluator() 48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None) 49 | assert "metrics" in results 50 | assert "overall" in results["metrics"] -------------------------------------------------------------------------------- /tests/models/test_claude_sonnet_4.py: -------------------------------------------------------------------------------- 1 | from spectrumlab.models import Claude_Sonnet_4 2 | from spectrumlab.utils.image_utils import encode_image_to_base64 3 | from spectrumlab.benchmark.signal_group import SignalGroup 4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator 5 | 6 | 7 | def test_claude_text_generation(): 8 | model = Claude_Sonnet_4() 9 | prompt = "What is spectroscopy?" 
10 | response = model.generate(prompt) 11 | assert isinstance(response, str) 12 | assert len(response) > 0 13 | 14 | 15 | def test_claude_multimodal_generation(): 16 | model = Claude_Sonnet_4() 17 | image_path = "playground/models/test.jpg" 18 | image_base64 = encode_image_to_base64(image_path) 19 | prompt = { 20 | "text": "Please explain this spectroscopy image.", 21 | "images": [ 22 | { 23 | "type": "image_url", 24 | "image_url": {"url": f"data:image/jpg;base64,{image_base64}"}, 25 | } 26 | ], 27 | } 28 | response = model.generate(prompt) 29 | assert isinstance(response, str) 30 | assert len(response) > 0 31 | 32 | 33 | def test_claude_signalgroup_evaluation(): 34 | model = Claude_Sonnet_4() 35 | signal_group = SignalGroup("data") 36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 37 | evaluator = ChoiceEvaluator() 38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None) 39 | assert "metrics" in results 40 | assert "overall" in results["metrics"] 41 | 42 | 43 | def test_claude_signalgroup_evaluation_parallel(): 44 | model = Claude_Sonnet_4() 45 | signal_group = SignalGroup("data") 46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 47 | evaluator = ChoiceEvaluator() 48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None) 49 | assert "metrics" in results 50 | assert "overall" in results["metrics"] -------------------------------------------------------------------------------- /tests/models/test_qwen_vl_max.py: -------------------------------------------------------------------------------- 1 | from spectrumlab.models import Qwen_VL_Max 2 | from spectrumlab.utils.image_utils import encode_image_to_base64 3 | from spectrumlab.benchmark.signal_group import SignalGroup 4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator 5 | 6 | 7 | def test_qwen_vl_max_text_generation(): 8 | model = Qwen_VL_Max() 9 | prompt = "What is spectroscopy?" 
10 | response = model.generate(prompt) 11 | assert isinstance(response, str) 12 | assert len(response) > 0 13 | 14 | 15 | def test_qwen_vl_max_multimodal_generation(): 16 | model = Qwen_VL_Max() 17 | image_path = "playground/models/test.jpg" 18 | image_base64 = encode_image_to_base64(image_path) 19 | prompt = { 20 | "text": "Please explain this spectroscopy image.", 21 | "images": [ 22 | { 23 | "type": "image_url", 24 | "image_url": {"url": f"data:image/jpg;base64,{image_base64}"}, 25 | } 26 | ], 27 | } 28 | response = model.generate(prompt) 29 | assert isinstance(response, str) 30 | assert len(response) > 0 31 | 32 | 33 | def test_qwen_vl_max_signalgroup_evaluation(): 34 | model = Qwen_VL_Max() 35 | signal_group = SignalGroup("data") 36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 37 | evaluator = ChoiceEvaluator() 38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None) 39 | assert "metrics" in results 40 | assert "overall" in results["metrics"] 41 | 42 | 43 | def test_qwen_vl_max_signalgroup_evaluation_parallel(): 44 | model = Qwen_VL_Max() 45 | signal_group = SignalGroup("data") 46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 47 | evaluator = ChoiceEvaluator() 48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None) 49 | assert "metrics" in results 50 | assert "overall" in results["metrics"] 51 | -------------------------------------------------------------------------------- /tests/models/test_gpt4o.py: -------------------------------------------------------------------------------- 1 | from spectrumlab.models import GPT4o 2 | from spectrumlab.utils.image_utils import encode_image_to_base64 3 | from spectrumlab.benchmark.signal_group import SignalGroup 4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator 5 | 6 | 7 | def test_gpt4o_text_generation(): 8 | model = GPT4o() 9 | prompt = "What is spectroscopy?" 
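    # Each model test module follows the same three-stage pattern: plain-text
    # generation, multimodal generation, then a SignalGroup evaluation over a
    # single subcategory (serial and parallel) to keep runtime down.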
10 | response = model.generate(prompt) 11 | assert isinstance(response, str) 12 | assert len(response) > 0 13 | 14 | 15 | def test_gpt4o_multimodal_generation(): 16 | model = GPT4o() 17 | image_path = "playground/models/test.jpg" 18 | image_base64 = encode_image_to_base64(image_path) 19 | prompt = { 20 | "text": "Please explain this spectroscopy image.", 21 | "images": [ 22 | { 23 | "type": "image_url", 24 | "image_url": {"url": f"data:image/jpg;base64,{image_base64}"}, 25 | } 26 | ], 27 | } 28 | response = model.generate(prompt) 29 | assert isinstance(response, str) 30 | assert len(response) > 0 31 | 32 | 33 | def test_gpt4o_signalgroup_evaluation(): 34 | model = GPT4o() 35 | signal_group = SignalGroup("data") 36 | # Pick only one subcategory to keep the test fast 37 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 38 | evaluator = ChoiceEvaluator() 39 | results = evaluator.evaluate(data_items=data, model=model, save_path=None) 40 | assert "metrics" in results 41 | assert "overall" in results["metrics"] 42 | 43 | 44 | def test_gpt4o_signalgroup_evaluation_parallel(): 45 | model = GPT4o() 46 | signal_group = SignalGroup("data") 47 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 48 | evaluator = ChoiceEvaluator() 49 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None) 50 | assert "metrics" in results 51 | assert "overall" in results["metrics"] 52 | -------------------------------------------------------------------------------- /tests/models/test_gpt_4_v.py: -------------------------------------------------------------------------------- 1 | from spectrumlab.models import GPT4_Vision 2 | from spectrumlab.utils.image_utils import encode_image_to_base64 3 | from spectrumlab.benchmark.signal_group import SignalGroup 4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator 5 | 6 | 7 | def test_gpt_4_vision_text_generation(): 8 | model = GPT4_Vision() 9 | prompt = "What is spectroscopy?"
10 | response = model.generate(prompt) 11 | assert isinstance(response, str) 12 | assert len(response) > 0 13 | 14 | 15 | def test_gpt_4_vision_multimodal_generation(): 16 | model = GPT4_Vision() 17 | image_path = "playground/models/test.jpg" 18 | image_base64 = encode_image_to_base64(image_path) 19 | prompt = { 20 | "text": "Please explain this spectroscopy image.", 21 | "images": [ 22 | { 23 | "type": "image_url", 24 | "image_url": {"url": f"data:image/jpg;base64,{image_base64}"}, 25 | } 26 | ], 27 | } 28 | response = model.generate(prompt) 29 | assert isinstance(response, str) 30 | assert len(response) > 0 31 | 32 | 33 | def test_gpt_4_vision_signalgroup_evaluation(): 34 | model = GPT4_Vision() 35 | signal_group = SignalGroup("data") 36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 37 | evaluator = ChoiceEvaluator() 38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None) 39 | assert "metrics" in results 40 | assert "overall" in results["metrics"] 41 | 42 | 43 | def test_gpt_4_vision_signalgroup_evaluation_parallel(): 44 | model = GPT4_Vision() 45 | signal_group = SignalGroup("data") 46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 47 | evaluator = ChoiceEvaluator() 48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None) 49 | assert "metrics" in results 50 | assert "overall" in results["metrics"] 51 | -------------------------------------------------------------------------------- /spectrumlab/models/deepseek_api.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from .base_api import BaseAPIModel 3 | from spectrumlab.config import Config 4 | from openai import OpenAI 5 | 6 | 7 | class DeepSeek(BaseAPIModel): 8 | def __init__( 9 | self, 10 | api_key: Optional[str] = None, 11 | base_url: Optional[str] = None, 12 | model_name: Optional[str] = None, 13 | **kwargs, 14 | ): 15 | config = Config() 16 | 17 | # Use provided parameters or fall back to config 18 | self.api_key = api_key or config.deepseek_api_key 19 | self.base_url = base_url or config.deepseek_base_url 20 | self.model_name = model_name or config.deepseek_model_name 21 | 22 | # Validate that we have required configuration 23 | if not self.api_key: 24 | raise ValueError( 25 | "DeepSeek API key not found. Please set DEEPSEEK_API_KEY in your .env file " 26 | "or provide api_key parameter." 
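                # Only the API key is validated; base_url and model_name fall
                # back to Config defaults above.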
27 | ) 28 | 29 | self.client = OpenAI( 30 | api_key=self.api_key, 31 | base_url=self.base_url, 32 | ) 33 | 34 | # Initialize parent class 35 | super().__init__(model_name=self.model_name, **kwargs) 36 | 37 | def generate(self, prompt: str, max_out_len: int = 512, json_output: bool = False) -> str: 38 | messages = [] 39 | 40 | if json_output: 41 | messages.append({"role": "system", "content": "Respond in JSON format."}) 42 | 43 | messages.append({"role": "user", "content": prompt}) 44 | 45 | try: 46 | response = self.client.chat.completions.create( 47 | model=self.model_name, 48 | messages=messages, 49 | max_tokens=max_out_len, 50 | stream=False, 51 | ) 52 | return response.choices[0].message.content 53 | except Exception as e: 54 | raise RuntimeError(f"DeepSeek API call failed: {e}") -------------------------------------------------------------------------------- /tests/models/test_claude_haiku_3_5.py: -------------------------------------------------------------------------------- 1 | from spectrumlab.models import Claude_Haiku_3_5 2 | from spectrumlab.utils.image_utils import encode_image_to_base64 3 | from spectrumlab.benchmark.signal_group import SignalGroup 4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator 5 | 6 | 7 | def test_claude_text_generation(): 8 | model = Claude_Haiku_3_5() 9 | prompt = "What is spectroscopy?" 10 | response = model.generate(prompt) 11 | assert isinstance(response, str) 12 | assert len(response) > 0 13 | 14 | 15 | def test_claude_multimodal_generation(): 16 | model = Claude_Haiku_3_5() 17 | image_path = "playground/models/test.jpg" 18 | image_base64 = encode_image_to_base64(image_path) 19 | prompt = { 20 | "text": "Please explain this spectroscopy image.", 21 | "images": [ 22 | { 23 | "type": "image_url", 24 | "image_url": {"url": f"data:image/jpg;base64,{image_base64}"}, 25 | } 26 | ], 27 | } 28 | response = model.generate(prompt) 29 | assert isinstance(response, str) 30 | assert len(response) > 0 31 | 32 | 33 | def test_claude_signalgroup_evaluation(): 34 | model = Claude_Haiku_3_5() 35 | signal_group = SignalGroup("data") 36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 37 | evaluator = ChoiceEvaluator() 38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None) 39 | assert "metrics" in results 40 | assert "overall" in results["metrics"] 41 | 42 | 43 | def test_claude_signalgroup_evaluation_parallel(): 44 | model = Claude_Haiku_3_5() 45 | signal_group = SignalGroup("data") 46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 47 | evaluator = ChoiceEvaluator() 48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None) 49 | assert "metrics" in results 50 | assert "overall" in results["metrics"] 51 | -------------------------------------------------------------------------------- /tests/models/test_claude_sonnet_3_5.py: -------------------------------------------------------------------------------- 1 | from spectrumlab.models import Claude_Sonnet_3_5 2 | from spectrumlab.utils.image_utils import encode_image_to_base64 3 | from spectrumlab.benchmark.signal_group import SignalGroup 4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator 5 | 6 | 7 | def test_claude_text_generation(): 8 | model = Claude_Sonnet_3_5() 9 | prompt = "What is spectroscopy?"
10 | response = model.generate(prompt) 11 | assert isinstance(response, str) 12 | assert len(response) > 0 13 | 14 | 15 | def test_claude_multimodal_generation(): 16 | model = Claude_Sonnet_3_5() 17 | image_path = "playground/models/test.jpg" 18 | image_base64 = encode_image_to_base64(image_path) 19 | prompt = { 20 | "text": "Please explain this spectroscopy image.", 21 | "images": [ 22 | { 23 | "type": "image_url", 24 | "image_url": {"url": f"data:image/jpg;base64,{image_base64}"}, 25 | } 26 | ], 27 | } 28 | response = model.generate(prompt) 29 | assert isinstance(response, str) 30 | assert len(response) > 0 31 | 32 | 33 | def test_claude_signalgroup_evaluation(): 34 | model = Claude_Sonnet_3_5() 35 | signal_group = SignalGroup("data") 36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 37 | evaluator = ChoiceEvaluator() 38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None) 39 | assert "metrics" in results 40 | assert "overall" in results["metrics"] 41 | 42 | 43 | def test_claude_signalgroup_evaluation_parallel(): 44 | model = Claude_Sonnet_3_5() 45 | signal_group = SignalGroup("data") 46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 47 | evaluator = ChoiceEvaluator() 48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None) 49 | assert "metrics" in results 50 | assert "overall" in results["metrics"] -------------------------------------------------------------------------------- /tests/models/test_deepseek_vl_2.py: -------------------------------------------------------------------------------- 1 | from spectrumlab.models import DeepSeek_VL2 2 | from spectrumlab.utils.image_utils import encode_image_to_base64 3 | from spectrumlab.benchmark.signal_group import SignalGroup 4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator 5 | 6 | 7 | def test_deepseek_vl_2_text_generation(): 8 | model = DeepSeek_VL2() 9 | prompt = "What is spectroscopy?" 
10 | response = model.generate(prompt) 11 | assert isinstance(response, str) 12 | assert len(response) > 0 13 | 14 | 15 | def test_deepseek_vl_2_multimodal_generation(): 16 | model = DeepSeek_VL2() 17 | image_path = "playground/models/test.jpg" 18 | image_base64 = encode_image_to_base64(image_path) 19 | prompt = { 20 | "text": "Please explain this spectroscopy image.", 21 | "images": [ 22 | { 23 | "type": "image_url", 24 | "image_url": {"url": f"data:image/jpg;base64,{image_base64}"}, 25 | } 26 | ], 27 | } 28 | response = model.generate(prompt) 29 | assert isinstance(response, str) 30 | assert len(response) > 0 31 | 32 | 33 | def test_deepseek_vl_2_signalgroup_evaluation(): 34 | model = DeepSeek_VL2() 35 | signal_group = SignalGroup("data") 36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 37 | evaluator = ChoiceEvaluator() 38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None) 39 | assert "metrics" in results 40 | assert "overall" in results["metrics"] 41 | 42 | 43 | def test_deepseek_vl_2_signalgroup_evaluation_parallel(): 44 | model = DeepSeek_VL2() 45 | signal_group = SignalGroup("data") 46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 47 | evaluator = ChoiceEvaluator() 48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None) 49 | assert "metrics" in results 50 | assert "overall" in results["metrics"] -------------------------------------------------------------------------------- /tests/models/test_internvl.py: -------------------------------------------------------------------------------- 1 | from spectrumlab.models import InternVL 2 | from spectrumlab.utils.image_utils import encode_image_to_base64 3 | from spectrumlab.benchmark.signal_group import SignalGroup 4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator 5 | 6 | 7 | def test_internvl_text_generation(): 8 | model = InternVL() 9 | prompt = "What is spectroscopy?" 
10 | response = model.generate(prompt) 11 | assert isinstance(response, str) 12 | assert len(response) > 0 13 | 14 | 15 | def test_internvl_multimodal_generation(): 16 | model = InternVL() 17 | image_path = "playground/models/test.jpg" 18 | image_base64 = encode_image_to_base64(image_path) 19 | prompt = { 20 | "text": "Please explain this spectroscopy image.", 21 | "images": [ 22 | { 23 | "type": "image_url", 24 | "image_url": {"url": f"data:image/jpg;base64,{image_base64}"}, 25 | } 26 | ], 27 | } 28 | response = model.generate(prompt) 29 | assert isinstance(response, str) 30 | assert len(response) > 0 31 | 32 | 33 | def test_internvl_signalgroup_evaluation(): 34 | model = InternVL() 35 | signal_group = SignalGroup("data") 36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 37 | evaluator = ChoiceEvaluator() 38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None) 39 | assert "metrics" in results 40 | assert "overall" in results["metrics"] 41 | 42 | 43 | def test_internvl_signalgroup_evaluation_parallel(): 44 | model = InternVL() 45 | signal_group = SignalGroup("data") 46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 47 | evaluator = ChoiceEvaluator() 48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None) 49 | assert "metrics" in results 50 | assert "overall" in results["metrics"] 51 | -------------------------------------------------------------------------------- /leaderboard/vue/src/components/icons/IconEcosystem.vue: -------------------------------------------------------------------------------- 1 | 8 | -------------------------------------------------------------------------------- /tests/models/test_grok_2_v.py: -------------------------------------------------------------------------------- 1 | from spectrumlab.models import Grok_2_Vision 2 | from spectrumlab.utils.image_utils import encode_image_to_base64 3 | from spectrumlab.benchmark.signal_group import SignalGroup 4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator 5 | 6 | 7 | def test_grok_2_vision_text_generation(): 8 | model = Grok_2_Vision() 9 | prompt = "What is spectroscopy?"
10 | response = model.generate(prompt) 11 | assert isinstance(response, str) 12 | assert len(response) > 0 13 | 14 | 15 | def test_grok_2_vision_multimodal_generation(): 16 | model = Grok_2_Vision() 17 | image_path = "playground/models/test.jpg" 18 | image_base64 = encode_image_to_base64(image_path) 19 | prompt = { 20 | "text": "Please explain this spectroscopy image.", 21 | "images": [ 22 | { 23 | "type": "image_url", 24 | "image_url": {"url": f"data:image/jpg;base64,{image_base64}"}, 25 | } 26 | ], 27 | } 28 | response = model.generate(prompt) 29 | assert isinstance(response, str) 30 | assert len(response) > 0 31 | 32 | 33 | def test_grok_2_vision_signalgroup_evaluation(): 34 | model = Grok_2_Vision() 35 | signal_group = SignalGroup("data") 36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 37 | evaluator = ChoiceEvaluator() 38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None) 39 | assert "metrics" in results 40 | assert "overall" in results["metrics"] 41 | 42 | 43 | def test_grok_2_vision_signalgroup_evaluation_parallel(): 44 | model = Grok_2_Vision() 45 | signal_group = SignalGroup("data") 46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 47 | evaluator = ChoiceEvaluator() 48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None) 49 | assert "metrics" in results 50 | assert "overall" in results["metrics"] 51 | -------------------------------------------------------------------------------- /tests/models/test_qwen_vl_2_5_32b.py: -------------------------------------------------------------------------------- 1 | from spectrumlab.models import Qwen_2_5_VL_32B 2 | from spectrumlab.utils.image_utils import encode_image_to_base64 3 | from spectrumlab.benchmark.signal_group import SignalGroup 4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator 5 | 6 | 7 | def test_qwen_2_5_vl_32b_text_generation(): 8 | model = Qwen_2_5_VL_32B() 9 | prompt = "What is spectroscopy?" 
10 | response = model.generate(prompt) 11 | assert isinstance(response, str) 12 | assert len(response) > 0 13 | 14 | 15 | def test_qwen_2_5_vl_32b_multimodal_generation(): 16 | model = Qwen_2_5_VL_32B() 17 | image_path = "playground/models/test.jpg" 18 | image_base64 = encode_image_to_base64(image_path) 19 | prompt = { 20 | "text": "Please explain this spectroscopy image.", 21 | "images": [ 22 | { 23 | "type": "image_url", 24 | "image_url": {"url": f"data:image/jpg;base64,{image_base64}"}, 25 | } 26 | ], 27 | } 28 | response = model.generate(prompt) 29 | assert isinstance(response, str) 30 | assert len(response) > 0 31 | 32 | 33 | def test_qwen_2_5_vl_32b_signalgroup_evaluation(): 34 | model = Qwen_2_5_VL_32B() 35 | signal_group = SignalGroup("data") 36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 37 | evaluator = ChoiceEvaluator() 38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None) 39 | assert "metrics" in results 40 | assert "overall" in results["metrics"] 41 | 42 | 43 | def test_qwen_2_5_vl_32b_signalgroup_evaluation_parallel(): 44 | model = Qwen_2_5_VL_32B() 45 | signal_group = SignalGroup("data") 46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 47 | evaluator = ChoiceEvaluator() 48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None) 49 | assert "metrics" in results 50 | assert "overall" in results["metrics"] 51 | -------------------------------------------------------------------------------- /tests/models/test_qwen_vl_2_5_72b.py: -------------------------------------------------------------------------------- 1 | from spectrumlab.models import Qwen_2_5_VL_72B 2 | from spectrumlab.utils.image_utils import encode_image_to_base64 3 | from spectrumlab.benchmark.signal_group import SignalGroup 4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator 5 | 6 | 7 | def test_qwen_2_5_vl_72b_text_generation(): 8 | model = Qwen_2_5_VL_72B() 9 | prompt = "What is spectroscopy?" 
10 | response = model.generate(prompt) 11 | assert isinstance(response, str) 12 | assert len(response) > 0 13 | 14 | 15 | def test_qwen_2_5_vl_72b_multimodal_generation(): 16 | model = Qwen_2_5_VL_72B() 17 | image_path = "playground/models/test.jpg" 18 | image_base64 = encode_image_to_base64(image_path) 19 | prompt = { 20 | "text": "Please explain this spectroscopy image.", 21 | "images": [ 22 | { 23 | "type": "image_url", 24 | "image_url": {"url": f"data:image/jpg;base64,{image_base64}"}, 25 | } 26 | ], 27 | } 28 | response = model.generate(prompt) 29 | assert isinstance(response, str) 30 | assert len(response) > 0 31 | 32 | 33 | def test_qwen_2_5_vl_72b_signalgroup_evaluation(): 34 | model = Qwen_2_5_VL_72B() 35 | signal_group = SignalGroup("data") 36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 37 | evaluator = ChoiceEvaluator() 38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None) 39 | assert "metrics" in results 40 | assert "overall" in results["metrics"] 41 | 42 | 43 | def test_qwen_2_5_vl_72b_signalgroup_evaluation_parallel(): 44 | model = Qwen_2_5_VL_72B() 45 | signal_group = SignalGroup("data") 46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 47 | evaluator = ChoiceEvaluator() 48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None) 49 | assert "metrics" in results 50 | assert "overall" in results["metrics"] 51 | -------------------------------------------------------------------------------- /tests/models/test_llama_3_2_vision_11b.py: -------------------------------------------------------------------------------- 1 | from spectrumlab.models import Llama_Vision_11B 2 | from spectrumlab.utils.image_utils import encode_image_to_base64 3 | from spectrumlab.benchmark.signal_group import SignalGroup 4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator 5 | 6 | 7 | def test_llama_vision_11b_text_generation(): 8 | model = Llama_Vision_11B() 9 | prompt = "What is spectroscopy?" 
10 | response = model.generate(prompt) 11 | assert isinstance(response, str) 12 | assert len(response) > 0 13 | 14 | 15 | def test_llama_vision_11b_multimodal_generation(): 16 | model = Llama_Vision_11B() 17 | image_path = "playground/models/test.jpg" 18 | image_base64 = encode_image_to_base64(image_path) 19 | prompt = { 20 | "text": "Please explain this spectroscopy image.", 21 | "images": [ 22 | { 23 | "type": "image_url", 24 | "image_url": {"url": f"data:image/jpg;base64,{image_base64}"}, 25 | } 26 | ], 27 | } 28 | response = model.generate(prompt) 29 | assert isinstance(response, str) 30 | assert len(response) > 0 31 | 32 | 33 | def test_llama_vision_11b_signalgroup_evaluation(): 34 | model = Llama_Vision_11B() 35 | signal_group = SignalGroup("data") 36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 37 | evaluator = ChoiceEvaluator() 38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None) 39 | assert "metrics" in results 40 | assert "overall" in results["metrics"] 41 | 42 | 43 | def test_llama_vision_11b_signalgroup_evaluation_parallel(): 44 | model = Llama_Vision_11B() 45 | signal_group = SignalGroup("data") 46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 47 | evaluator = ChoiceEvaluator() 48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None) 49 | assert "metrics" in results 50 | assert "overall" in results["metrics"] 51 | -------------------------------------------------------------------------------- /tests/models/test_llama_3_2_vision_90b.py: -------------------------------------------------------------------------------- 1 | from spectrumlab.models import Llama_Vision_90B 2 | from spectrumlab.utils.image_utils import encode_image_to_base64 3 | from spectrumlab.benchmark.signal_group import SignalGroup 4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator 5 | 6 | 7 | def test_llama_vision_90b_text_generation(): 8 | model = Llama_Vision_90B() 9 | prompt = "What is spectroscopy?" 
10 | response = model.generate(prompt) 11 | assert isinstance(response, str) 12 | assert len(response) > 0 13 | 14 | 15 | def test_llama_vision_90b_multimodal_generation(): 16 | model = Llama_Vision_90B() 17 | image_path = "playground/models/test.jpg" 18 | image_base64 = encode_image_to_base64(image_path) 19 | prompt = { 20 | "text": "Please explain this spectroscopy image.", 21 | "images": [ 22 | { 23 | "type": "image_url", 24 | "image_url": {"url": f"data:image/jpg;base64,{image_base64}"}, 25 | } 26 | ], 27 | } 28 | response = model.generate(prompt) 29 | assert isinstance(response, str) 30 | assert len(response) > 0 31 | 32 | 33 | def test_llama_vision_90b_signalgroup_evaluation(): 34 | model = Llama_Vision_90B() 35 | signal_group = SignalGroup("data") 36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 37 | evaluator = ChoiceEvaluator() 38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None) 39 | assert "metrics" in results 40 | assert "overall" in results["metrics"] 41 | 42 | 43 | def test_llama_vision_90b_signalgroup_evaluation_parallel(): 44 | model = Llama_Vision_90B() 45 | signal_group = SignalGroup("data") 46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 47 | evaluator = ChoiceEvaluator() 48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None) 49 | assert "metrics" in results 50 | assert "overall" in results["metrics"] 51 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute to This Project 2 | 3 | ## Standard Development Workflow 4 | 5 | 1. Browse the Issues on GitHub to find a feature you would like to add or a bug you would like to fix, and check whether it is already covered by a Pull Request. 6 | - If not, create a new Issue. Unless your PR is very small, it should point to a specific Issue; this avoids duplicated work and makes code review more efficient. 7 | 2. If this is your first contribution to the project, go to the repository homepage and click the "Fork" button in the upper-right corner; this creates your own copy of the repository for development. 8 | 9 | - Clone the forked project to your computer and add a remote pointing to this project: 10 | 11 | ```bash 12 | git clone https://github.com//hello.git 13 | cd hello 14 | git remote add upstream https://github.com/hello.git 15 | ``` 16 | 17 | 3. Develop your contribution 18 | 19 | - Make sure your fork is in sync with the main repository: 20 | 21 | ```bash 22 | git checkout main 23 | git pull upstream main 24 | ``` 25 | 26 | - Create a `git` branch where you will develop your contribution. Use a sensible name for the branch, for example: 27 | 28 | ```bash 29 | git checkout -b / 30 | ``` 31 | 32 | - As you make progress, commit your changes locally, for example: 33 | 34 | ```bash 35 | git add changed-file.py test/test-changed-file.py 36 | git commit -m "feat(integrations): Add integration with the `awesome` library" 37 | ``` 38 | 39 | 4. Submit your contribution: 40 | 41 | - [GitHub Pull Request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/about-pull-requests) 42 | - When your contribution is ready, push your branch to GitHub: 43 | 44 | ```bash 45 | git push origin / 46 | ``` 47 | 48 | - After the branch is uploaded, GitHub will print a URL for submitting your contribution as a pull request. Open that URL in your browser, write an informative title and a detailed description for your pull request, then submit it. 49 | 50 | - Please link the related Issue (an existing one or one you created) to your PR; see the right-hand column of the PR page. Alternatively, mention 51 | "fixes <issue link>" in the PR description and GitHub will link it automatically. 52 | 53 | - We will review your contribution and provide feedback. To incorporate changes suggested by reviewers, commit the edits to your branch and push to the same branch again (there is no need to re-create the pull request; it automatically tracks modifications to the branch), for example: 54 | 55 | ```bash 56 | git add tests/test-changed-file.py 57 | git commit -m "test(sdk): Add a test case to address reviewer feedback" 58 | git push origin / 59 | ``` 60 | 61 | - Once your pull request is approved by a reviewer, it will be merged into the repository's main branch. 62 | 63 | ## Setting Up the Environment 64 | 65 | Activate the Python environment you use, then run the following commands from the project root: 66 | 67 | ```bash 68 | pip install uv 69 | 70 | uv pip install -e .
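# Optional sanity check (an assumed step, not part of the original guide):
# verify that the editable install can be imported before running the tests.
python -c "import spectrumlab"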
71 | ``` 72 | -------------------------------------------------------------------------------- /docs/zh/tutorial.md: -------------------------------------------------------------------------------- 1 | # Tutorial 2 | 3 | 欢迎使用 SpectrumLab!本教程将帮助你快速了解谱学分析、SpectrumLab 平台以及如何使用它来评估大语言模型在光谱学任务上的表现。 4 | 5 | ## 什么是谱学? 6 | 7 | 谱学(Spectroscopy)是研究物质与电磁辐射相互作用的科学分支。通过分析物质吸收、发射或散射的光谱,我们可以获得关于物质结构、组成和性质的详细信息。 8 | 9 | ## 谱学的重要性 10 | 11 | 谱学在现代科学中具有重要地位,它通过分析物质与电磁辐射的相互作用,为理解物质的组成、结构和性质提供了关键手段。在化学中,谱学用于分子结构解析和反应机理研究;在材料科学中,它能表征纳米材料并进行表面分析;在生物学中,则用于研究蛋白质折叠和代谢物检测。同时,谱学在临床医学中也被广泛应用,如通过光谱技术实现无创诊断和疾病早期检测,使其成为现代科学研究和应用中不可或缺的工具。 12 | 13 | ## 常见谱学技术 14 | 15 | - **红外光谱(IR)**:分析分子振动,识别官能团。IR 谱特征吸收峰(如 C=O、O–H、C–H 等)在特征频率范围内具有标志性,是判断官能团的核心工具 16 | - **核磁共振(NMR)**:通过化学位移、信号强度和偶合常数提供分子中原子环境和结构连接信息,常用于确定分子结构(尤其有机化合物) 17 | - **紫外-可见光谱(UV-Vis)**:研究分子的电子跃迁和共轭体系,尤其用于判断电子结构、共轭长度和光学性质,不直接提供结构连接信息 18 | - **质谱(MS)**:测定分子量并通过碎片组合推断分子结构,是判断分子组成和次级结构的重要工具 19 | - **拉曼光谱(Raman)**:提供分子振动信息,能识别与 IR 类似的化学键振动,尤其对对称分子和无极性键敏感,经常作为 IR 的互补方法 20 | - **HSQC 谱图**:一种二维 NMR(^1H–^13C 或 ^1H–^15N)实验,每个交叉点代表一个直接键连接的质子–杂核对,可用于明确 ^1H–^13C(或 ^15N)一键归属,辅助峰归属和结构解析 21 | 22 | ## 什么是 SpectrumLab? 23 | 24 | ### 概述 25 | 26 | SpectrumLab 是一个开创性的统一平台和综合工具包,为加速和系统化化学光谱学领域的深度学习研究而设计。它旨在简化从数据预处理到模型评估的整个 AI 驱动的光谱学研究生命周期,为研究人员和开发者提供一个模块化、可扩展且易于使用的 Python 库和工具生态系统,以推动光谱学领域的人工智能研究和应用。 27 | 28 | ### 核心功能 29 | 30 | #### 模块化与可扩展架构 31 | 32 | SpectrumLab 采用灵活的模块化设计,其核心组件包括: 33 | 34 | - **基准测试组 (Benchmark Group)**:将 SpectrumBench 数据集进行分层组织,支持多种光谱模态和任务类型,并允许用户根据需求灵活组合,创建定制化的评测任务 35 | - **模型集成 (Model Integration)**:提供统一的框架和标准化的 API,可以无缝接入和评测各类外部模型,无论是商业闭源模型(如 GPT-4o)还是本地部署的开源模型 36 | - **评估器 (Evaluator)**:作为评估引擎的核心,支持根据不同任务(如选择题、生成题)定制评估指标和协议,确保评估的严谨性和任务适应性 37 | 38 | #### 全面的工具链生态系统 39 | 40 | 提供一个通过 PyPI 分发的 Python 库,集成了数据处理、模型开发、自动评估和可视化等核心模块,极大地简化了整个研究工作流程。 41 | 42 | #### 自动化基准生成 (SpectrumAnnotator) 43 | 44 | 紧密集成了创新的 SpectrumAnnotator 组件,该组件能利用先进多模态大模型的推理能力,从种子数据集自动生成高质量、多样化的基准测试数据,高效构建评测任务。 45 | 46 | #### 公开排行榜 (Leaderboards) 47 | 48 | 为确保透明度和可复现性,SpectrumLab 建立了一个公开的排行榜系统。该系统系统地追踪和比较各类模型在所有 14 项任务上的性能表现,促进公平竞争和领域的共同进步。 49 | 50 | ## 相关链接 51 | 52 | - [API 参考](/zh/api) - 了解详细的接口说明和代码示例 53 | - [基准测试](/zh/benchmark) - 查看评估指标和数据集详情 54 | - [排行榜](https://huggingface.co/spaces/SpectrumWorld/SpectrumLeaderboard) - 查看模型性能对比 55 | -------------------------------------------------------------------------------- /tests/models/test_doubao_1_5_vision_pro.py: -------------------------------------------------------------------------------- 1 | from spectrumlab.models import Doubao_1_5_Vision_Pro 2 | from spectrumlab.utils.image_utils import encode_image_to_base64 3 | from spectrumlab.benchmark.signal_group import SignalGroup 4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator 5 | 6 | 7 | def test_doubao_1_5_vision_pro_text_generation(): 8 | model = Doubao_1_5_Vision_Pro() 9 | prompt = "What is spectroscopy?" 
10 | response = model.generate(prompt) 11 | assert isinstance(response, str) 12 | assert len(response) > 0 13 | 14 | 15 | def test_doubao_1_5_vision_pro_multimodal_generation(): 16 | model = Doubao_1_5_Vision_Pro() 17 | image_path = "playground/models/test.jpg" 18 | image_base64 = encode_image_to_base64(image_path) 19 | prompt = { 20 | "text": "Please explain this spectroscopy image.", 21 | "images": [ 22 | { 23 | "type": "image_url", 24 | "image_url": {"url": f"data:image/jpg;base64,{image_base64}"}, 25 | } 26 | ], 27 | } 28 | response = model.generate(prompt) 29 | assert isinstance(response, str) 30 | assert len(response) > 0 31 | 32 | 33 | def test_doubao_1_5_vision_pro_signalgroup_evaluation(): 34 | model = Doubao_1_5_Vision_Pro() 35 | signal_group = SignalGroup("data") 36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 37 | evaluator = ChoiceEvaluator() 38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None) 39 | assert "metrics" in results 40 | assert "overall" in results["metrics"] 41 | 42 | 43 | def test_doubao_1_5_vision_pro_signalgroup_evaluation_parallel(): 44 | model = Doubao_1_5_Vision_Pro() 45 | signal_group = SignalGroup("data") 46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 47 | evaluator = ChoiceEvaluator() 48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None) 49 | assert "metrics" in results 50 | assert "overall" in results["metrics"] 51 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | /** 文件夹材质 */ 3 | "workbench.iconTheme": "material-icon-theme", 4 | /* 自定义图标关联 */ 5 | "material-icon-theme.folders.associations": { 6 | "leaderboard": "Secure", 7 | "evaluator": "Plastic", 8 | "spectrumlab": "Api", 9 | }, 10 | "material-icon-theme.files.associations": {}, 11 | "editor.codeActionsOnSave": { 12 | "source.fixAll.eslint": "explicit" 13 | }, 14 | "editor.formatOnSave": true, 15 | "eslint.format.enable": true, 16 | "[python]": { 17 | "editor.defaultFormatter": "ms-python.black-formatter", 18 | "editor.formatOnSave": true 19 | }, 20 | /** TODO tree 配置 */ 21 | "todo-tree.general.tags": [ 22 | "TODO", // 待办 23 | "FIXME", // 待修复 24 | "COMPAT", // 兼容性问题 25 | "WARNING" // 警告 26 | ], 27 | "todo-tree.highlights.customHighlight": { 28 | "TODO": { 29 | "icon": "check", 30 | "type": "tag", 31 | "foreground": "#ffff00", 32 | "iconColour": "#ffff" 33 | }, 34 | "WARNING": { 35 | "icon": "alert", 36 | "type": "tag", 37 | "foreground": "#ff0000", 38 | "iconColour": "#ff0000" 39 | }, 40 | "FIXME": { 41 | "icon": "flame", 42 | "type": "tag", 43 | "foreground": "#ff0000", 44 | "iconColour": "#ff0000" 45 | }, 46 | "COMPAT": { 47 | "icon": "flame", 48 | "type": "tag", 49 | "foreground": "#00ff00", 50 | "iconColour": "#ffff" 51 | } 52 | }, 53 | /** python代码注释 */ 54 | "autoDocstring.docstringFormat": "numpy", 55 | /** markdown格式检查 */ 56 | "markdownlint.config": { 57 | // 允许使用html标签 58 | "MD033": false, 59 | // 允许首行不是level1标题 60 | "MD041": false 61 | }, 62 | /** 不显示文件夹 */ 63 | "files.exclude": { 64 | "**/.git": true, 65 | "**/.DS_Store": true, 66 | "**/__pycache__": true, 67 | ".idea": true 68 | }, 69 | "python.testing.pytestEnabled": true 70 | } -------------------------------------------------------------------------------- /tests/models/test_doubao_1_5_vision_pro_thinking.py: 
-------------------------------------------------------------------------------- 1 | from spectrumlab.models import Doubao_1_5_Vision_Pro_Thinking 2 | from spectrumlab.utils.image_utils import encode_image_to_base64 3 | from spectrumlab.benchmark.signal_group import SignalGroup 4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator 5 | 6 | 7 | def test_doubao_1_5_vision_pro_thinking_text_generation(): 8 | model = Doubao_1_5_Vision_Pro_Thinking() 9 | prompt = "What is spectroscopy?" 10 | response = model.generate(prompt) 11 | assert isinstance(response, str) 12 | assert len(response) > 0 13 | 14 | 15 | def test_doubao_1_5_vision_pro_thinking_multimodal_generation(): 16 | model = Doubao_1_5_Vision_Pro_Thinking() 17 | image_path = "playground/models/test.jpg" 18 | image_base64 = encode_image_to_base64(image_path) 19 | prompt = { 20 | "text": "Please explain this spectroscopy image.", 21 | "images": [ 22 | { 23 | "type": "image_url", 24 | "image_url": {"url": f"data:image/jpg;base64,{image_base64}"}, 25 | } 26 | ], 27 | } 28 | response = model.generate(prompt) 29 | assert isinstance(response, str) 30 | assert len(response) > 0 31 | 32 | 33 | def test_doubao_1_5_vision_pro_thinking_signalgroup_evaluation(): 34 | model = Doubao_1_5_Vision_Pro_Thinking() 35 | signal_group = SignalGroup("data") 36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 37 | evaluator = ChoiceEvaluator() 38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None) 39 | assert "metrics" in results 40 | assert "overall" in results["metrics"] 41 | 42 | 43 | def test_doubao_1_5_vision_pro_thinking_signalgroup_evaluation_parallel(): 44 | model = Doubao_1_5_Vision_Pro_Thinking() 45 | signal_group = SignalGroup("data") 46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 47 | evaluator = ChoiceEvaluator() 48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None) 49 | assert "metrics" in results 50 | assert "overall" in results["metrics"] 51 | -------------------------------------------------------------------------------- /docs/.vitepress/config.mts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'vitepress' 2 | import { en } from './en' 3 | import { zh } from './zh' 4 | 5 | // https://vitepress.dev/reference/site-config 6 | export default defineConfig({ 7 | base: '/SpectrumLab/', 8 | 9 | rewrites: { 10 | 'zh/:rest*': ':rest*' 11 | }, 12 | 13 | cleanUrls: true, 14 | 15 | // 全局共享配置 16 | themeConfig: { 17 | socialLinks: [ 18 | { icon: 'github', link: 'https://github.com/little1d/spectrumlab' } 19 | ], 20 | 21 | lastUpdated: { 22 | text: "最后更新于", 23 | formatOptions: { 24 | dateStyle: 'full', 25 | timeStyle: 'medium', 26 | }, 27 | }, 28 | 29 | search: { 30 | provider: 'local', 31 | options: { 32 | locales: { 33 | root: { 34 | translations: { 35 | button: { 36 | buttonText: '搜索文档', 37 | buttonAriaLabel: '搜索文档', 38 | }, 39 | modal: { 40 | noResultsText: '无法找到相关结果', 41 | resetButtonTitle: '清除查询条件', 42 | footer: { 43 | selectText: '选择', 44 | navigateText: '切换', 45 | closeText: '关闭', 46 | } 47 | } 48 | } 49 | }, 50 | en: { 51 | translations: { 52 | button: { 53 | buttonText: 'Search', 54 | buttonAriaLabel: 'Search', 55 | }, 56 | modal: { 57 | noResultsText: 'No results found', 58 | resetButtonTitle: 'Clear search criteria', 59 | footer: { 60 | selectText: 'to select', 61 | navigateText: 'to navigate', 62 | closeText: 'to close', 63 | } 64 | } 65 | 
} 66 | } 67 | } 68 | } 69 | }, 70 | }, 71 | 72 | markdown: { 73 | image: { 74 | lazyLoading: true, 75 | } 76 | }, 77 | 78 | // 国际化配置 79 | locales: { 80 | root: { label: '简体中文', lang: 'zh-CN', ...zh }, 81 | en: { label: 'English', lang: 'en-US', ...en }, 82 | }, 83 | }) 84 | -------------------------------------------------------------------------------- /docs/.vitepress/en.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'vitepress' 2 | 3 | // https://vitepress.dev/reference/site-config 4 | export const en = defineConfig({ 5 | lang: 'en-US', 6 | title: "SpectrumLab", 7 | description: "A pioneering unified platform designed to systematize and accelerate deep learning research in spectroscopy", 8 | head: [ 9 | ['link', { rel: 'icon', type: 'image/svg+xml', href: '/logo.svg' }] 10 | ], 11 | themeConfig: { 12 | siteTitle: "SpectrumLab", 13 | logo: { 14 | src: '/logo.svg', 15 | }, 16 | nav: [ 17 | { text: 'Tutorial', link: '/en/tutorial' }, 18 | { text: 'API', link: '/en/api' }, 19 | { text: 'Benchmark', link: '/en/benchmark' }, 20 | { text: 'Leaderboard', link: 'https://huggingface.co/spaces/SpectrumWorld/SpectrumLeaderboard' }, 21 | ], 22 | sidebar: { 23 | '/en/': [ 24 | { 25 | text: 'Getting Started', 26 | items: [ 27 | { text: 'Introduction', link: '/en/' }, 28 | { text: 'Tutorial', link: '/en/tutorial' }, 29 | ] 30 | }, 31 | { 32 | text: 'Documentation', 33 | items: [ 34 | { text: 'API Reference', link: '/en/api' }, 35 | { text: 'Benchmark', link: '/en/benchmark' }, 36 | { text: 'Leaderboard', link: 'https://huggingface.co/spaces/SpectrumWorld/SpectrumLeaderboard' }, 37 | ] 38 | } 39 | ] 40 | }, 41 | footer: { 42 | message: 'Released under the MIT License', 43 | copyright: 'Copyright © 2025 SpectrumLab' 44 | }, 45 | docFooter: { 46 | prev: 'Previous page', 47 | next: 'Next page' 48 | }, 49 | 50 | outline: { 51 | label: 'On this page' 52 | }, 53 | 54 | lastUpdated: { 55 | text: 'Last updated' 56 | }, 57 | 58 | darkModeSwitchLabel: 'Appearance', 59 | lightModeSwitchTitle: 'Switch to light theme', 60 | darkModeSwitchTitle: 'Switch to dark theme', 61 | } 62 | }) -------------------------------------------------------------------------------- /leaderboard/vue/src/assets/base.css: -------------------------------------------------------------------------------- 1 | /* color palette from */ 2 | :root { 3 | --vt-c-white: #ffffff; 4 | --vt-c-white-soft: #f8f8f8; 5 | --vt-c-white-mute: #f2f2f2; 6 | 7 | --vt-c-black: #181818; 8 | --vt-c-black-soft: #222222; 9 | --vt-c-black-mute: #282828; 10 | 11 | --vt-c-indigo: #2c3e50; 12 | 13 | --vt-c-divider-light-1: rgba(60, 60, 60, 0.29); 14 | --vt-c-divider-light-2: rgba(60, 60, 60, 0.12); 15 | --vt-c-divider-dark-1: rgba(84, 84, 84, 0.65); 16 | --vt-c-divider-dark-2: rgba(84, 84, 84, 0.48); 17 | 18 | --vt-c-text-light-1: var(--vt-c-indigo); 19 | --vt-c-text-light-2: rgba(60, 60, 60, 0.66); 20 | --vt-c-text-dark-1: var(--vt-c-white); 21 | --vt-c-text-dark-2: rgba(235, 235, 235, 0.64); 22 | } 23 | 24 | /* semantic color variables for this project */ 25 | :root { 26 | --color-background: var(--vt-c-white); 27 | --color-background-soft: var(--vt-c-white-soft); 28 | --color-background-mute: var(--vt-c-white-mute); 29 | 30 | --color-border: var(--vt-c-divider-light-2); 31 | --color-border-hover: var(--vt-c-divider-light-1); 32 | 33 | --color-heading: var(--vt-c-text-light-1); 34 | --color-text: var(--vt-c-text-light-1); 35 | 36 | --section-gap: 160px; 37 | } 38 | 39 | @media (prefers-color-scheme: 
dark) { 40 | :root { 41 | --color-background: var(--vt-c-black); 42 | --color-background-soft: var(--vt-c-black-soft); 43 | --color-background-mute: var(--vt-c-black-mute); 44 | 45 | --color-border: var(--vt-c-divider-dark-2); 46 | --color-border-hover: var(--vt-c-divider-dark-1); 47 | 48 | --color-heading: var(--vt-c-text-dark-1); 49 | --color-text: var(--vt-c-text-dark-2); 50 | } 51 | } 52 | 53 | *, 54 | *::before, 55 | *::after { 56 | box-sizing: border-box; 57 | margin: 0; 58 | font-weight: normal; 59 | } 60 | 61 | body { 62 | min-height: 100vh; 63 | color: var(--color-text); 64 | background: var(--color-background); 65 | transition: 66 | color 0.5s, 67 | background-color 0.5s; 68 | line-height: 1.6; 69 | font-family: 70 | Inter, 71 | -apple-system, 72 | BlinkMacSystemFont, 73 | 'Segoe UI', 74 | Roboto, 75 | Oxygen, 76 | Ubuntu, 77 | Cantarell, 78 | 'Fira Sans', 79 | 'Droid Sans', 80 | 'Helvetica Neue', 81 | sans-serif; 82 | font-size: 15px; 83 | text-rendering: optimizeLegibility; 84 | -webkit-font-smoothing: antialiased; 85 | -moz-osx-font-smoothing: grayscale; 86 | } 87 | -------------------------------------------------------------------------------- /run_evaluation.py: -------------------------------------------------------------------------------- 1 | # import swanlab 2 | from spectrumlab.models import GPT4_1 3 | from spectrumlab.benchmark.signal_group import SignalGroup 4 | from spectrumlab.benchmark.generation_group import GenerationGroup 5 | from spectrumlab.benchmark.perception_group import PerceptionGroup 6 | from spectrumlab.benchmark.semantic_group import SemanticGroup 7 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator 8 | from spectrumlab.evaluator.open_evaluator import OpenEvaluator 9 | 10 | # export your swanlab api-key 11 | 12 | # Change your model! 13 | # MODEL = GPT4o() 14 | MODEL = GPT4_1() 15 | 16 | 17 | # Change this!!! such as gpt-4o_evaluation_results 18 | SAVE_DIR = "./gpt4_1_generation_evaluation_results" 19 | 20 | # 定义每个 Group 及其子任务和评测器 21 | GROUPS = [ 22 | { 23 | "name": "Signal", 24 | "group": SignalGroup("data"), 25 | "evaluator": ChoiceEvaluator(), 26 | "subcategories": None, # None 表示全部 27 | }, 28 | { 29 | "name": "Perception", 30 | "group": PerceptionGroup("data"), 31 | "evaluator": ChoiceEvaluator(), 32 | "subcategories": None, 33 | }, 34 | { 35 | "name": "Semantic", 36 | "group": SemanticGroup("data"), 37 | "evaluator": ChoiceEvaluator(), 38 | "subcategories": None, 39 | }, 40 | { 41 | "name": "Generation", 42 | "group": GenerationGroup("data"), 43 | "evaluator": OpenEvaluator(), 44 | "subcategories": None, 45 | }, 46 | ] 47 | 48 | # Change the experiment_name to your model name!!! 49 | # swanlab.init( 50 | # workspace="SpectrumLab", 51 | # project="spectrumlab-eval", 52 | # experiment_name="gpt_4_1_generation_evaluation_results", 53 | # config={"model": MODEL.model_name}, 54 | # ) 55 | 56 | for group_info in GROUPS: 57 | name = group_info["name"] 58 | group = group_info["group"] 59 | evaluator = group_info["evaluator"] 60 | subcategories = group_info["subcategories"] 61 | print(f"\n===== Evaluating {name} Group =====") 62 | data = group.get_data_by_subcategories(subcategories or "all") 63 | results = evaluator.evaluate(data_items=data, model=MODEL, save_path=SAVE_DIR) 64 | accuracy = results["metrics"]["overall"]["accuracy"] 65 | print(f"{name} Group evaluation completed! 
Overall accuracy: {accuracy:.2f}%\n") 66 | # swanlab.log({f"{name}_accuracy": accuracy}) 67 | 68 | # swanlab.finish() 69 | 70 | # use nohup in the terminal to start the evaluation 71 | # nohup python run_evaluation.py > run_eval.log 2>&1 & 72 | -------------------------------------------------------------------------------- /docs/.vitepress/zh.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'vitepress' 2 | 3 | // https://vitepress.dev/reference/site-config 4 | export const zh = defineConfig({ 5 | lang: 'zh-CN', 6 | title: "SpectrumLab", 7 | description: "A pioneering unified platform designed to systematize and accelerate deep learning research in spectroscopy.", 8 | 9 | head: [ 10 | ['link', { rel: 'icon', type: 'image/svg+xml', href: '/logo.svg' }] 11 | // ['link', { rel: 'preconnect', href: 'https://fonts.googleapis.com' }], 12 | // [ 13 | // 'link', 14 | // { rel: 'preconnect', href: 'https://fonts.gstatic.com', crossorigin: '' } 15 | // ], 16 | // [ 17 | // 'link', 18 | // { href: 'https://fonts.googleapis.com/css2?family=Roboto&display=swap', rel: 'stylesheet' } 19 | // ] 20 | ], 21 | 22 | themeConfig: { 23 | siteTitle: "SpectrumLab", 24 | logo: { 25 | src: '/logo.svg', 26 | }, 27 | nav: [ 28 | { text: '教程', link: '/tutorial' }, 29 | { text: 'API', link: '/api' }, 30 | { text: '基准测试', link: '/benchmark' }, 31 | { text: '排行榜', link: 'https://huggingface.co/spaces/SpectrumWorld/SpectrumLeaderboard' }, 32 | ], 33 | 34 | sidebar: { 35 | '/': [ 36 | { 37 | text: '开始使用', 38 | items: [ 39 | { text: '介绍', link: '/' }, 40 | { text: '教程', link: '/tutorial' }, 41 | ] 42 | }, 43 | { 44 | text: '文档', 45 | items: [ 46 | { text: 'API 参考', link: '/api' }, 47 | { text: '基准测试', link: '/benchmark' }, 48 | { text: '排行榜', link: 'https://huggingface.co/spaces/SpectrumWorld/SpectrumLeaderboard' }, 49 | ] 50 | } 51 | ] 52 | }, 53 | footer: { 54 | message: '基于 MIT 许可发布', 55 | copyright: 'Copyright © 2025 SpectrumLab' 56 | }, 57 | docFooter: { 58 | prev: '上一页', 59 | next: '下一页' 60 | }, 61 | outline: { 62 | label: '页面导航' 63 | }, 64 | lastUpdated: { 65 | text: '最后更新于' 66 | }, 67 | darkModeSwitchLabel: '主题', 68 | lightModeSwitchTitle: '切换到浅色模式', 69 | darkModeSwitchTitle: '切换到深色模式', 70 | } 71 | }) 72 | -------------------------------------------------------------------------------- /spectrumlab/utils/image_utils.py: -------------------------------------------------------------------------------- 1 | import base64 2 | from pathlib import Path 3 | from typing import List, Dict, Optional, Any, Union 4 | 5 | 6 | def encode_image_to_base64(image_path: str) -> str: 7 | try: 8 | with open(image_path, "rb") as image_file: 9 | return base64.b64encode(image_file.read()).decode("utf-8") 10 | except Exception as e: 11 | raise ValueError(f"Failed to encode image to base64: {e}") 12 | 13 | 14 | def get_image_mime_type(image_path: str) -> str: 15 | path = Path(image_path) 16 | extension = path.suffix.lower() 17 | 18 | mime_type = { 19 | ".png": "image/png", 20 | ".jpg": "image/jpeg", 21 | ".jpeg": "image/jpeg", 22 | ".gif": "image/gif", 23 | ".bmp": "image/bmp", 24 | ".webp": "image/webp", 25 | } 26 | 27 | return mime_type.get(extension, "image/jpeg") 28 | 29 | 30 | def prepare_images_for_prompt( 31 | image_paths: Union[str, List[str], None], 32 | ) -> List[Dict[str, Any]]: 33 | if not image_paths: 34 | return [] 35 | 36 | # Ensure it's a list format 37 | if isinstance(image_paths, str): 38 | image_paths = [image_paths] 39 | 40 | image_data = [] 41 | for 
image_path in image_paths: 42 | if not image_path or not image_path.strip(): 43 | continue 44 | 45 | path = Path(image_path) 46 | if not path.exists(): 47 | print(f"⚠️ Warning: Image file not found: {image_path}") 48 | continue 49 | 50 | try: 51 | base64_image = encode_image_to_base64(image_path) 52 | mime_type = get_image_mime_type(image_path) 53 | 54 | image_info = { 55 | "type": "image_url", 56 | "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}, 57 | } 58 | image_data.append(image_info) 59 | 60 | except Exception as e: 61 | print(f"⚠️ Warning: Failed to process image {image_path}: {e}") 62 | continue 63 | 64 | return image_data 65 | 66 | 67 | def normalize_image_paths(image_paths_field: Any) -> Optional[List[str]]: 68 | if not image_paths_field: 69 | return None 70 | if isinstance(image_paths_field, str): 71 | if image_paths_field.strip() == "": 72 | return None 73 | return [image_paths_field.strip()] 74 | if isinstance(image_paths_field, list): 75 | # Process each element, keeping only non-empty strings 76 | paths = [] 77 | for p in image_paths_field: 78 | if isinstance(p, str) and p.strip(): 79 | paths.append(p.strip()) 80 | return paths if paths else None 81 | return None 82 | -------------------------------------------------------------------------------- /spectrumlab/models/deepseek_vl.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Union, Dict, Any 2 | from .base_api import BaseAPIModel 3 | from spectrumlab.config import Config 4 | from openai import OpenAI 5 | 6 | 7 | class DeepSeek_VL2(BaseAPIModel): 8 | def __init__( 9 | self, 10 | api_key: Optional[str] = None, 11 | base_url: Optional[str] = None, 12 | model_name: Optional[str] = None, 13 | **kwargs, 14 | ): 15 | config = Config() 16 | 17 | # Use provided parameters or fall back to config 18 | self.api_key = api_key or config.deepseek_vl_2_api_key 19 | self.base_url = base_url or config.deepseek_vl_2_base_url 20 | self.model_name = model_name or config.deepseek_vl_2_model_name 21 | 22 | # Validate that we have required configuration 23 | if not self.api_key: 24 | raise ValueError( 25 | "DeepSeek-VL2 API key not found. Please set the DeepSeek-VL2 API key in your .env file " 26 | "or provide api_key parameter." 27 | ) 28 | 29 | self.client = OpenAI( 30 | api_key=self.api_key, 31 | base_url=self.base_url, 32 | ) 33 | 34 | # Initialize parent class 35 | super().__init__(model_name=self.model_name, **kwargs) 36 | 37 | def generate( 38 | self, prompt: Union[str, Dict[str, Any]], max_tokens: int = 512 39 | ) -> str: 40 | """ 41 | Generate response supporting both text and multimodal input.
42 | 43 | Args: 44 | prompt: Either text string or multimodal dict 45 | max_tokens: Maximum tokens to generate 46 | 47 | Returns: 48 | Generated response string 49 | """ 50 | 51 | # DeepSeek-VL2 is accessed through an OpenAI-compatible chat completions endpoint 52 | messages = [] 53 | 54 | if isinstance(prompt, dict) and "images" in prompt: 55 | content = [] 56 | 57 | content.append({"type": "text", "text": prompt["text"]}) 58 | 59 | for image_data in prompt["images"]: 60 | content.append(image_data) 61 | 62 | messages.append({"role": "user", "content": content}) 63 | else: 64 | text_content = prompt if isinstance(prompt, str) else prompt.get("text", "") 65 | messages.append({"role": "user", "content": text_content}) 66 | 67 | try: 68 | response = self.client.chat.completions.create( 69 | model=self.model_name, 70 | messages=messages, 71 | max_tokens=max_tokens, 72 | ) 73 | return response.choices[0].message.content 74 | except Exception as e: 75 | raise RuntimeError(f"DeepSeek-VL2 API call failed: {e}") 76 | -------------------------------------------------------------------------------- /spectrumlab/models/gpt4o_api.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Any, Optional, Union 2 | from .base_api import BaseAPIModel 3 | from spectrumlab.config import Config 4 | from openai import OpenAI 5 | 6 | 7 | class GPT4o(BaseAPIModel): 8 | def __init__( 9 | self, 10 | api_key: Optional[str] = None, 11 | base_url: Optional[str] = None, 12 | model_name: Optional[str] = None, 13 | **kwargs, 14 | ): 15 | config = Config() 16 | 17 | # Use provided parameters or fall back to config 18 | self.api_key = api_key or config.gpt4o_api_key 19 | self.base_url = base_url or config.gpt4o_base_url 20 | self.model_name = model_name or config.gpt4o_model_name 21 | 22 | # Validate that we have required configuration 23 | if not self.api_key: 24 | raise ValueError( 25 | "GPT-4o API key not found. Please set GPT4O_API_KEY in your .env file " 26 | "or provide api_key parameter." 27 | ) 28 | 29 | self.client = OpenAI( 30 | api_key=self.api_key, 31 | base_url=self.base_url, 32 | ) 33 | 34 | # Initialize parent class 35 | super().__init__(model_name=self.model_name, **kwargs) 36 | 37 | def generate( 38 | self, prompt: Union[str, Dict[str, Any]], max_tokens: int = 512 39 | ) -> str: 40 | """ 41 | Generate response supporting both text and multimodal input.
42 | 43 | Args: 44 | prompt: Either text string or multimodal dict 45 | max_tokens: Maximum tokens to generate 46 | 47 | Returns: 48 | Generated response string 49 | """ 50 | messages = [] 51 | 52 | # Handle multimodal vs text-only prompts 53 | if isinstance(prompt, dict) and "images" in prompt: 54 | # Multimodal prompt 55 | content = [] 56 | 57 | content.append({"type": "text", "text": prompt["text"]}) 58 | 59 | for image_data in prompt["images"]: 60 | content.append(image_data) 61 | 62 | messages.append({"role": "user", "content": content}) 63 | else: 64 | # Text-only prompt 65 | text_content = prompt if isinstance(prompt, str) else prompt.get("text", "") 66 | messages.append({"role": "user", "content": text_content}) 67 | 68 | try: 69 | response = self.client.chat.completions.create( 70 | model=self.model_name, 71 | messages=messages, 72 | max_tokens=max_tokens, 73 | ) 74 | return response.choices[0].message.content 75 | except Exception as e: 76 | raise RuntimeError(f"GPT-4o API call failed: {e}") 77 | -------------------------------------------------------------------------------- /spectrumlab/models/grok_api.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Any, Optional, Union 2 | from .base_api import BaseAPIModel 3 | from spectrumlab.config import Config 4 | from openai import OpenAI 5 | 6 | 7 | class Grok_2_Vision(BaseAPIModel): 8 | def __init__( 9 | self, 10 | api_key: Optional[str] = None, 11 | base_url: Optional[str] = None, 12 | model_name: Optional[str] = None, 13 | **kwargs, 14 | ): 15 | config = Config() 16 | 17 | # Use provided parameters or fall back to config 18 | self.api_key = api_key or config.grok_2_vision_api_key 19 | self.base_url = base_url or config.grok_2_vision_base_url 20 | self.model_name = model_name or config.grok_2_vision_model_name 21 | 22 | # Validate that we have required configuration 23 | if not self.api_key: 24 | raise ValueError( 25 | "Grok-2-Vision API key not found. Please set GROK_2_VISION_API_KEY in your .env file " 26 | "or provide api_key parameter." 27 | ) 28 | 29 | self.client = OpenAI( 30 | api_key=self.api_key, 31 | base_url=self.base_url, 32 | ) 33 | 34 | # Initialize parent class 35 | super().__init__(model_name=self.model_name, **kwargs) 36 | 37 | def generate( 38 | self, prompt: Union[str, Dict[str, Any]], max_tokens: int = 512 39 | ) -> str: 40 | """ 41 | Generate response supporting both text and multimodal input. 
42 | 43 | Args: 44 | prompt: Either text string or multimodal dict 45 | max_tokens: Maximum tokens to generate 46 | 47 | Returns: 48 | Generated response string 49 | """ 50 | messages = [] 51 | 52 | # Handle multimodal vs text-only prompts 53 | if isinstance(prompt, dict) and "images" in prompt: 54 | # Multimodal prompt 55 | content = [] 56 | 57 | content.append({"type": "text", "text": prompt["text"]}) 58 | 59 | for image_data in prompt["images"]: 60 | content.append(image_data) 61 | 62 | messages.append({"role": "user", "content": content}) 63 | else: 64 | # Text-only prompt 65 | text_content = prompt if isinstance(prompt, str) else prompt.get("text", "") 66 | messages.append({"role": "user", "content": text_content}) 67 | 68 | try: 69 | response = self.client.chat.completions.create( 70 | model=self.model_name, 71 | messages=messages, 72 | max_tokens=max_tokens, 73 | ) 74 | return response.choices[0].message.content 75 | except Exception as e: 76 | raise RuntimeError(f"Grok-2-Vision API call failed: {e}") 77 | -------------------------------------------------------------------------------- /docs/en/benchmark.md: -------------------------------------------------------------------------------- 1 | # Benchmark 2 | ## Benchmark Overview 3 | 4 | The SpectrumLab benchmark adopts a hierarchical architecture, comprehensively evaluating a model's capabilities on spectroscopy tasks ranging from signal processing to advanced semantic understanding. It consists of four main levels, each containing multiple sub-tasks suited to different types of spectral analysis. 5 | 6 | ## Benchmark Details 7 | 8 | ### 1. Signal Level 9 | 10 | Basic spectral signal processing and analysis, including the following subtasks: 11 | 12 | - **Spectrum Type Classification**: Identify different types of spectra (infrared, nuclear magnetic resonance, Raman, etc.). 13 | - **Spectrum Quality Assessment**: Determine whether the spectrogram is clear and complete, and whether it contains obvious noise. 14 | - **Basic Feature Extraction**: Identify basic features such as baselines, peaks, peak positions, and peak intensities in the spectrogram. 15 | - **Impurity Peak Detection**: Identify impurity peaks and abnormal signals in the spectrogram. 16 | 17 | ### 2. Perception Level 18 | 19 | Further spectral visual understanding and pattern recognition, covering: 20 | 21 | - **Basic Property Prediction**: Predict properties directly related to molecular ion peaks, solubility, and acidity/basicity based on spectral features. 22 | - **Elemental Compositional Prediction**: Identify elemental composition and isotope patterns from mass spectrometry and related data. 23 | - **Functional Group Recognition**: Predict the possible functional groups of a molecule based on spectral characteristics (especially characteristic peak positions). 24 | - **Peak Assignment**: Preliminarily assign the main peaks in the spectrum to the corresponding chemical groups. 25 | 26 | ### 3. Semantic Level 27 | 28 | Deep spectral semantic understanding and chemical knowledge reasoning, including: 29 | 30 | - **Fusing Spectroscopic Modalities**: Make comprehensive judgments by combining information from multiple spectra or molecular representations. 31 | - **Molecular Structure Elucidation**: Match the correct molecular structure from multiple candidates based on spectral information. 32 | - **Multimodal Molecular Reasoning**: Conduct complex chemical reasoning and question answering based on spectral and textual information. 33 | 34 | ### 4.
Generation Level 35 | 36 | Creatively generate new chemical information. The main tasks are: 37 | 38 | - **Forward Problems**: Infer the molecular structure from spectra, SMILES, or a combination of both. 39 | - **Inverse Problems**: Generate spectra, SMILES, etc. for given molecular structures. 40 | - **De Novo Generation**: Generate novel, diverse, and chemically reasonable molecular structures (SMILES, 2D diagrams) and/or predicted multimodal information (spectra, properties) from scratch according to specific objectives, such as molecules with desired properties or ligands for specific targets. 41 | -------------------------------------------------------------------------------- /spectrumlab/models/internvl_api.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Union, Dict, Any 2 | from .base_api import BaseAPIModel 3 | from spectrumlab.config import Config 4 | from openai import OpenAI 5 | 6 | 7 | class InternVL(BaseAPIModel): 8 | def __init__( 9 | self, 10 | api_key: Optional[str] = None, 11 | base_url: Optional[str] = None, 12 | model_name: Optional[str] = None, 13 | **kwargs, 14 | ): 15 | config = Config() 16 | 17 | # Use provided parameters or fall back to config 18 | self.api_key = api_key or config.internvl_api_key 19 | self.base_url = base_url or config.internvl_base_url 20 | self.model_name = model_name or config.internvl_model_name 21 | 22 | # Validate that we have required configuration 23 | if not self.api_key: 24 | raise ValueError( 25 | "InternVL API key not found. Please set INTERNVL_API_KEY in your .env file " 26 | "or provide api_key parameter." 27 | ) 28 | 29 | # Ensure base_url has proper protocol for OpenRouter/API services 30 | if self.base_url and not self.base_url.startswith(("http://", "https://")): 31 | self.base_url = f"https://{self.base_url}" 32 | 33 | self.client = OpenAI( 34 | api_key=self.api_key, 35 | base_url=self.base_url, 36 | ) 37 | 38 | # Initialize parent class 39 | super().__init__(model_name=self.model_name, **kwargs) 40 | 41 | def generate( 42 | self, 43 | prompt: Union[str, Dict[str, Any]], 44 | max_tokens: int = 512, 45 | **generation_kwargs, 46 | ) -> str: 47 | """ 48 | Generate response supporting both text and multimodal input. 49 | 50 | Args: 51 | prompt: Either text string or multimodal dict 52 | max_tokens: Maximum tokens to generate 53 | **generation_kwargs: Additional generation parameters like temperature, top_p, etc.
54 | 55 | Returns: 56 | Generated response string 57 | """ 58 | 59 | # Link: https://internlm.intern-ai.org.cn/api/document 60 | messages = [] 61 | 62 | if isinstance(prompt, dict) and "images" in prompt: 63 | content = [] 64 | 65 | content.append({"type": "text", "text": prompt["text"]}) 66 | 67 | for image_data in prompt["images"]: 68 | content.append(image_data) 69 | 70 | messages.append({"role": "user", "content": content}) 71 | else: 72 | text_content = prompt if isinstance(prompt, str) else prompt.get("text", "") 73 | messages.append({"role": "user", "content": text_content}) 74 | 75 | # Prepare API call parameters 76 | api_params = { 77 | "model": self.model_name, 78 | "messages": messages, 79 | "max_tokens": max_tokens, 80 | } 81 | 82 | # Add any additional generation parameters 83 | api_params.update(generation_kwargs) 84 | 85 | try: 86 | response = self.client.chat.completions.create(**api_params) 87 | return response.choices[0].message.content 88 | except Exception as e: 89 | raise RuntimeError(f"InternVL API call failed: {e}") 90 | -------------------------------------------------------------------------------- /spectrumlab/models/README.md: -------------------------------------------------------------------------------- 1 | # Model Integration & Testing Pipeline 2 | 3 | This guide explains how to quickly adapt and test a new multimodal model in SpectrumLab. 4 | 5 | ## 1. Environment Configuration (`.env`) 6 | 7 | Add your model's API keys and endpoints to the `.env` file at the project root. Example: 8 | 9 | ``` 10 | DEEPSEEK_API_KEY=your_deepseek_key 11 | DEEPSEEK_BASE_URL=https://api.deepseek.com 12 | DEEPSEEK_MODEL_NAME=deepseek-model 13 | 14 | GPT4O_API_KEY=your_gpt4o_key 15 | GPT4O_BASE_URL=https://api.gpt4o.com 16 | GPT4O_MODEL_NAME=gpt-4o 17 | 18 | INTERNVL_API_KEY=your_internvl_key 19 | INTERNVL_BASE_URL=https://api.internvl.com 20 | INTERNVL_MODEL_NAME=internvl-model 21 | ``` 22 | 23 | ## 2. Config Class (`@config`) 24 | 25 | Ensure your model's config is added to `spectrumlab/config/base_config.py`: 26 | 27 | ```python 28 | @dataclass 29 | class Config: 30 | ... 31 | yourmodel_api_key: str = os.getenv("YOURMODEL_API_KEY") 32 | yourmodel_base_url: str = os.getenv("YOURMODEL_BASE_URL") 33 | yourmodel_model_name: str = os.getenv("YOURMODEL_MODEL_NAME") 34 | ``` 35 | 36 | ## 3. Model Registration 37 | 38 | Implement your model in `spectrumlab/models/yourmodel_api.py` (inherit from `BaseAPIModel` or `BaseModel`). 39 | 40 | Register it in `spectrumlab/models/__init__.py`: 41 | 42 | ```python 43 | from .yourmodel_api import YourModel 44 | __all__ = [ ..., "YourModel" ] 45 | ``` 46 | 47 | ## 4. Add Test File 48 | 49 | Create a test file in `tests/models/test_yourmodel.py`. 
Example: 50 | 51 | ```python 52 | import pytest 53 | from spectrumlab.models import YourModel 54 | from spectrumlab.utils.image_utils import encode_image_to_base64 55 | from spectrumlab.benchmark.signal_group import SignalGroup 56 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator 57 | 58 | def test_yourmodel_text_generation(): 59 | model = YourModel() 60 | response = model.generate("What is spectroscopy?") 61 | assert isinstance(response, str) 62 | assert len(response) > 0 63 | 64 | def test_yourmodel_multimodal_generation(): 65 | model = YourModel() 66 | image_path = "playground/models/test.png" 67 | image_base64 = encode_image_to_base64(image_path) 68 | prompt = { 69 | "text": "Please explain this spectroscopy image.", 70 | "images": [ 71 | {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}} 72 | ], 73 | } 74 | response = model.generate(prompt) 75 | assert isinstance(response, str) 76 | assert len(response) > 0 77 | 78 | def test_yourmodel_signalgroup_evaluation(): 79 | model = YourModel() 80 | signal_group = SignalGroup("data") 81 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 82 | evaluator = ChoiceEvaluator() 83 | results = evaluator.evaluate(data_items=data, model=model) 84 | assert "metrics" in results 85 | assert "overall" in results["metrics"] 86 | ``` 87 | 88 | ## 5. Run Tests 89 | 90 | From the project root, run: 91 | 92 | ``` 93 | pytest -s -v tests/models/test_yourmodel.py 94 | ``` 95 | 96 | Or run all model tests: 97 | 98 | ``` 99 | pytest -s -v tests/models/ 100 | ``` 101 | 102 | --- 103 | 104 | **Tip:** 105 | 106 | - Each model has its own test file for easy debugging and extension. 107 | - Add new models by following steps 1-4 above. 108 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 |
3 | # SpectrumLab 4 | 5 | A pioneering unified platform designed to systematize and accelerate deep learning research in spectroscopy. 6 | 7 |
8 | 9 | ## 🚀 Quick Start 10 | 11 | ### Environment Setup 12 | 13 | We recommend using conda and uv for environment management: 14 | 15 | ```bash 16 | # Clone the repository 17 | git clone https://github.com/little1d/SpectrumLab.git 18 | cd SpectrumLab 19 | 20 | # Create conda environment 21 | conda create -n spectrumlab python=3.10 22 | conda activate spectrumlab 23 | 24 | pip install uv 25 | uv pip install -e . 26 | ``` 27 | 28 | ### Data Setup 29 | 30 | Download benchmark data from Hugging Face: 31 | 32 | - [SpectrumBench v1.0](https://huggingface.co/SpectrumWorld/spectrumbench_v_1.0/tree/main) 33 | 34 | Extract the data to the `data` directory in the project root. 35 | 36 | ### API Keys Configuration 37 | 38 | ```bash 39 | # Copy and edit environment configuration 40 | cp .env.example .env 41 | # Configure your API keys in the .env file 42 | ``` 43 | 44 | ## 💻 Usage 45 | 46 | ### Python API 47 | 48 | ```python 49 | from spectrumlab.benchmark import get_benchmark_group 50 | from spectrumlab.models import GPT4o 51 | from spectrumlab.evaluator import get_evaluator 52 | 53 | # Load benchmark data 54 | benchmark = get_benchmark_group("perception") 55 | data = benchmark.get_data_by_subcategories("all") 56 | 57 | # Initialize model 58 | model = GPT4o() 59 | 60 | # Get evaluator 61 | evaluator = get_evaluator("perception") 62 | 63 | # Run evaluation 64 | results = evaluator.evaluate( 65 | data_items=data, 66 | model=model, 67 | save_path="./results" 68 | ) 69 | 70 | print(f"Overall accuracy: {results['metrics']['overall']['accuracy']:.2f}%") 71 | ``` 72 | 73 | ### Command Line Interface 74 | 75 | The CLI provides a simple way to run evaluations: 76 | 77 | ```bash 78 | # Basic evaluation 79 | spectrumlab eval --model gpt4o --level perception 80 | 81 | # Specify data path and output directory 82 | spectrumlab eval --model claude --level signal --data-path ./data --output ./my_results 83 | 84 | # Evaluate specific subcategories 85 | spectrumlab eval --model deepseek --level semantic --subcategories "IR_spectroscopy" "Raman_spectroscopy" 86 | 87 | # Customize output length 88 | spectrumlab eval --model internvl --level generation --max-length 1024 89 | 90 | # Get help 91 | spectrumlab eval --help 92 | ``` 93 | 94 | ## 🤝 Contributing 95 | 96 | We welcome community contributions! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for detailed guidelines. 
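Before opening a pull request that touches a model integration, it helps to run the relevant model tests locally. A minimal sketch, mirroring the pytest commands documented in `spectrumlab/models/README.md` (assumes the corresponding API keys are configured in `.env`):

```bash
# Run the full model test suite
pytest -s -v tests/models/

# Or target a single model's tests, e.g. InternVL
pytest -s -v tests/models/test_internvl.py
```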
97 | 98 | ## Citation 99 | 100 | If you use SpectrumLab in your research, please cite our paper: 101 | 102 | ```bibtex 103 | @article{yang2025spectrumworldartificialintelligencefoundation, 104 | title={SpectrumWorld: Artificial Intelligence Foundation for Spectroscopy}, 105 | author={Zhuo Yang and Jiaqing Xie and Shuaike Shen and Daolang Wang and Yeyun Chen and Ben Gao and Shuzhou Sun and Biqing Qi and Dongzhan Zhou and Lei Bai and Linjiang Chen and Shufei Zhang and Qinying Gu and Jun Jiang and Tianfan Fu and Yuqiang Li}, 106 | year={2025}, 107 | eprint={2508.01188}, 108 | archivePrefix={arXiv}, 109 | primaryClass={cs.LG}, 110 | url={https://arxiv.org/abs/2508.01188}, 111 | } 112 | ``` 113 | 114 | ## Acknowledgments 115 | 116 | - **Experiment Tracking**: [SwanLab](https://github.com/SwanHubX/SwanLab/) for experiment management and visualization 117 | - **Choice Evaluator Framework**: Inspired by [MMAR](https://github.com/ddlBoJack/MMAR) 118 | -------------------------------------------------------------------------------- /leaderboard/vue/src/components/TheWelcome.vue: -------------------------------------------------------------------------------- 1 | 11 | 12 | 95 | -------------------------------------------------------------------------------- /spectrumlab/evaluator/choice_evaluator.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import List, Dict 3 | from .base import BaseEvaluator 4 | from spectrumlab.utils.image_utils import ( 5 | prepare_images_for_prompt, 6 | normalize_image_paths, 7 | ) 8 | 9 | 10 | class ChoiceEvaluator(BaseEvaluator): 11 | def __init__(self, prediction_key: str = "model_prediction"): 12 | super().__init__(prediction_key) 13 | 14 | def _build_prompt(self, item: Dict) -> str: 15 | question = item.get("question", "") 16 | choices = item.get("choices", []) 17 | image_paths_field = item.get("image_path") 18 | 19 | option_lines = [f"{chr(65 + i)}. {choice}" for i, choice in enumerate(choices)] 20 | options_block = "\n".join(option_lines) 21 | 22 | text_parts = [ 23 | f"Question: {question}", 24 | "", 25 | "Available options:", 26 | options_block, 27 | "", 28 | "Please analyze the question and options carefully. Your answer must be exactly one of the provided options, and must be copied verbatim from the options above.", 29 | "Return your answer using the format \\answer{...}, where the content inside the braces is exactly the text of your chosen option (not the option letter or number, and do not use \\box{} or any other wrapper).", 30 | "For example, if you choose the option '~1700 cm⁻¹', you should return: \\answer{~1700 cm⁻¹}", 31 | "Do not return just a value like '~1700 cm' or any partial/incomplete answer. 
The answer must match one of the options exactly.", 32 | "", 33 | "Your response:", 34 | ] 35 | 36 | text_content = "\n".join(text_parts) 37 | 38 | # Check if there are images 39 | image_paths = normalize_image_paths(image_paths_field) 40 | 41 | if image_paths: 42 | assert all( 43 | isinstance(p, str) for p in image_paths 44 | ), f"image_paths should be List[str], got {image_paths}" 45 | # Prepare image data 46 | image_data = prepare_images_for_prompt(image_paths) 47 | 48 | if image_data: 49 | # Return multimodal format 50 | return {"text": text_content, "images": image_data} 51 | 52 | # Return pure text format 53 | return text_content 54 | 55 | def _extract_prediction(self, response: str, item: Dict) -> str: 56 | """Extract only the content inside \\answer{...}.""" 57 | if not response: 58 | return "" 59 | answer_pattern = r"\\answer\{([^}]+)\}" 60 | matches = re.findall(answer_pattern, response) 61 | if matches: 62 | return matches[-1].strip() 63 | return "" 64 | 65 | def _calculate_accuracy(self, answer: str, prediction: str, item: Dict) -> bool: 66 | """Calculate accuracy using string matching from MMAR.""" 67 | choices = item.get("choices", []) 68 | return self._string_match(answer, prediction, choices) 69 | 70 | def _string_match(self, answer: str, prediction: str, choices: List[str]) -> bool: 71 | # Adapted from: MMAR 72 | # Source: https://github.com/ddlBoJack/MMAR/blob/main/code/evaluation.py#L8 73 | 74 | def tokenize(text): 75 | return set(re.findall(r"\b\w+\b", text.lower())) 76 | 77 | prediction_tokens = tokenize(prediction) 78 | answer_tokens = tokenize(answer) 79 | 80 | if not prediction_tokens: 81 | return False 82 | 83 | # Get tokens from incorrect choices 84 | incorrect_tokens = set() 85 | for choice in choices: 86 | choice_tokens = tokenize(choice) 87 | if choice_tokens != answer_tokens: 88 | incorrect_tokens.update(choice_tokens - answer_tokens) 89 | 90 | # Two conditions for correct match 91 | cond1 = answer_tokens.issubset( 92 | prediction_tokens 93 | ) # All answer tokens in prediction 94 | cond2 = prediction_tokens.isdisjoint( 95 | incorrect_tokens 96 | ) # No incorrect choice tokens 97 | 98 | return cond1 and cond2 99 | -------------------------------------------------------------------------------- /spectrumlab/config/base_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dataclasses import dataclass 3 | from dotenv import load_dotenv 4 | from pathlib import Path 5 | 6 | # Load .env from project root directory 7 | project_root = Path(__file__).parent.parent.parent 8 | env_path = project_root / ".env" 9 | load_dotenv(env_path) 10 | 11 | 12 | @dataclass 13 | class Config: 14 | # DeepSeek API Configuration 15 | deepseek_api_key: str = os.getenv("DEEPSEEK_API_KEY") 16 | deepseek_base_url: str = os.getenv("DEEPSEEK_BASE_URL") 17 | deepseek_model_name: str = os.getenv("DEEPSEEK_MODEL_NAME") 18 | 19 | # GPT-4o API Configuration 20 | gpt4o_api_key: str = os.getenv("GPT4O_API_KEY") 21 | gpt4o_base_url: str = os.getenv("GPT4O_BASE_URL") 22 | gpt4o_model_name: str = os.getenv("GPT4O_MODEL_NAME") 23 | 24 | # InternVL API Configuration 25 | internvl_api_key: str = os.getenv("INTERNVL_API_KEY") 26 | internvl_base_url: str = os.getenv("INTERNVL_BASE_URL") 27 | internvl_model_name: str = os.getenv("INTERNVL_MODEL_NAME") 28 | 29 | # Claude API Configuration 30 | claude_api_key: str = os.getenv("CLAUDE_API_KEY") 31 | claude_base_url: str = os.getenv("CLAUDE_BASE_URL") 32 | claude_sonnet_3_5_model_name: str = os.getenv("CLAUDE_SONNET_3_5") 33
| claude_opus_4_model_name: str = os.getenv("CLAUDE_OPUS_4") 34 | claude_haiku_3_5_model_name: str = os.getenv("CLAUDE_HAIKU_3_5") 35 | claude_sonnet_4_model_name: str = os.getenv("CLAUDE_SONNET_4") 36 | 37 | # GPT-4.1, GPT-4-Vision 38 | gpt4_1_api_key: str = os.getenv("GPT4_1_API_KEY") 39 | gpt4_1_base_url: str = os.getenv("GPT4_1_BASE_URL") 40 | gpt4_1_model_name: str = os.getenv("GPT4_1") 41 | gpt4_vision_api_key: str = os.getenv("GPT4_VISION_API_KEY") 42 | gpt4_vision_base_url: str = os.getenv("GPT4_VISION_BASE_URL") 43 | gpt4_vision_model_name: str = os.getenv("GPT4_VISION") 44 | 45 | # Grok-2-Vision 46 | grok_2_vision_api_key: str = os.getenv("GROK_2_VISION_API_KEY") 47 | grok_2_vision_base_url: str = os.getenv("GROK_2_VISION_BASE_URL") 48 | grok_2_vision_model_name: str = os.getenv("GROK_2_VISION") 49 | 50 | # Qwen-VL-Max 51 | qwen_vl_api_key: str = os.getenv("QWEN_VL_API_KEY") 52 | qwen_vl_base_url: str = os.getenv("QWEN_VL_BASE_URL") 53 | qwen_vl_model_name: str = os.getenv("QWEN_VL") 54 | 55 | # DeepSeek-VL-2 56 | deepseek_vl_2_api_key: str = os.getenv("DEEPSEEK_VL_2_API_KEY") 57 | deepseek_vl_2_base_url: str = os.getenv("DEEPSEEK_VL_2_BASE_URL") 58 | deepseek_vl_2_model_name: str = os.getenv("DEEPSEEK_VL_2") 59 | 60 | # Qwen-2.5-VL-32B 61 | qwen_2_5_vl_32b_api_key: str = os.getenv("QWEN_2_5_VL_32B_API_KEY") 62 | qwen_2_5_vl_32b_base_url: str = os.getenv("QWEN_2_5_VL_32B_BASE_URL") 63 | qwen_2_5_vl_32b_model_name: str = os.getenv("QWEN_2_5_VL_32B") 64 | 65 | # Qwen-2.5-VL-72B 66 | qwen_2_5_vl_72b_api_key: str = os.getenv("QWEN_2_5_VL_72B_API_KEY") 67 | qwen_2_5_vl_72b_base_url: str = os.getenv("QWEN_2_5_VL_72B_BASE_URL") 68 | qwen_2_5_vl_72b_model_name: str = os.getenv("QWEN_2_5_VL_72B") 69 | 70 | # Llama-Vision-11B 71 | llama_vision_11b_api_key: str = os.getenv("LLAMA_VISION_11B_API_KEY") 72 | llama_vision_11b_base_url: str = os.getenv("LLAMA_VISION_11B_BASE_URL") 73 | llama_vision_11b_model_name: str = os.getenv("LLAMA_VISION_11B") 74 | 75 | # Llama-Vision-90B 76 | llama_vision_90b_api_key: str = os.getenv("LLAMA_VISION_90B_API_KEY") 77 | llama_vision_90b_base_url: str = os.getenv("LLAMA_VISION_90B_BASE_URL") 78 | llama_vision_90b_model_name: str = os.getenv("LLAMA_VISION_90B") 79 | 80 | # Doubao-1.5-Vision-Pro 81 | doubao_1_5_vision_pro_api_key: str = os.getenv("DOUBAO_1_5_VISION_PRO_API_KEY") 82 | doubao_1_5_vision_pro_base_url: str = os.getenv("DOUBAO_1_5_VISION_PRO_BASE_URL") 83 | doubao_1_5_vision_pro_model_name: str = os.getenv("DOUBAO_1_5_VISION_PRO") 84 | 85 | # Doubao-1.5-Vision-Pro-Thinking 86 | doubao_1_5_vision_pro_thinking_api_key: str = os.getenv( 87 | "DOUBAO_1_5_VISION_PRO_THINKING_API_KEY" 88 | ) 89 | doubao_1_5_vision_pro_thinking_base_url: str = os.getenv( 90 | "DOUBAO_1_5_VISION_PRO_THINKING_BASE_URL" 91 | ) 92 | doubao_1_5_vision_pro_thinking_model_name: str = os.getenv( 93 | "DOUBAO_1_5_VISION_PRO_THINKING" 94 | ) 95 | -------------------------------------------------------------------------------- /run_ablation_experiments.py: -------------------------------------------------------------------------------- 1 | import swanlab 2 | from spectrumlab.models import Qwen_2_5_VL_32B 3 | from spectrumlab.benchmark.signal_group import SignalGroup 4 | from spectrumlab.benchmark.generation_group import GenerationGroup 5 | from spectrumlab.benchmark.perception_group import PerceptionGroup 6 | from spectrumlab.benchmark.semantic_group import SemanticGroup 7 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator 8 | from spectrumlab.evaluator.open_evaluator import OpenEvaluator 9 | 10 | # 
ABLATION_CONFIGS = [ 11 | # { 12 | # "model_class": Qwen_2_5_VL_72B, 13 | # "model_name": "Qwen-2.5-VL-72B", 14 | # "temperature": 0, 15 | # "top_p": 1.0, 16 | # "save_dir": "./ablation_qwen_2_5_vl_72b_temp_0_evaluation_results", 17 | # }, 18 | # ] 19 | 20 | ABLATION_CONFIGS = [ 21 | { 22 | "model_class": Qwen_2_5_VL_32B, 23 | "model_name": "Qwen-2.5-VL-32B", 24 | "temperature": 1, 25 | "top_p": 1, 26 | "save_dir": "./ablation_qwen_2_5_vl_32b_baselines_evaluation_results", 27 | }, 28 | ] 29 | 30 | # Define each group with its subtasks and evaluator (test the Signal group first) 31 | GROUPS = [ 32 | { 33 | "name": "Signal", 34 | "group": SignalGroup("data"), 35 | "evaluator": ChoiceEvaluator(), 36 | "subcategories": None, # None means all subcategories 37 | }, 38 | { 39 | "name": "Perception", 40 | "group": PerceptionGroup("data"), 41 | "evaluator": ChoiceEvaluator(), 42 | "subcategories": None, 43 | }, 44 | { 45 | "name": "Semantic", 46 | "group": SemanticGroup("data"), 47 | "evaluator": ChoiceEvaluator(), 48 | "subcategories": None, 49 | }, 50 | { 51 | "name": "Generation", 52 | "group": GenerationGroup("data"), 53 | "evaluator": OpenEvaluator(), 54 | "subcategories": None, 55 | }, 56 | ] 57 | 58 | for config in ABLATION_CONFIGS: 59 | print(f"\n{'='*60}") 60 | print( 61 | f"Starting ablation experiment: {config['model_name']} (temperature={config['temperature']}, top_p={config['top_p']})" 62 | ) 63 | print(f"{'='*60}") 64 | 65 | model = config["model_class"]() 66 | 67 | # Initialize SwanLab 68 | swanlab.init( 69 | workspace="SpectrumLab", 70 | project="spectrumlab-ablation", 71 | experiment_name=f"{config['model_name']}_temp_{config['temperature']}_top_p_{config['top_p']}", 72 | config=config, 73 | ) 74 | 75 | # Iterate over each evaluation group 76 | for group_info in GROUPS: 77 | name = group_info["name"] 78 | group = group_info["group"] 79 | evaluator = group_info["evaluator"] 80 | subcategories = group_info["subcategories"] 81 | print(f"\n===== Evaluating {name} Group =====") 82 | data = group.get_data_by_subcategories(subcategories or "all") 83 | 84 | class ModelWithSamplingParams: 85 | def __init__(self, base_model, temperature, top_p): 86 | self.base_model = base_model 87 | self.temperature = temperature 88 | self.top_p = top_p 89 | self.model_name = base_model.model_name 90 | 91 | def generate(self, prompt, max_tokens=512): 92 | return self.base_model.generate( 93 | prompt, 94 | max_tokens=max_tokens, 95 | temperature=self.temperature, 96 | top_p=self.top_p, 97 | ) 98 | 99 | wrapped_model = ModelWithSamplingParams( 100 | model, config["temperature"], config["top_p"] 101 | ) 102 | 103 | results = evaluator.evaluate( 104 | data_items=data, model=wrapped_model, save_path=config["save_dir"] 105 | ) 106 | accuracy = results["metrics"]["overall"]["accuracy"] 107 | print(f"{name} Group evaluation completed! 
Overall accuracy: {accuracy:.2f}%\n") 108 | swanlab.log({f"{name}_accuracy": accuracy}) 109 | 110 | swanlab.finish() 111 | print(f"\nAblation experiment {config['model_name']} completed!") 112 | print(f"Results saved to: {config['save_dir']}") 113 | 114 | # use nohup in the terminal to start the evaluation 115 | # nohup python run_ablation_experiments.py > run_ablation.log 2>&1 & 116 | -------------------------------------------------------------------------------- /spectrumlab/cli/main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | from typing import Optional, List 4 | 5 | from .api import run_evaluation 6 | from spectrumlab.models import ( 7 | GPT4o, 8 | Claude_Sonnet_3_5, 9 | DeepSeek_VL2, 10 | InternVL, 11 | Qwen_2_5_VL_32B, 12 | ) 13 | 14 | AVAILABLE_MODELS = { 15 | "gpt4o": GPT4o, 16 | "claude": Claude_Sonnet_3_5, 17 | "deepseek": DeepSeek_VL2, 18 | "internvl": InternVL, 19 | "qwen-vl": Qwen_2_5_VL_32B, 20 | } 21 | 22 | 23 | def main(argv: Optional[List[str]] = None) -> int: 24 | parser = argparse.ArgumentParser( 25 | prog="spectrumlab", 26 | description="A pioneering unified platform designed to systematize and accelerate deep learning research in spectroscopy", 27 | ) 28 | 29 | parser.add_argument("--version", action="version", version="%(prog)s 0.1.0") 30 | 31 | subparsers = parser.add_subparsers(dest="command", help="Available commands") 32 | 33 | eval_parser = subparsers.add_parser("eval", help="Run model evaluation") 34 | 35 | eval_parser.add_argument( 36 | "--model", 37 | "-m", 38 | required=True, 39 | choices=list(AVAILABLE_MODELS.keys()), 40 | help=f"Model name, options: {', '.join(AVAILABLE_MODELS.keys())}", 41 | ) 42 | 43 | eval_parser.add_argument( 44 | "--level", 45 | "-l", 46 | required=True, 47 | choices=["signal", "perception", "semantic", "generation"], 48 | help="Evaluation level", 49 | ) 50 | 51 | eval_parser.add_argument( 52 | "--subcategories", 53 | "-s", 54 | nargs="*", 55 | help="Specify subcategories (optional, default: all)", 56 | ) 57 | 58 | eval_parser.add_argument( 59 | "--data-path", "-d", default="./data", help="Data path (default: ./data)" 60 | ) 61 | 62 | eval_parser.add_argument( 63 | "--output", "-o", default="./results", help="Output path (default: ./results)" 64 | ) 65 | 66 | eval_parser.add_argument( 67 | "--max-length", type=int, default=512, help="Max output length (default: 512)" 68 | ) 69 | 70 | args = parser.parse_args(argv) 71 | 72 | if args.command == "eval": 73 | try: 74 | # Initialize the model 75 | if args.model not in AVAILABLE_MODELS: 76 | available = ", ".join(AVAILABLE_MODELS.keys()) 77 | raise ValueError( 78 | f"Unsupported model: {args.model}. 
Available: {available}" 79 | ) 80 | 81 | model_class = AVAILABLE_MODELS[args.model] 82 | model_instance = model_class() 83 | 84 | results = run_evaluation( 85 | model=model_instance, 86 | level=args.level, 87 | subcategories=args.subcategories, 88 | data_path=args.data_path, 89 | save_path=args.output, 90 | max_out_len=args.max_length, 91 | ) 92 | 93 | print("\n" + "=" * 50) 94 | print("📊 Evaluation Results") 95 | print("=" * 50) 96 | 97 | if "error" in results: 98 | print(f"❌ Evaluation failed: {results['error']}") 99 | return 1 100 | 101 | metrics = results.get("metrics", {}) 102 | overall = metrics.get("overall", {}) 103 | 104 | print("✅ Evaluation completed!") 105 | print(f"📈 Overall accuracy: {overall.get('accuracy', 0):.2f}%") 106 | print(f"✅ Correct answers: {overall.get('correct', 0)}") 107 | print(f"📝 Total questions: {overall.get('total', 0)}") 108 | 109 | subcategory_metrics = metrics.get("subcategory_metrics", {}) 110 | if subcategory_metrics: 111 | print("\n📋 Subcategory details:") 112 | for subcategory, sub_metrics in subcategory_metrics.items(): 113 | acc = sub_metrics.get("accuracy", 0) 114 | correct = sub_metrics.get("correct", 0) 115 | total = sub_metrics.get("total", 0) 116 | print(f" {subcategory}: {acc:.2f}% ({correct}/{total})") 117 | 118 | print(f"\n💾 Results saved to: {args.output}") 119 | return 0 120 | 121 | except Exception as e: 122 | print(f"❌ Evaluation failed: {e}") 123 | return 1 124 | 125 | elif args.command is None: 126 | parser.print_help() 127 | return 0 128 | else: 129 | print(f"❌ Unknown command: {args.command}") 130 | parser.print_help() 131 | return 1 132 | 133 | 134 | if __name__ == "__main__": 135 | sys.exit(main()) 136 | -------------------------------------------------------------------------------- /spectrumlab/models/llama_api.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Union, Dict, Any 2 | from .base_api import BaseAPIModel 3 | from spectrumlab.config import Config 4 | from openai import OpenAI 5 | 6 | 7 | class Llama_Vision_11B(BaseAPIModel): 8 | def __init__( 9 | self, 10 | api_key: Optional[str] = None, 11 | base_url: Optional[str] = None, 12 | model_name: Optional[str] = None, 13 | **kwargs, 14 | ): 15 | config = Config() 16 | 17 | # Use provided parameters or fall back to config 18 | self.api_key = api_key or config.llama_vision_11b_api_key 19 | self.base_url = base_url or config.llama_vision_11b_base_url 20 | self.model_name = model_name or config.llama_vision_11b_model_name 21 | 22 | # Validate that we have required configuration 23 | if not self.api_key: 24 | raise ValueError( 25 | "Llama-Vision-11B API key not found. Please set LLAMA_VISION_11B_API_KEY in your .env file " 26 | "or provide api_key parameter." 27 | ) 28 | 29 | self.client = OpenAI( 30 | api_key=self.api_key, 31 | base_url=self.base_url, 32 | ) 33 | 34 | # Initialize parent class 35 | super().__init__(model_name=self.model_name, **kwargs) 36 | 37 | def generate( 38 | self, prompt: Union[str, Dict[str, Any]], max_tokens: int = 512 39 | ) -> str: 40 | """ 41 | Generate response supporting both text and multimodal input. 
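        Multimodal prompts are dicts with "text" and "images" keys, where each image entry is an OpenAI-style content part (for example an "image_url" dict); plain strings are sent as text-only messages.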
42 | 43 | Args: 44 | prompt: Either text string or multimodal dict 45 | max_tokens: Maximum tokens to generate 46 | 47 | Returns: 48 | Generated response string 49 | """ 50 | 51 | # Link: https://internlm.intern-ai.org.cn/api/document 52 | messages = [] 53 | 54 | if isinstance(prompt, dict) and "images" in prompt: 55 | content = [] 56 | 57 | content.append({"type": "text", "text": prompt["text"]}) 58 | 59 | for image_data in prompt["images"]: 60 | content.append(image_data) 61 | 62 | messages.append({"role": "user", "content": content}) 63 | else: 64 | text_content = prompt if isinstance(prompt, str) else prompt.get("text", "") 65 | messages.append({"role": "user", "content": text_content}) 66 | 67 | try: 68 | response = self.client.chat.completions.create( 69 | model=self.model_name, 70 | messages=messages, 71 | max_tokens=max_tokens, 72 | ) 73 | return response.choices[0].message.content 74 | except Exception as e: 75 | raise RuntimeError(f"Llama-Vision-11B API call failed: {e}") 76 | 77 | 78 | class Llama_Vision_90B(BaseAPIModel): 79 | def __init__( 80 | self, 81 | api_key: Optional[str] = None, 82 | base_url: Optional[str] = None, 83 | model_name: Optional[str] = None, 84 | **kwargs, 85 | ): 86 | config = Config() 87 | 88 | # Use provided parameters or fall back to config 89 | self.api_key = api_key or config.llama_vision_90b_api_key 90 | self.base_url = base_url or config.llama_vision_90b_base_url 91 | self.model_name = model_name or config.llama_vision_90b_model_name 92 | 93 | # Validate that we have required configuration 94 | if not self.api_key: 95 | raise ValueError( 96 | "Llama-Vision-90B API key not found. Please set LLAMA_VISION_90B_API_KEY in your .env file " 97 | "or provide api_key parameter." 98 | ) 99 | 100 | self.client = OpenAI( 101 | api_key=self.api_key, 102 | base_url=self.base_url, 103 | ) 104 | 105 | # Initialize parent class 106 | super().__init__(model_name=self.model_name, **kwargs) 107 | 108 | def generate( 109 | self, prompt: Union[str, Dict[str, Any]], max_tokens: int = 512 110 | ) -> str: 111 | """ 112 | Generate response supporting both text and multimodal input. 
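        Multimodal prompts are dicts with "text" and "images" keys, where each image entry is an OpenAI-style content part (for example an "image_url" dict); plain strings are sent as text-only messages.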
113 | 114 | Args: 115 | prompt: Either text string or multimodal dict 116 | max_tokens: Maximum tokens to generate 117 | 118 | Returns: 119 | Generated response string 120 | """ 121 | 122 | # Link: https://internlm.intern-ai.org.cn/api/document 123 | messages = [] 124 | 125 | if isinstance(prompt, dict) and "images" in prompt: 126 | content = [] 127 | 128 | content.append({"type": "text", "text": prompt["text"]}) 129 | 130 | for image_data in prompt["images"]: 131 | content.append(image_data) 132 | 133 | messages.append({"role": "user", "content": content}) 134 | else: 135 | text_content = prompt if isinstance(prompt, str) else prompt.get("text", "") 136 | messages.append({"role": "user", "content": text_content}) 137 | 138 | try: 139 | response = self.client.chat.completions.create( 140 | model=self.model_name, 141 | messages=messages, 142 | max_tokens=max_tokens, 143 | ) 144 | return response.choices[0].message.content 145 | except Exception as e: 146 | raise RuntimeError(f"Llama-Vision-90B API call failed: {e}") 147 | -------------------------------------------------------------------------------- /spectrumlab/models/gpt4_v_api.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Any, Optional, Union 2 | from .base_api import BaseAPIModel 3 | from spectrumlab.config import Config 4 | from openai import OpenAI 5 | 6 | 7 | class GPT4_1(BaseAPIModel): 8 | def __init__( 9 | self, 10 | api_key: Optional[str] = None, 11 | base_url: Optional[str] = None, 12 | model_name: Optional[str] = None, 13 | **kwargs, 14 | ): 15 | config = Config() 16 | 17 | # Use provided parameters or fall back to config 18 | self.api_key = api_key or config.gpt4_1_api_key 19 | self.base_url = base_url or config.gpt4_1_base_url 20 | self.model_name = model_name or config.gpt4_1_model_name 21 | 22 | # Validate that we have required configuration 23 | if not self.api_key: 24 | raise ValueError( 25 | "GPT-4.1 API key not found. Please set GPT4_1_API_KEY in your .env file " 26 | "or provide api_key parameter." 27 | ) 28 | 29 | self.client = OpenAI( 30 | api_key=self.api_key, 31 | base_url=self.base_url, 32 | ) 33 | 34 | # Initialize parent class 35 | super().__init__(model_name=self.model_name, **kwargs) 36 | 37 | def generate( 38 | self, prompt: Union[str, Dict[str, Any]], max_tokens: int = 512 39 | ) -> str: 40 | """ 41 | Generate response supporting both text and multimodal input. 
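        Multimodal prompts are dicts with "text" and "images" keys, where each image entry is an OpenAI-style content part (for example an "image_url" dict); plain strings are sent as text-only messages.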
42 | 43 | Args: 44 | prompt: Either text string or multimodal dict 45 | max_tokens: Maximum tokens to generate 46 | 47 | Returns: 48 | Generated response string 49 | """ 50 | messages = [] 51 | 52 | # Handle multimodal vs text-only prompts 53 | if isinstance(prompt, dict) and "images" in prompt: 54 | # Multimodal prompt 55 | content = [] 56 | 57 | content.append({"type": "text", "text": prompt["text"]}) 58 | 59 | for image_data in prompt["images"]: 60 | content.append(image_data) 61 | 62 | messages.append({"role": "user", "content": content}) 63 | else: 64 | # Text-only prompt 65 | text_content = prompt if isinstance(prompt, str) else prompt.get("text", "") 66 | messages.append({"role": "user", "content": text_content}) 67 | 68 | try: 69 | response = self.client.chat.completions.create( 70 | model=self.model_name, 71 | messages=messages, 72 | max_tokens=max_tokens, 73 | ) 74 | return response.choices[0].message.content 75 | except Exception as e: 76 | raise RuntimeError(f"GPT-4.1 API call failed: {e}") 77 | 78 | 79 | class GPT4_Vision(BaseAPIModel): 80 | def __init__( 81 | self, 82 | api_key: Optional[str] = None, 83 | base_url: Optional[str] = None, 84 | model_name: Optional[str] = None, 85 | **kwargs, 86 | ): 87 | config = Config() 88 | 89 | # Use provided parameters or fall back to config 90 | self.api_key = api_key or config.gpt4_vision_api_key 91 | self.base_url = base_url or config.gpt4_vision_base_url 92 | self.model_name = model_name or config.gpt4_vision_model_name 93 | 94 | # Validate that we have required configuration 95 | if not self.api_key: 96 | raise ValueError( 97 | "GPT-4 Vision API key not found. Please set GPT4_VISION_API_KEY in your .env file " 98 | "or provide api_key parameter." 99 | ) 100 | 101 | self.client = OpenAI( 102 | api_key=self.api_key, 103 | base_url=self.base_url, 104 | ) 105 | 106 | # Initialize parent class 107 | super().__init__(model_name=self.model_name, **kwargs) 108 | 109 | def generate( 110 | self, prompt: Union[str, Dict[str, Any]], max_tokens: int = 512 111 | ) -> str: 112 | """ 113 | Generate response supporting both text and multimodal input. 
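        Multimodal prompts are dicts with "text" and "images" keys, where each image entry is an OpenAI-style content part (for example an "image_url" dict); plain strings are sent as text-only messages.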
114 | 115 | Args: 116 | prompt: Either text string or multimodal dict 117 | max_tokens: Maximum tokens to generate 118 | 119 | Returns: 120 | Generated response string 121 | """ 122 | messages = [] 123 | 124 | # Handle multimodal vs text-only prompts 125 | if isinstance(prompt, dict) and "images" in prompt: 126 | # Multimodal prompt 127 | content = [] 128 | 129 | content.append({"type": "text", "text": prompt["text"]}) 130 | 131 | for image_data in prompt["images"]: 132 | content.append(image_data) 133 | 134 | messages.append({"role": "user", "content": content}) 135 | else: 136 | # Text-only prompt 137 | text_content = prompt if isinstance(prompt, str) else prompt.get("text", "") 138 | messages.append({"role": "user", "content": text_content}) 139 | 140 | try: 141 | response = self.client.chat.completions.create( 142 | model=self.model_name, 143 | messages=messages, 144 | max_tokens=max_tokens, 145 | ) 146 | return response.choices[0].message.content 147 | except Exception as e: 148 | raise RuntimeError(f"GPT-4 Vision API call failed: {e}") 149 | -------------------------------------------------------------------------------- /spectrumlab/models/doubao_api.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Union, Dict, Any 2 | from .base_api import BaseAPIModel 3 | from spectrumlab.config import Config 4 | from openai import OpenAI 5 | 6 | 7 | class Doubao_1_5_Vision_Pro(BaseAPIModel): 8 | def __init__( 9 | self, 10 | api_key: Optional[str] = None, 11 | base_url: Optional[str] = None, 12 | model_name: Optional[str] = None, 13 | **kwargs, 14 | ): 15 | config = Config() 16 | 17 | # Use provided parameters or fall back to config 18 | self.api_key = api_key or config.doubao_1_5_vision_pro_api_key 19 | self.base_url = base_url or config.doubao_1_5_vision_pro_base_url 20 | self.model_name = model_name or config.doubao_1_5_vision_pro_model_name 21 | 22 | # Validate that we have required configuration 23 | if not self.api_key: 24 | raise ValueError( 25 | "Doubao-1.5-Vision-Pro API key not found. Please set DOUBAO_1_5_VISION_PRO_API_KEY in your .env file " 26 | "or provide api_key parameter." 27 | ) 28 | 29 | self.client = OpenAI( 30 | api_key=self.api_key, 31 | base_url=self.base_url, 32 | ) 33 | 34 | # Initialize parent class 35 | super().__init__(model_name=self.model_name, **kwargs) 36 | 37 | def generate( 38 | self, prompt: Union[str, Dict[str, Any]], max_tokens: int = 512 39 | ) -> str: 40 | """ 41 | Generate response supporting both text and multimodal input. 
42 | 43 | Args: 44 | prompt: Either text string or multimodal dict 45 | max_tokens: Maximum tokens to generate 46 | 47 | Returns: 48 | Generated response string 49 | """ 50 | 51 | # Link: https://internlm.intern-ai.org.cn/api/document 52 | messages = [] 53 | 54 | if isinstance(prompt, dict) and "images" in prompt: 55 | content = [] 56 | 57 | content.append({"type": "text", "text": prompt["text"]}) 58 | 59 | for image_data in prompt["images"]: 60 | content.append(image_data) 61 | 62 | messages.append({"role": "user", "content": content}) 63 | else: 64 | text_content = prompt if isinstance(prompt, str) else prompt.get("text", "") 65 | messages.append({"role": "user", "content": text_content}) 66 | 67 | try: 68 | response = self.client.chat.completions.create( 69 | model=self.model_name, 70 | messages=messages, 71 | max_tokens=max_tokens, 72 | ) 73 | return response.choices[0].message.content 74 | except Exception as e: 75 | raise RuntimeError(f"Doubao-1.5-Vision-Pro API call failed: {e}") 76 | 77 | 78 | class Doubao_1_5_Vision_Pro_Thinking(BaseAPIModel): 79 | def __init__( 80 | self, 81 | api_key: Optional[str] = None, 82 | base_url: Optional[str] = None, 83 | model_name: Optional[str] = None, 84 | **kwargs, 85 | ): 86 | config = Config() 87 | 88 | # Use provided parameters or fall back to config 89 | self.api_key = api_key or config.doubao_1_5_vision_pro_thinking_api_key 90 | self.base_url = base_url or config.doubao_1_5_vision_pro_thinking_base_url 91 | self.model_name = model_name or config.doubao_1_5_vision_pro_thinking_model_name 92 | 93 | # Validate that we have required configuration 94 | if not self.api_key: 95 | raise ValueError( 96 | "Doubao-1.5-Vision-Pro-Thinking API key not found. Please set DOUBAO_1_5_VISION_PRO_THINKING_API_KEY in your .env file " 97 | "or provide api_key parameter." 98 | ) 99 | 100 | self.client = OpenAI( 101 | api_key=self.api_key, 102 | base_url=self.base_url, 103 | ) 104 | 105 | # Initialize parent class 106 | super().__init__(model_name=self.model_name, **kwargs) 107 | 108 | def generate( 109 | self, prompt: Union[str, Dict[str, Any]], max_tokens: int = 512 110 | ) -> str: 111 | """ 112 | Generate response supporting both text and multimodal input. 113 | 114 | Args: 115 | prompt: Either text string or multimodal dict 116 | max_tokens: Maximum tokens to generate 117 | 118 | Returns: 119 | Generated response string 120 | """ 121 | 122 | # Link: https://internlm.intern-ai.org.cn/api/document 123 | messages = [] 124 | 125 | if isinstance(prompt, dict) and "images" in prompt: 126 | content = [] 127 | 128 | content.append({"type": "text", "text": prompt["text"]}) 129 | 130 | for image_data in prompt["images"]: 131 | content.append(image_data) 132 | 133 | messages.append({"role": "user", "content": content}) 134 | else: 135 | text_content = prompt if isinstance(prompt, str) else prompt.get("text", "") 136 | messages.append({"role": "user", "content": text_content}) 137 | 138 | try: 139 | response = self.client.chat.completions.create( 140 | model=self.model_name, 141 | messages=messages, 142 | max_tokens=max_tokens, 143 | ) 144 | return response.choices[0].message.content 145 | except Exception as e: 146 | raise RuntimeError(f"Doubao-1.5-Vision-Pro-Thinking API call failed: {e}") 147 | -------------------------------------------------------------------------------- /docs/en/tutorial.md: -------------------------------------------------------------------------------- 1 | # Tutorial 2 | 3 | Welcome to use SpectrumLab! 
This tutorial will help you quickly understand spectroscopic analysis, the SpectrumLab platform, and how to use it to evaluate the performance of large language models on spectroscopy tasks. 4 | 5 | ## What is Spectroscopy? 6 | 7 | Spectroscopy is the branch of science that studies the interaction between matter and electromagnetic radiation. By analyzing the absorption, emission, or scattering spectra of a sample, we can obtain detailed information about its structure, composition, and properties. 8 | 9 | ## The Importance of Spectroscopy 10 | 11 | Spectroscopy plays a central role in modern science. By probing the interaction between matter and electromagnetic radiation, it provides a key means of understanding the composition, structure, and properties of matter. In chemistry, spectroscopy is used for molecular structure analysis and studies of reaction mechanisms. In materials science, it characterizes nanomaterials and supports surface analysis. In biology, it is used to study protein folding and to detect metabolites. Spectroscopy is also widely applied in clinical medicine, where spectroscopic techniques enable non-invasive diagnosis and early disease detection, making it an indispensable tool in modern research and applications. 12 | 13 | ## Common Spectroscopic Techniques 14 | 15 | - **Infrared spectroscopy (IR)**: Probes molecular vibrations to identify functional groups. Characteristic absorption bands (such as C=O, O–H, and C–H) fall within well-defined frequency ranges, making IR a core tool for functional-group determination. 16 | - **Nuclear Magnetic Resonance (NMR)**: Reveals atomic environments and structural connectivity within a molecule through chemical shifts, signal intensities, and coupling constants; it is routinely used to determine molecular structure, especially of organic compounds. 17 | - **Ultraviolet-Visible Spectroscopy (UV-Vis)**: Probes electronic transitions and conjugated systems, and is especially suited to determining electronic structure, conjugation length, and optical properties; it does not directly provide structural connectivity information. 18 | - **Mass Spectrometry (MS)**: Determines molecular weight and supports structure inference from fragmentation patterns, making it an important tool for establishing molecular composition and substructures. 19 | - **Raman spectroscopy (Raman)**: Provides molecular vibration information much like IR, is particularly sensitive to symmetric molecules and non-polar bonds, and is often used as a complementary method to IR. 20 | - **HSQC spectrum**: A two-dimensional NMR (^1H–^13C or ^1H–^15N) experiment where each cross peak represents a directly bonded proton-heteroatom pair. It can be used to unambiguously assign one-bond ^1H–^13C (or ^1H–^15N) correlations, assisting peak assignment and structure elucidation. 21 | 22 | ## What is SpectrumLab? 23 | 24 | ### Overview 25 | 26 | SpectrumLab is a groundbreaking unified platform and comprehensive toolkit designed to accelerate and systematize deep learning research in chemical spectroscopy. It aims to streamline the entire AI-driven spectroscopy research lifecycle, from data pre-processing to model evaluation, and it provides researchers and developers with a modular, scalable, and easy-to-use ecosystem of Python libraries and tools for artificial intelligence research and applications in spectroscopy.
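In practice, the core workflow takes only a few lines. The snippet below is a minimal sketch; it assumes the benchmark data has been extracted to `./data` as described in the project README and that the relevant API keys are configured in your `.env` file:

```python
from spectrumlab.benchmark import get_benchmark_group
from spectrumlab.models import GPT4o
from spectrumlab.evaluator import get_evaluator

# Load the Perception-level benchmark and inspect its subcategories
benchmark = get_benchmark_group("perception")
print(benchmark.get_available_subcategories())

# Evaluate a model on all subcategories and save the results
evaluator = get_evaluator("perception")
results = evaluator.evaluate(
    data_items=benchmark.get_data_by_subcategories("all"),
    model=GPT4o(),
    save_path="./results",
)
print(f"Overall accuracy: {results['metrics']['overall']['accuracy']:.2f}%")
```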
27 | 28 | ### Core Functions 29 | 30 | #### Modular and Extensible Architecture 31 | 32 | SpectrumLab adopts a flexible modular design whose core components include: 33 | 34 | - **Benchmark Group**: Hierarchically organizes the SpectrumBench dataset, supports multiple spectral modalities and task types, and lets users flexibly combine subsets to create customized evaluation tasks. 35 | - **Model Integration**: Provides a unified framework and standardized API for seamlessly integrating and evaluating external models, whether commercial closed-source models (such as GPT-4o) or open-source models deployed locally. 36 | - **Evaluator**: The core of the evaluation engine; it supports customizing evaluation metrics and protocols for different task types (such as multiple-choice and open-ended generation questions), ensuring rigorous, task-appropriate evaluation. 37 | 38 | #### A Comprehensive Toolchain Ecosystem 39 | 40 | SpectrumLab ships as a Python library distributed through PyPI that integrates core modules for data processing, model development, automatic evaluation, and visualization, greatly simplifying the entire research workflow. 41 | 42 | #### SpectrumAnnotator 43 | 44 | The platform is closely integrated with the innovative SpectrumAnnotator component, which uses the reasoning capabilities of advanced multimodal large models to automatically generate high-quality, diverse benchmark data from seed datasets and to build evaluation tasks efficiently. 45 | 46 | #### Leaderboards 47 | 48 | To ensure transparency and reproducibility, SpectrumLab maintains a public leaderboard system that systematically tracks and compares model performance across all 14 tasks, promoting fair competition and shared progress in the field. 49 | 50 | ## Related links 51 | 52 | - [API Reference](/en/api) - Detailed interface descriptions and code examples 53 | - [Benchmark](/en/benchmark) - Details of the evaluation metrics and the dataset 54 | - [Leaderboard](https://huggingface.co/spaces/SpectrumWorld/SpectrumLeaderboard) - Model performance comparisons 55 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # SpectrumLab Documentation 2 | 3 | Welcome to the SpectrumLab documentation! This guide will help you contribute to our documentation system. 4 | 5 | ## About This Documentation 6 | 7 | This documentation is built with [VitePress](https://vitepress.dev/), a static site generator designed for creating fast, beautiful documentation websites. Our documentation supports both English and Chinese to serve our global community. 8 | 9 | ## Contributing to the Documentation 10 | 11 | Contributing to our documentation is straightforward! Simply clone the project, add or modify Markdown files, commit your changes, and create a Pull Request. 
12 | 13 | ### Prerequisites 14 | 15 | Before you begin, ensure you have the following installed: 16 | 17 | - [Node.js](https://nodejs.org/) (version 18 or higher) 18 | - [npm](https://www.npmjs.com/) (comes with Node.js) 19 | 20 | ### Step 1: Clone the Repository 21 | 22 | ```bash 23 | git clone https://github.com/little1d/SpectrumLab.git 24 | cd SpectrumLab 25 | ``` 26 | 27 | ### Step 2: Install Dependencies 28 | 29 | Navigate to the docs directory and install the required dependencies: 30 | 31 | ```bash 32 | cd docs 33 | npm install 34 | ``` 35 | 36 | Alternatively, if you prefer using the development dependencies globally: 37 | 38 | ```bash 39 | npm add -D vitepress 40 | npm install 41 | ``` 42 | 43 | ### Step 3: Create a New Branch 44 | 45 | Create a new branch for your documentation changes. We recommend using the naming convention `docs/<topic>` (e.g., `docs/api`, `docs/examples`, `docs/benchmarks`). 46 | 47 | ```bash 48 | git checkout -b docs/<topic> 49 | ``` 50 | 51 | For detailed branching and contribution guidelines, please refer to our [Contributing Guide](https://github.com/little1d/SpectrumLab/blob/main/CONTRIBUTING.md). 52 | 53 | ### Step 4: Preview Your Changes 54 | 55 | #### Local Development 56 | 57 | To start the development server and preview your changes in real-time: 58 | 59 | ```bash 60 | npm run docs:dev 61 | ``` 62 | 63 | This will start a local server (typically at `http://localhost:5173`) where you can preview your documentation changes. 64 | 65 | #### Production Build 66 | 67 | If you need to test the complete compilation and packaging: 68 | 69 | ```bash 70 | # Build the documentation 71 | npm run docs:build 72 | 73 | # Preview the production build 74 | npm run docs:preview 75 | ``` 76 | 77 | > **Note:** These commands are configured in `docs/package.json`. You can modify them if needed. 78 | 79 | ### Step 5: Deployment 80 | 81 | We have automated deployment set up using GitHub Actions. The deployment process is triggered automatically when: 82 | 83 | - Changes are pushed to the `main` branch 84 | - Changes are made to files in the `docs/` directory 85 | - Changes are made to the deployment workflow file 86 | 87 | **All you need to do is create a Pull Request!** Once your PR is merged into the main branch, the documentation will be automatically deployed to GitHub Pages. 88 | 89 | ## Documentation Structure 90 | 91 | Our documentation is organized as follows: 92 | 93 | ``` 94 | docs/ 95 | ├── .vitepress/ # VitePress configuration 96 | ├── public/ # Static assets 97 | ├── assets/ # Documentation assets 98 | ├── en/ # English documentation 99 | │ ├── index.md 100 | │ ├── tutorial.md 101 | │ ├── api.md 102 | │ └── benchmark.md 103 | ├── zh/ # Chinese documentation 104 | │ ├── index.md 105 | │ ├── tutorial.md 106 | │ ├── api.md 107 | │ └── benchmark.md 108 | ├── index.md # Homepage 109 | ├── package.json # Dependencies and scripts 110 | └── README.md # This file 111 | ``` 112 | 113 | ## Writing Guidelines 114 | 115 | ### Language Support 116 | 117 | - **English**: Primary language for the documentation 118 | - **Chinese**: Full translation available for Chinese-speaking users 119 | 120 | ### Content Guidelines 121 | 122 | 1. **Be Clear and Concise**: Write in simple, clear language 123 | 2. **Use Code Examples**: Include practical examples wherever possible 124 | 3. **Maintain Consistency**: Follow the existing style and structure 125 | 4. 
**Cross-Reference**: Link to related sections when appropriate 126 | 127 | ### Markdown Features 128 | 129 | VitePress supports many Markdown features including: 130 | 131 | - **Code Blocks**: With syntax highlighting 132 | - **Custom Containers**: For tips, warnings, and notes 133 | - **Mathematical Expressions**: Using LaTeX syntax 134 | - **Mermaid Diagrams**: For flowcharts and diagrams 135 | 136 | Example: 137 | 138 | ```markdown 139 | ::: tip 140 | This is a helpful tip! 141 | ::: 142 | 143 | ::: warning 144 | This is a warning message. 145 | ::: 146 | 147 | ::: danger 148 | This is a danger alert. 149 | ::: 150 | ``` 151 | 152 | ## Getting Help 153 | 154 | If you encounter any issues or have questions about contributing to the documentation: 155 | 156 | 1. Check our [existing issues](https://github.com/little1d/SpectrumLab/issues) 157 | 2. Create a new issue with the `documentation` label 158 | 3. Refer to the [VitePress documentation](https://vitepress.dev/) for technical questions 159 | 4. Review our [Contributing Guide](https://github.com/little1d/SpectrumLab/blob/main/CONTRIBUTING.md) for general contribution guidelines 160 | 161 | ## Resources 162 | 163 | - [VitePress Guide](https://vitepress.dev/guide/getting-started) 164 | - [Markdown Guide](https://www.markdownguide.org/) 165 | - [GitHub Docs: About Pull Requests](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/about-pull-requests) 166 | 167 | Thank you for contributing to SpectrumLab documentation! 🎉 168 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | # SpectrumLab Environment Configuration 2 | # Copy this file to .env and fill in your actual API keys and configurations 3 | 4 | # ============================================================================= 5 | # DeepSeek API Configuration 6 | # ============================================================================= 7 | DEEPSEEK_API_KEY=your_deepseek_api_key_here 8 | DEEPSEEK_BASE_URL=https://api.deepseek.com 9 | DEEPSEEK_MODEL_NAME=deepseek-chat 10 | 11 | # ============================================================================= 12 | # GPT-4o API Configuration 13 | # ============================================================================= 14 | GPT4O_API_KEY=your_openai_api_key_here 15 | GPT4O_BASE_URL=https://api.openai.com/v1 16 | GPT4O_MODEL_NAME=gpt-4o 17 | 18 | # ============================================================================= 19 | # InternVL API Configuration 20 | # ============================================================================= 21 | INTERNVL_API_KEY=your_internvl_api_key_here 22 | INTERNVL_BASE_URL=https://chat.intern-ai.org.cn/api/v1/ 23 | INTERNVL_MODEL_NAME=internvl3-latest 24 | 25 | # ============================================================================= 26 | # Claude API Configuration 27 | # ============================================================================= 28 | CLAUDE_API_KEY=your_anthropic_api_key_here 29 | CLAUDE_BASE_URL=https://api.anthropic.com 30 | CLAUDE_SONNET_3_5=anthropic/claude-3.5-sonnet 31 | CLAUDE_OPUS_4=claude-opus-4-20250514 32 | CLAUDE_HAIKU_3_5=claude-3-5-haiku-20241022 33 | CLAUDE_SONNET_4=anthropic/claude-sonnet-4 34 | 35 | # ============================================================================= 36 | # GPT-4.1 and GPT-4-Vision Configuration 37 | # 
============================================================================= 38 | GPT4_1_API_KEY=your_gpt4_1_api_key_here 39 | GPT4_1_BASE_URL=https://api.openai.com 40 | GPT4_1=gpt-4.1-2025-04-14 41 | 42 | GPT4_VISION_API_KEY=your_gpt4_vision_api_key_here 43 | GPT4_VISION_BASE_URL=https://api.openai.com 44 | GPT4_VISION=gpt-4-vision-preview 45 | 46 | # ============================================================================= 47 | # Grok-2-Vision Configuration 48 | # ============================================================================= 49 | GROK_2_VISION_API_KEY=your_grok_api_key_here 50 | GROK_2_VISION_BASE_URL=https://api.x.ai 51 | GROK_2_VISION=x-ai/grok-2-vision-1212 52 | 53 | # ============================================================================= 54 | # Qwen-VL Configuration 55 | # ============================================================================= 56 | QWEN_VL_API_KEY=your_qwen_api_key_here 57 | QWEN_VL_BASE_URL=https://dashscope.aliyuncs.com 58 | QWEN_VL=qwen-vl-max 59 | 60 | # ============================================================================= 61 | # Qwen-2.5-VL-32B Configuration 62 | # ============================================================================= 63 | QWEN_2_5_VL_32B_API_KEY=your_qwen_2_5_vl_32b_api_key_here 64 | QWEN_2_5_VL_32B_BASE_URL=https://dashscope.aliyuncs.com 65 | QWEN_2_5_VL_32B=Qwen/Qwen2.5-VL-32B-Instruct 66 | 67 | # ============================================================================= 68 | # Qwen-2.5-VL-72B Configuration 69 | # ============================================================================= 70 | QWEN_2_5_VL_72B_API_KEY=your_qwen_2_5_vl_72b_api_key_here 71 | QWEN_2_5_VL_72B_BASE_URL=https://dashscope.aliyuncs.com 72 | QWEN_2_5_VL_72B=Qwen/Qwen2.5-VL-72B-Instruct 73 | 74 | # ============================================================================= 75 | # DeepSeek-VL-2 Configuration 76 | # ============================================================================= 77 | DEEPSEEK_VL_2_API_KEY=your_deepseek_vl_2_api_key_here 78 | DEEPSEEK_VL_2_BASE_URL=https://api.deepseek.com 79 | DEEPSEEK_VL_2=deepseek-ai/deepseek-vl2 80 | 81 | # ============================================================================= 82 | # Llama-Vision-11B Configuration 83 | # ============================================================================= 84 | LLAMA_VISION_11B_API_KEY=your_llama_vision_11b_api_key_here 85 | LLAMA_VISION_11B_BASE_URL=https://api.meta.com 86 | LLAMA_VISION_11B=llama-3.2-11b-vision-instruct 87 | 88 | # ============================================================================= 89 | # Llama-Vision-90B Configuration 90 | # ============================================================================= 91 | LLAMA_VISION_90B_API_KEY=your_llama_vision_90b_api_key_here 92 | LLAMA_VISION_90B_BASE_URL=https://api.meta.com 93 | LLAMA_VISION_90B=meta-llama/llama-3.2-90b-vision-instruct 94 | 95 | # ============================================================================= 96 | # Doubao-1.5-Vision-Pro Configuration 97 | # ============================================================================= 98 | DOUBAO_1_5_VISION_PRO_API_KEY=your_doubao_vision_pro_api_key_here 99 | DOUBAO_1_5_VISION_PRO_BASE_URL=https://ark.cn-beijing.volces.com 100 | DOUBAO_1_5_VISION_PRO=doubao-1-5-vision-pro-250328 101 | 102 | # ============================================================================= 103 | # Doubao-1.5-Vision-Pro-Thinking Configuration 104 | # 
============================================================================= 105 | DOUBAO_1_5_VISION_PRO_THINKING_API_KEY=your_doubao_vision_pro_thinking_api_key_here 106 | DOUBAO_1_5_VISION_PRO_THINKING_BASE_URL=https://ark.cn-beijing.volces.com 107 | DOUBAO_1_5_VISION_PRO_THINKING=doubao-1.5-thinking-vision-pro-250428 108 | 109 | # ============================================================================= 110 | # Instructions 111 | # ============================================================================= 112 | # 1. Copy this file to .env: cp .env.example .env 113 | # 2. Replace all placeholder values (your_*_here) with your actual API keys 114 | # 3. Configure the base URLs according to your API providers 115 | # 4. Make sure to keep your .env file secure and never commit it to version control 116 | -------------------------------------------------------------------------------- /docs/zh/api.md: -------------------------------------------------------------------------------- 1 | # API Reference 2 | 3 | SpectrumLab 提供了简洁而强大的 API 接口,帮助你快速构建光谱学深度学习应用。本文档涵盖了核心模块的使用方法和自定义扩展指南。 4 | 5 | ## Benchmark 模块 6 | 7 | Benchmark 模块是 SpectrumLab 的数据访问核心,提供了统一的接口来加载和管理不同层级的光谱学基准测试数据。 8 | 9 | ### 获取 Benchmark Group 10 | 11 | 通过 `get_benchmark_group` 函数可以获取四个不同层级的基准测试组: 12 | 13 | ```python 14 | from spectrumlab.benchmark import get_benchmark_group 15 | 16 | signal_group = get_benchmark_group("signal") # 信号层 17 | perception_group = get_benchmark_group("perception") # 感知层 18 | semantic_group = get_benchmark_group("semantic") # 语义层 19 | generation_group = get_benchmark_group("generation") # 生成层 20 | ``` 21 | 22 | ### 数据访问 23 | 24 | 每个 Benchmark Group 提供了灵活的数据访问方法: 25 | 26 | ```python 27 | # 获取所有数据 28 | data = signal_group.get_data_by_subcategories("all") 29 | 30 | # 获取特定子类别数据 31 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 32 | 33 | # 获取 Benchmark Group 可用的所有 sub-categories 34 | subcategories = signal_group.get_available_subcategories() 35 | print(subcategories) 36 | ``` 37 | 38 | **方法说明:** 39 | 40 | - `get_data_by_subcategories("all")`: 返回该层级下所有子类别的数据 41 | - `get_data_by_subcategories([...])`: 返回指定子类别的数据列表 42 | - `get_available_subcategories()`: 查看当前层级包含的所有子类别名称 43 | 44 | ## Model 模块 45 | 46 | Model 模块提供了统一的模型接口,支持多种预训练模型和自定义模型的集成。 47 | 48 | ### 使用现有模型 49 | 50 | SpectrumLab 内置了多种先进的多模态模型接口: 51 | 52 | ```python 53 | from spectrumlab.models import GPT4oAPI 54 | 55 | gpt4o = GPT4oAPI() 56 | 57 | response = gpt4o.generate("Your Prompts") 58 | ``` 59 | 60 | **支持的模型:** 61 | 62 | - `GPT4oAPI`: OpenAI GPT-4o 63 | - `ClaudeAPI`: Anthropic Claude 系列 64 | - `DeepSeekAPI`: DeepSeek-VL 65 | - `QwenVLAPI`: Qwen-VL 系列 66 | - `InternVLAPI`: InternVL 系列 67 | 68 | ### 自定义模型 69 | 70 | 通过继承 `BaseModel` 类,你可以轻松集成自己的模型: 71 | 72 | ```python 73 | from spectrumlab.models.base import BaseModel 74 | 75 | class CustomModel(BaseModel): 76 | def __init__(self): 77 | super().__init__() 78 | self.model_name = "CustomModel" 79 | 80 | def generate(self, prompt, max_out_len=512): 81 | # 实现你的模型调用逻辑 82 | # 这里可以是 API 调用、本地模型推理等 83 | return response 84 | ``` 85 | 86 | **自定义要求:** 87 | 88 | - 必须实现 `generate` 方法 89 | - 支持文本和多模态输入 90 | - 返回字符串格式的响应 91 | 92 | ## Evaluator 模块 93 | 94 | Evaluator 模块负责模型评估的核心逻辑,提供了标准化的评估流程和灵活的自定义选项。 95 | 96 | ### 基础使用 97 | 98 | 对于选择题类型的评估任务,可以直接使用 `ChoiceEvaluator`: 99 | 100 | ```python 101 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator 102 | 103 | evaluator = ChoiceEvaluator() 104 | 105 | results = evaluator.evaluate( 106 | data_items=data, 107 | 
model=model, 108 | max_out_len=512, 109 | save_path="./eval_results" 110 | ) 111 | ``` 112 | 113 | **参数说明:** 114 | 115 | - `data_items`: 评估数据列表 116 | - `model`: 模型实例 117 | - `max_out_len`: 最大输出长度 118 | - `save_path`: 结果保存路径 119 | 120 | ### 自定义 Evaluator 121 | 122 | 通过继承 `BaseEvaluator` 类,你可以定制评估逻辑以适应特定任务需求: 123 | 124 | ```python 125 | from spectrumlab.evaluator.base import BaseEvaluator 126 | 127 | class CustomEvaluator(BaseEvaluator): 128 | def _build_prompt(self, item): 129 | """构建输入提示词""" 130 | question = item["question"] 131 | choices = item["choices"] 132 | return f"问题:{question}\n选项:{choices}\n请选择正确答案:" 133 | 134 | def _extract_prediction(self, response, item): 135 | """从模型响应中提取预测结果""" 136 | import re 137 | match = re.search(r'\box\{([^}]+)\}', response) 138 | return match.group(1) if match else "" 139 | 140 | def _calculate_accuracy(self, answer, prediction, item): 141 | """计算准确率""" 142 | return answer.strip().lower() == prediction.strip().lower() 143 | ``` 144 | 145 | **核心方法:** 146 | 147 | - `_build_prompt`: 根据数据项构建模型输入 148 | - `_extract_prediction`: 从模型输出中提取预测答案 149 | - `_calculate_accuracy`: 判断预测是否正确 150 | 151 | ## 完整评估示例 152 | 153 | 以下是一个完整的评估流程示例,展示了从数据加载到结果分析的全过程: 154 | 155 | ```python 156 | from spectrumlab.benchmark.signal_group import SignalGroup 157 | from spectrumlab.models import GPT4oAPI 158 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator 159 | 160 | # 1. 加载数据 161 | signal_group = SignalGroup("data") 162 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 163 | 164 | # 2. 初始化模型和评估器 165 | model = GPT4oAPI() 166 | evaluator = ChoiceEvaluator() 167 | 168 | # 3. 运行评估 169 | results = evaluator.evaluate( 170 | data_items=data, 171 | model=model, 172 | save_path="./evaluation_results" 173 | ) 174 | 175 | # 4. 查看评估结果 176 | print(f"评估完成!整体准确率: {results['metrics']['overall']['accuracy']:.2f}%") 177 | 178 | # 查看详细结果 179 | for subcategory, metrics in results['metrics']['subcategory_metrics'].items(): 180 | print(f"{subcategory}: {metrics['accuracy']:.2f}% ({metrics['correct']}/{metrics['total']})") 181 | ``` 182 | 183 | ## 数据格式 184 | 185 | ### 输入数据格式 186 | 187 | 每个数据项遵循以下格式: 188 | 189 | ```python 190 | { 191 | "question": "基于该红外光谱图,该化合物最可能是?", 192 | "choices": ["苯甲酸", "苯甲醛", "苯甲醇", "苯乙酸"], 193 | "answer": "苯甲酸", 194 | "image_path": "./data/signal/ir_001.png", # 可选 195 | "category": "Chemistry", 196 | "sub_category": "Spectrum Type Classification" 197 | } 198 | ``` 199 | 200 | ### 输出结果格式 201 | 202 | 评估结果包含详细的性能指标: 203 | 204 | ```python 205 | { 206 | "metrics": { 207 | "overall": { 208 | "accuracy": 85.5, 209 | "correct": 171, 210 | "total": 200 211 | }, 212 | "subcategory_metrics": { 213 | "Spectrum Type Classification": { 214 | "accuracy": 90.0, 215 | "correct": 45, 216 | "total": 50 217 | } 218 | } 219 | }, 220 | "saved_files": ["result_001.json"], 221 | "total_items": 200 222 | } 223 | ``` 224 | 225 | ## 环境配置 226 | 227 | 使用 API 模型前需要配置相应的环境变量: 228 | 229 | ```bash 230 | # OpenAI 模型 231 | export OPENAI_API_KEY="your_openai_api_key" 232 | 233 | # Anthropic 模型 234 | export ANTHROPIC_API_KEY="your_anthropic_api_key" 235 | 236 | # DeepSeek 模型 237 | export DEEPSEEK_API_KEY="your_deepseek_api_key" 238 | 239 | # 其他模型... 240 | ``` 241 | 242 | ## 快速开始 243 | 244 | 1. **安装依赖**:`pip install spectrumlab` 245 | 2. **配置 API 密钥**:设置相应的环境变量 246 | 3. **加载数据**:使用 Benchmark 模块获取评估数据 247 | 4. **选择模型**:初始化预训练模型或自定义模型 248 | 5. 
**运行评估**:使用 Evaluator 执行评估并保存结果 249 | -------------------------------------------------------------------------------- /leaderboard/manage_leaderboard.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Simplified Leaderboard Management for SpectrumLab 4 | Core functionality for batch import operations 5 | """ 6 | 7 | import json 8 | from datetime import datetime 9 | from pathlib import Path 10 | from typing import Dict, Any, Optional 11 | import statistics 12 | 13 | 14 | class LeaderboardManager: 15 | def __init__(self, leaderboard_path: str = "leaderboard/leaderboard_v_1.0.json"): 16 | self.leaderboard_path = Path(leaderboard_path) 17 | self.data = self._load_leaderboard() 18 | 19 | def _load_leaderboard(self) -> Dict[str, Any]: 20 | """Load leaderboard data from JSON file""" 21 | if self.leaderboard_path.exists(): 22 | try: 23 | with open(self.leaderboard_path, "r", encoding="utf-8") as f: 24 | content = f.read().strip() 25 | if not content: # File is empty 26 | return {"leaderboard_info": {"total_models": 0}, "models": []} 27 | return json.loads(content) 28 | except (json.JSONDecodeError, ValueError) as e: 29 | print( 30 | f"Warning: Invalid JSON in {self.leaderboard_path}. Creating new leaderboard. Error: {e}" 31 | ) 32 | return {"leaderboard_info": {"total_models": 0}, "models": []} 33 | else: 34 | return {"leaderboard_info": {"total_models": 0}, "models": []} 35 | 36 | def _save_leaderboard(self): 37 | """Save leaderboard data to JSON file""" 38 | # Ensure directory exists 39 | self.leaderboard_path.parent.mkdir(parents=True, exist_ok=True) 40 | 41 | # Update total_models count 42 | self.data["leaderboard_info"]["total_models"] = len(self.data["models"]) 43 | 44 | # Sort models by overall accuracy (descending) 45 | self.data["models"].sort( 46 | key=lambda x: x["results"].get("overall_accuracy", 0), reverse=True 47 | ) 48 | 49 | with open(self.leaderboard_path, "w", encoding="utf-8") as f: 50 | json.dump(self.data, f, indent=4, ensure_ascii=False) 51 | 52 | def _calculate_category_accuracy( 53 | self, category_results: Dict[str, Any] 54 | ) -> Optional[float]: 55 | """Calculate category accuracy from subcategories""" 56 | subcategories = category_results.get("subcategories", {}) 57 | valid_scores = [] 58 | 59 | for subcat_name, subcat_data in subcategories.items(): 60 | accuracy = subcat_data.get("accuracy") 61 | if accuracy is not None: 62 | valid_scores.append(accuracy) 63 | 64 | return round(statistics.mean(valid_scores), 2) if valid_scores else None 65 | 66 | def _calculate_overall_accuracy(self, results: Dict[str, Any]) -> Optional[float]: 67 | """Calculate overall accuracy from all categories""" 68 | category_scores = [] 69 | 70 | for category in ["Signal", "Perception", "Semantic", "Generation"]: 71 | if category in results: 72 | category_accuracy = results[category].get("accuracy") 73 | if category_accuracy is not None: 74 | category_scores.append(category_accuracy) 75 | 76 | return round(statistics.mean(category_scores), 2) if category_scores else None 77 | 78 | def _recalculate_model_scores(self, model: Dict[str, Any]): 79 | """Recalculate all accuracy scores for a model""" 80 | results = model["results"] 81 | 82 | # Calculate category accuracies 83 | for category in ["Signal", "Perception", "Semantic", "Generation"]: 84 | if category in results: 85 | calculated_accuracy = self._calculate_category_accuracy( 86 | results[category] 87 | ) 88 | if calculated_accuracy is not None: 89 | 
89 |                     results[category]["accuracy"] = calculated_accuracy 90 | 91 |         # Calculate overall accuracy 92 |         overall_accuracy = self._calculate_overall_accuracy(results) 93 |         if overall_accuracy is not None: 94 |             results["overall_accuracy"] = overall_accuracy 95 | 96 |     def find_model(self, model_name: str) -> Optional[Dict[str, Any]]: 97 |         """Find a model by name""" 98 |         for model in self.data["models"]: 99 |             if model["name"] == model_name: 100 |                 return model 101 |         return None 102 | 103 |     def add_model( 104 |         self, 105 |         model_info: Dict[str, Any], 106 |         subcategory_scores: Dict[str, Dict[str, float]], 107 |     ): 108 |         """Add a new model to the leaderboard""" 109 |         # Check if model already exists 110 |         existing_model = self.find_model(model_info["name"]) 111 |         if existing_model: 112 |             print(f"Model '{model_info['name']}' already exists. Skipping...") 113 |             return False 114 | 115 |         # Create model entry (timestamps are recorded in UTC, marked with "Z") 116 |         model_entry = { 117 |             "name": model_info["name"], 118 |             "name_link": model_info.get("name_link", ""), 119 |             "submitter": model_info.get("submitter", ""), 120 |             "submitter_link": model_info.get("submitter_link", ""), 121 |             "submission_time": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), 122 |             "model_type": model_info.get("model_type", "unknown"), 123 |             "model_size": model_info.get("model_size", "Unknown"), 124 |             "is_multimodal": model_info.get("is_multimodal", False), 125 |             "results": {}, 126 |             "model_info": { 127 |                 "homepage": model_info.get("homepage", ""), 128 |                 "paper": model_info.get("paper", ""), 129 |                 "code": model_info.get("code", ""), 130 |                 "description": model_info.get("description", ""), 131 |             }, 132 |         } 133 | 134 |         # Add subcategory scores 135 |         for category, subcats in subcategory_scores.items(): 136 |             model_entry["results"][category] = { 137 |                 "accuracy": None,  # Will be calculated 138 |                 "subcategories": {}, 139 |             } 140 | 141 |             for subcat, accuracy in subcats.items(): 142 |                 model_entry["results"][category]["subcategories"][subcat] = { 143 |                     "accuracy": accuracy  # may be None 144 |                 } 145 | 146 |         # Calculate derived scores 147 |         self._recalculate_model_scores(model_entry) 148 | 149 |         # Add to leaderboard 150 |         self.data["models"].append(model_entry) 151 |         self._save_leaderboard() 152 | 153 |         print(f"✅ Successfully added model '{model_info['name']}' to leaderboard") 154 |         return True
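155 | 156 | 157 | if __name__ == "__main__": 158 |     # Illustrative usage sketch only: the target file, model name, and scores 159 |     # below are hypothetical and simply demonstrate the expected input shapes. 160 |     manager = LeaderboardManager("leaderboard/leaderboard_demo.json") 161 |     manager.add_model( 162 |         model_info={"name": "Example-VL-7B", "model_type": "api", "is_multimodal": True}, 163 |         subcategory_scores={ 164 |             "Signal": {"Spectrum Type Classification": 90.0}, 165 |             "Semantic": {"Open Question Answering": 72.5}, 166 |         }, 167 |     )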
19 | """ 20 | print(f"Loading datasets for level '{self.level}'...") 21 | print(f"Looking for local datasets in: {self.path}") 22 | 23 | if self.path.exists() and self.path.is_dir(): 24 | print("✅ Local datasets found, loading...") 25 | self._load_from_local(self.path) 26 | else: 27 | print("❌ Local datasets not found, falling back to HuggingFace...") 28 | self._load_from_remote(self.path) 29 | 30 | print( 31 | f"📊 Total available sub-categories in '{self.level}' level: {len(self.datasets)}" 32 | ) 33 | print(f"📋 Available sub-categories: {list(self.datasets.keys())}") 34 | 35 | def _load_from_local(self, level_path: Path): 36 | self.datasets = {} 37 | 38 | for sub_category_dir in level_path.iterdir(): 39 | if not sub_category_dir.is_dir(): 40 | continue 41 | sub_category_name = sub_category_dir.name 42 | json_filename = f"{sub_category_name.replace(' ', '_')}_datasets.json" 43 | json_file = sub_category_dir / json_filename 44 | 45 | if json_file.exists(): 46 | try: 47 | data = self._load_json(json_file) 48 | if data: 49 | self.datasets[sub_category_name] = data 50 | print( 51 | f" ✔ Loaded {len(data)} items from '{sub_category_name}'" 52 | ) 53 | else: 54 | print(f" ⚠ Empty data in '{sub_category_name}'") 55 | except Exception as e: 56 | print(f" ✖ Failed to load '{sub_category_name}': {e}") 57 | else: 58 | print(f" ⚠ No {json_filename} found in '{sub_category_name}'") 59 | 60 | def _load_from_remote(self, local_level_path: Path): 61 | # TODO 62 | self.datasets = {} 63 | 64 | def _fix_image_path(self, image_path): 65 | if isinstance(image_path, list): 66 | return [self._fix_image_path(p) for p in image_path] 67 | if not image_path or not str(image_path).strip(): 68 | return image_path 69 | # 支持 ./data/ 和 data/ 开头 70 | s = str(image_path) 71 | if s.startswith("./data/"): 72 | relative_part = s[7:] 73 | corrected_path = self.data_root / relative_part 74 | return str(corrected_path) 75 | if s.startswith("data/"): 76 | corrected_path = self.data_root / s[5:] 77 | return str(corrected_path) 78 | # 如果已经是绝对路径,直接返回 79 | if os.path.isabs(s): 80 | return s 81 | # 其它相对路径,拼到 data_root 下 82 | corrected_path = self.data_root / s 83 | return str(corrected_path) 84 | 85 | def _load_json(self, file_path: Path) -> List[Dict]: 86 | try: 87 | with open(file_path, "r", encoding="utf-8") as f: 88 | data = json.load(f) 89 | if isinstance(data, list): 90 | for item in data: 91 | if isinstance(item, dict) and "image_path" in item: 92 | if item["image_path"]: 93 | original_path = item["image_path"] 94 | item["image_path"] = self._fix_image_path(original_path) 95 | # 修正 answer 字段(如果是图片路径或图片路径 list) 96 | if isinstance(item, dict) and "answer" in item: 97 | answer = item["answer"] 98 | # 只修正字符串类型且像图片路径的 answer 99 | if isinstance(answer, str) and answer.lower().endswith( 100 | (".png", ".jpg", ".jpeg", ".bmp", ".gif", ".webp") 101 | ): 102 | item["answer"] = self._fix_image_path(answer) 103 | # 如果 answer 是 list(极少见),也递归修正 104 | if isinstance(answer, list): 105 | item["answer"] = [ 106 | self._fix_image_path(a) for a in answer 107 | ] 108 | return data 109 | else: 110 | print(f"Warning: Expected list in {file_path}, got {type(data)}") 111 | return [] 112 | except json.JSONDecodeError as e: 113 | print(f"Error parsing JSON in {file_path}: {e}") 114 | return [] 115 | except Exception as e: 116 | print(f"Error reading file {file_path}: {e}") 117 | return [] 118 | 119 | def get_data_by_subcategories( 120 | self, subcategories: Union[str, List[str]] = "all" 121 | ) -> List[Dict]: 122 | if subcategories == "all": 123 | 
-------------------------------------------------------------------------------- /spectrumlab/evaluator/open_evaluator.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Dict, Any, Optional 3 | from .base import BaseEvaluator 4 | from spectrumlab.utils.image_utils import ( 5 |     prepare_images_for_prompt, 6 |     normalize_image_paths, 7 | ) 8 | from spectrumlab.models import GPT4o 9 | from tqdm import tqdm 10 | 11 | 12 | class OpenEvaluator(BaseEvaluator): 13 |     def __init__( 14 |         self, 15 |         prediction_key: str = "model_prediction", 16 |         score_model: Optional[Any] = None, 17 |     ): 18 |         super().__init__(prediction_key) 19 |         # A custom scoring model may be supplied; defaults to GPT4o 20 |         self.score_model = score_model or GPT4o() 21 | 22 |     def _build_prompt(self, item: Dict) -> Any: 23 |         """ 24 |         Build the answering prompt for the model under test. 25 |         """ 26 |         question = item.get("question", "") 27 |         images = normalize_image_paths(item.get("image_path")) 28 |         text_content = f"Question: {question}\nPlease answer the question." 29 |         if images: 30 |             assert all( 31 |                 isinstance(p, str) for p in images 32 |             ), f"images should be List[str], got {images}" 33 |             return {"text": text_content, "images": prepare_images_for_prompt(images)} 34 |         else: 35 |             return text_content 36 | 37 |     def _build_score_prompt(self, item: Dict, model_output: Any) -> Any: 38 |         """ 39 |         Build the scoring prompt, including the scoring rubric. 40 |         """ 41 |         question = item.get("question", "") 42 |         images = normalize_image_paths(item.get("image_path")) 43 |         reference_answer = item.get("answer", "") 44 |         # Support image-type reference answers 45 |         reference_images = [] 46 |         if isinstance(reference_answer, str) and reference_answer.lower().endswith( 47 |             (".png", ".jpg", ".jpeg", ".bmp", ".gif", ".webp") 48 |         ): 49 |             reference_images = [reference_answer] 50 |             reference_answer_text = "[See reference image]" 51 |         else: 52 |             reference_answer_text = reference_answer 53 |         # Support image-type model outputs 54 |         model_output_images = [] 55 |         if isinstance(model_output, str) and model_output.lower().endswith( 56 |             (".png", ".jpg", ".jpeg", ".bmp", ".gif", ".webp") 57 |         ): 58 |             model_output_images = [model_output] 59 |             model_output_text = "[See model output image]" 60 |         else: 61 |             model_output_text = model_output 62 |         # Scoring rubric 63 |         prompt_lines = [ 64 |             "You are an expert evaluator. Given the following question, reference answer, and model answer, please rate the model answer on a scale of 0 to 1, and explain your reasoning.",
65 |             "Scoring rules:", 66 |             "- If the reference answer is an image but the model output does not contain an image, score 0.", 67 |             "- If the reference answer is text but the model output does not contain text, score 0.", 68 |             "- Otherwise, score based on the similarity and correctness of the model output compared to the reference answer.", 69 |             "- If both text and image are present, consider both in your evaluation.", 70 |             "Please output your score in the format: \\score{X}, where X is a number between 0 and 1.", 71 |             "", 72 |             f"Question: {question}", 73 |         ] 74 |         if images: 75 |             prompt_lines.append("[See question image(s)]") 76 |         prompt_lines.append("") 77 |         prompt_lines.append(f"Reference Answer: {reference_answer_text}") 78 |         if reference_images: 79 |             prompt_lines.append("[See reference answer image(s)]") 80 |         prompt_lines.append("") 81 |         prompt_lines.append(f"Model Output: {model_output_text}") 82 |         if model_output_images: 83 |             prompt_lines.append("[See model output image(s)]") 84 |         prompt_lines.append("") 85 |         prompt_lines.append("Your response:") 86 |         text_content = "\n".join(prompt_lines) 87 |         # Assemble the multimodal input 88 |         all_images = [] 89 |         if images: 90 |             assert all( 91 |                 isinstance(p, str) for p in images 92 |             ), f"images should be List[str], got {images}" 93 |             all_images += prepare_images_for_prompt(images) 94 |         if reference_images: 95 |             assert all( 96 |                 isinstance(p, str) for p in reference_images 97 |             ), f"reference_images should be List[str], got {reference_images}" 98 |             all_images += prepare_images_for_prompt(reference_images) 99 |         if model_output_images: 100 |             assert all( 101 |                 isinstance(p, str) for p in model_output_images 102 |             ), f"model_output_images should be List[str], got {model_output_images}" 103 |             all_images += prepare_images_for_prompt(model_output_images) 104 |         if all_images: 105 |             return {"text": text_content, "images": all_images} 106 |         else: 107 |             return text_content 108 | 109 |     def _extract_prediction(self, response: str, item: Dict) -> float: 110 |         """ 111 |         Extract \\score{X} from the scoring model's response. 112 |         """ 113 |         if not response: 114 |             return 0.0 115 |         score_pattern = r"\\score\{([0-9.]+)\}" 116 |         matches = re.findall(score_pattern, response) 117 |         if matches: 118 |             try: 119 |                 score = float(matches[-1]) 120 |                 return max(0.0, min(1.0, score)) 121 |             except Exception: 122 |                 return 0.0 123 |         return 0.0 124 | 125 |     def _calculate_accuracy(self, answer: Any, prediction: float, item: Dict) -> bool: 126 |         return prediction >= 0.5 127 | 128 |     def evaluate( 129 |         self, 130 |         data_items, 131 |         model, 132 |         max_out_len=512, 133 |         batch_size=None, 134 |         save_path="./eval_results", 135 |         score_model=None, 136 |     ): 137 |         """ 138 |         Two-stage evaluation: the model under test generates answers first, then the scoring model rates them. 139 |         A score_model override is supported. 140 |         """ 141 |         score_model = score_model or self.score_model 142 |         results = [] 143 |         print("🚀 Running model inference...") 144 |         model_outputs = [] 145 |         # 1. Generate answers with the model under test (with progress bar) 146 |         for item in tqdm(data_items, desc="Generating responses", unit="item"): 147 |             prompt = self._build_prompt(item) 148 |             model_output = model.generate(prompt, max_out_len) 149 |             model_outputs.append(model_output) 150 |         # 2. Scoring stage (with progress bar) 151 |         print("📝 Running scoring model...") 152 |         for item, model_output in tqdm( 153 |             zip(data_items, model_outputs), 154 |             total=len(data_items), 155 |             desc="Scoring responses", 156 |             unit="item", 157 |         ): 158 |             score_prompt = self._build_score_prompt(item, model_output) 159 |             score_response = score_model.generate(score_prompt, max_out_len) 160 |             score = self._extract_prediction(score_response, item) 161 |             # 3. Record all information 162 |             item_result = item.copy() 163 |             item_result[self.prediction_key] = score 164 |             item_result["model_output"] = model_output 165 |             item_result["score_response"] = score_response 166 |             item_result["pass"] = self._calculate_accuracy( 167 |                 item.get("answer", ""), score, item 168 |             ) 169 |             results.append(item_result) 170 |         # 4. Save results and compute statistics 171 |         saved_files = self._save_results(results, save_path) 172 |         metrics = self._calculate_metrics(results) 173 |         self._print_results(metrics) 174 |         return { 175 |             "metrics": metrics, 176 |             "saved_files": saved_files, 177 |             "total_items": len(results), 178 |         } 179 | 
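180 | # Illustrative behaviour of the score extraction (hypothetical responses): 181 | #   _extract_prediction("Reasoning ... \\score{0.8}", {})  -> 0.8 (last marker wins, clamped to [0, 1]) 182 | #   _extract_prediction("no score marker", {})             -> 0.0 183 | # Items scoring at least 0.5 are counted as passes by _calculate_accuracy.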
-------------------------------------------------------------------------------- /docs/en/api.md: -------------------------------------------------------------------------------- 1 | # API Reference 2 | 3 | SpectrumLab provides a concise and powerful API to help you quickly build deep learning applications in spectroscopy. This document covers the usage of the core modules and guidelines for custom extensions. 4 | 5 | ## Benchmark Module 6 | 7 | The Benchmark module is the core of data access in SpectrumLab, providing a unified interface to load and manage spectroscopic benchmarking data at different levels. 8 | 9 | ### Get Benchmark Group 10 | 11 | The `get_benchmark_group` function can be used to obtain the benchmark groups at four different levels: 12 | 13 | ```python 14 | from spectrumlab.benchmark import get_benchmark_group 15 | 16 | signal_group = get_benchmark_group("signal")          # Signal layer 17 | perception_group = get_benchmark_group("perception")  # Perception layer 18 | semantic_group = get_benchmark_group("semantic")      # Semantic layer 19 | generation_group = get_benchmark_group("generation")  # Generation layer 20 | ``` 21 | 22 | ### Data Access 23 | 24 | Each benchmark group provides flexible data access methods: 25 | 26 | ```python 27 | # Get all data 28 | data = signal_group.get_data_by_subcategories("all") 29 | 30 | # Obtain data for specific sub-categories 31 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 32 | 33 | # Get all available sub-categories of the benchmark group 34 | subcategories = signal_group.get_available_subcategories() 35 | print(subcategories) 36 | ``` 37 | 38 | **Method description:** 39 | 40 | - `get_data_by_subcategories("all")`: return the data of all sub-categories at this level 41 | - `get_data_by_subcategories([...])`: return the data list for the specified sub-categories 42 | - `get_available_subcategories()`: list the names of all sub-categories contained in the current level 43 | 44 | ## Model Module 45 | 46 | The Model module provides a unified model interface, supporting the integration of various pre-trained and custom models.
47 | 48 | ### Use Existing Models 49 | 50 | SpectrumLab has several advanced multimodal model interfaces built in: 51 | 52 | ```python 53 | from spectrumlab.models import GPT4oAPI 54 | 55 | gpt4o = GPT4oAPI() 56 | 57 | response = gpt4o.generate("Your prompt") 58 | ``` 59 | 60 | **Supported models:** 61 | 62 | - `GPT4oAPI`: OpenAI GPT-4o 63 | - `ClaudeAPI`: Anthropic Claude series 64 | - `DeepSeekAPI`: DeepSeek-VL 65 | - `QwenVLAPI`: Qwen-VL series 66 | - `InternVLAPI`: InternVL series 67 | 68 | ### Custom Model 69 | 70 | By inheriting from the `BaseModel` class, you can easily integrate your own model: 71 | 72 | ```python 73 | from spectrumlab.models.base import BaseModel 74 | 75 | class CustomModel(BaseModel): 76 |     def __init__(self): 77 |         super().__init__() 78 |         self.model_name = "CustomModel" 79 | 80 |     def generate(self, prompt, max_out_len=512): 81 |         # Implement the logic for calling your model here: 82 |         response = ...  # e.g. an API call or local model inference 83 |         return response 84 | ``` 85 | 86 | **Custom requirements:** 87 | 88 | - Implement the `generate` method 89 | - Support both text and multimodal input 90 | - Return the response as a string 91 | 92 | ## Evaluator Module 93 | 94 | The Evaluator module is responsible for the core logic of model evaluation, providing a standardized evaluation process and flexible customization options. 95 | 96 | ### Basic Usage 97 | 98 | For multiple-choice evaluation tasks, you can use `ChoiceEvaluator` directly: 99 | 100 | ```python 101 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator 102 | 103 | evaluator = ChoiceEvaluator() 104 | 105 | results = evaluator.evaluate( 106 |     data_items=data, 107 |     model=model, 108 |     max_out_len=512, 109 |     save_path="./eval_results" 110 | ) 111 | ``` 112 | 113 | **Parameter description:** 114 | 115 | - `data_items`: list of evaluation data items 116 | - `model`: model instance 117 | - `max_out_len`: maximum output length 118 | - `save_path`: path for saving results 119 | 120 | ### Custom Evaluator 121 | 122 | By inheriting from the `BaseEvaluator` class, you can customize the evaluation logic to meet the requirements of specific tasks: 123 | 124 | ```python 125 | from spectrumlab.evaluator.base import BaseEvaluator 126 | 127 | class CustomEvaluator(BaseEvaluator): 128 |     def _build_prompt(self, item): 129 |         """Build the input prompt.""" 130 |         question = item["question"] 131 |         choices = item["choices"] 132 |         return f"Question: {question}\nOptions: {choices}\nPlease choose the correct answer:" 133 | 134 |     def _extract_prediction(self, response, item): 135 |         """Extract the prediction from the model response.""" 136 |         import re 137 |         match = re.search(r'\\box\{([^}]+)\}', response) 138 |         return match.group(1) if match else "" 139 | 140 |     def _calculate_accuracy(self, answer, prediction, item): 141 |         """Check whether the prediction matches the answer.""" 142 |         return answer.strip().lower() == prediction.strip().lower() 143 | ``` 144 | 145 | **Core methods:** 146 | 147 | - `_build_prompt`: build the model input from a data item 148 | - `_extract_prediction`: extract the predicted answer from the model output 149 | - `_calculate_accuracy`: judge whether the prediction is correct 150 | 151 | ## Complete Evaluation Example 152 | 153 | The following is a complete evaluation workflow, covering everything from data loading to result analysis: 154 | 155 | ```python 156 | from spectrumlab.benchmark.signal_group import SignalGroup 157 | from spectrumlab.models import GPT4oAPI
158 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator 159 | 160 | # 1. Load data 161 | signal_group = SignalGroup("data") 162 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"]) 163 | 164 | # 2. Initialize the model and evaluator 165 | model = GPT4oAPI() 166 | evaluator = ChoiceEvaluator() 167 | 168 | # 3. Run the evaluation 169 | results = evaluator.evaluate( 170 |     data_items=data, 171 |     model=model, 172 |     save_path="./evaluation_results" 173 | ) 174 | 175 | # 4. View the evaluation results 176 | print(f"Evaluation completed! Overall accuracy: {results['metrics']['overall']['accuracy']:.2f}%") 177 | 178 | # View detailed results 179 | for subcategory, metrics in results['metrics']['subcategory_metrics'].items(): 180 |     print(f"{subcategory}: {metrics['accuracy']:.2f}% ({metrics['correct']}/{metrics['total']})") 181 | ``` 182 | 183 | ## Data Format 184 | 185 | ### Input Data Format 186 | 187 | Each data item follows this format: 188 | 189 | ```python 190 | { 191 |     "question": "Based on this infrared spectrum, what is the most likely compound?", 192 |     "choices": ["benzoic acid", "benzaldehyde", "benzyl alcohol", "phenylacetic acid"], 193 |     "answer": "benzoic acid", 194 |     "image_path": "./data/signal/ir_001.png",  # optional 195 |     "category": "Chemistry", 196 |     "sub_category": "Spectrum Type Classification" 197 | } 198 | ``` 199 | 200 | ### Output Result Format 201 | 202 | The evaluation results include detailed performance metrics: 203 | 204 | ```python 205 | { 206 |     "metrics": { 207 |         "overall": { 208 |             "accuracy": 85.5, 209 |             "correct": 171, 210 |             "total": 200 211 |         }, 212 |         "subcategory_metrics": { 213 |             "Spectrum Type Classification": { 214 |                 "accuracy": 90.0, 215 |                 "correct": 45, 216 |                 "total": 50 217 |             } 218 |         } 219 |     }, 220 |     "saved_files": ["result_001.json"], 221 |     "total_items": 200 222 | } 223 | ``` 224 | 225 | ## Environment Configuration 226 | 227 | Before using an API model, you need to configure the corresponding environment variables: 228 | 229 | ```bash 230 | # OpenAI models 231 | export OPENAI_API_KEY="your_openai_api_key" 232 | 233 | # Anthropic models 234 | export ANTHROPIC_API_KEY="your_anthropic_api_key" 235 | 236 | # DeepSeek models 237 | export DEEPSEEK_API_KEY="your_deepseek_api_key" 238 | 239 | # Other models... 240 | ``` 241 | 242 | ## Quick Start 243 | 244 | 1. **Install dependencies**: `pip install spectrumlab` 245 | 2. **Configure API keys**: set the corresponding environment variables 246 | 3. **Load data**: use the Benchmark module to obtain evaluation data 247 | 4. **Select a model**: initialize a pre-trained or custom model 248 | 5. **Run evaluation**: use the Evaluator to perform the evaluation and save the results 249 | 
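250 | ## Open-Ended Evaluation (Sketch) 251 | 252 | Besides `ChoiceEvaluator`, the package ships an `OpenEvaluator` (see `spectrumlab/evaluator/open_evaluator.py`) that evaluates open-ended questions in two stages: the model under test answers first, then a scoring model (GPT-4o by default) rates each answer, and scores of at least 0.5 count as passes. The snippet below is a minimal sketch, assuming the metrics layout shown above and a local `data` directory: 253 | 254 | ```python 255 | from spectrumlab.benchmark.semantic_group import SemanticGroup 256 | from spectrumlab.models import GPT4oAPI 257 | from spectrumlab.evaluator.open_evaluator import OpenEvaluator 258 | 259 | # Load open-ended data and score answers with the default GPT-4o judge 260 | semantic_group = SemanticGroup("data") 261 | data = semantic_group.get_data_by_subcategories("all") 262 | 263 | evaluator = OpenEvaluator() 264 | results = evaluator.evaluate( 265 |     data_items=data, 266 |     model=GPT4oAPI(), 267 |     save_path="./open_eval_results" 268 | ) 269 | print(f"Pass rate: {results['metrics']['overall']['accuracy']:.2f}%") 270 | ``` 271 | 272 | The `score_model` argument of `evaluate` lets you swap in a different judge model.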
-------------------------------------------------------------------------------- /spectrumlab/models/qwen_vl_api.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Any, Optional, Union 2 | from .base_api import BaseAPIModel 3 | from spectrumlab.config import Config 4 | from openai import OpenAI 5 | 6 | 7 | class Qwen_VL_Max(BaseAPIModel): 8 |     def __init__( 9 |         self, 10 |         api_key: Optional[str] = None, 11 |         base_url: Optional[str] = None, 12 |         model_name: Optional[str] = None, 13 |         **kwargs, 14 |     ): 15 |         config = Config() 16 | 17 |         # Use provided parameters or fall back to config 18 |         self.api_key = api_key or config.qwen_vl_api_key 19 |         self.base_url = base_url or config.qwen_vl_base_url 20 |         self.model_name = model_name or config.qwen_vl_model_name 21 | 22 |         # Validate that we have required configuration 23 |         if not self.api_key: 24 |             raise ValueError( 25 |                 "Qwen-VL-Max API key not found. Please set QWEN_VL_MAX_API_KEY in your .env file " 26 |                 "or provide api_key parameter." 27 |             ) 28 | 29 |         # Ensure base_url has proper protocol for OpenRouter 30 |         if self.base_url and not self.base_url.startswith(("http://", "https://")): 31 |             self.base_url = f"https://{self.base_url}" 32 | 33 |         self.client = OpenAI( 34 |             api_key=self.api_key, 35 |             base_url=self.base_url, 36 |         ) 37 | 38 |         # Initialize parent class 39 |         super().__init__(model_name=self.model_name, **kwargs) 40 | 41 |     def generate( 42 |         self, 43 |         prompt: Union[str, Dict[str, Any]], 44 |         max_tokens: int = 512, 45 |         **generation_kwargs, 46 |     ) -> str: 47 |         """ 48 |         Generate response supporting both text and multimodal input. 49 | 50 |         Args: 51 |             prompt: Either text string or multimodal dict 52 |             max_tokens: Maximum tokens to generate 53 |             **generation_kwargs: Additional generation parameters like temperature, top_p, etc. 
54 | 55 | Returns: 56 | Generated response string 57 | """ 58 | messages = [] 59 | 60 | # Handle multimodal vs text-only prompts 61 | if isinstance(prompt, dict) and "images" in prompt: 62 | # Multimodal prompt 63 | content = [] 64 | 65 | content.append({"type": "text", "text": prompt["text"]}) 66 | 67 | for image_data in prompt["images"]: 68 | content.append(image_data) 69 | 70 | messages.append({"role": "user", "content": content}) 71 | else: 72 | # Text-only prompt 73 | text_content = prompt if isinstance(prompt, str) else prompt.get("text", "") 74 | messages.append({"role": "user", "content": text_content}) 75 | 76 | # Prepare API call parameters 77 | api_params = { 78 | "model": self.model_name, 79 | "messages": messages, 80 | "max_tokens": max_tokens, 81 | } 82 | 83 | # Add any additional generation parameters 84 | api_params.update(generation_kwargs) 85 | 86 | try: 87 | response = self.client.chat.completions.create(**api_params) 88 | return response.choices[0].message.content 89 | except Exception as e: 90 | raise RuntimeError(f"Qwen-VL-Max API call failed: {e}") 91 | 92 | 93 | class Qwen_2_5_VL_32B(BaseAPIModel): 94 | def __init__( 95 | self, 96 | api_key: Optional[str] = None, 97 | base_url: Optional[str] = None, 98 | model_name: Optional[str] = None, 99 | **kwargs, 100 | ): 101 | config = Config() 102 | 103 | # Use provided parameters or fall back to config 104 | self.api_key = api_key or config.qwen_2_5_vl_32b_api_key 105 | self.base_url = base_url or config.qwen_2_5_vl_32b_base_url 106 | self.model_name = model_name or config.qwen_2_5_vl_32b_model_name 107 | 108 | # Validate that we have required configuration 109 | if not self.api_key: 110 | raise ValueError( 111 | "Qwen-2.5-VL-32B API key not found. Please set QWEN_2_5_VL_32B_API_KEY in your .env file " 112 | "or provide api_key parameter." 113 | ) 114 | 115 | # Ensure base_url has proper protocol for OpenRouter 116 | if self.base_url and not self.base_url.startswith(("http://", "https://")): 117 | self.base_url = f"https://{self.base_url}" 118 | 119 | self.client = OpenAI( 120 | api_key=self.api_key, 121 | base_url=self.base_url, 122 | ) 123 | 124 | # Initialize parent class 125 | super().__init__(model_name=self.model_name, **kwargs) 126 | 127 | def generate( 128 | self, 129 | prompt: Union[str, Dict[str, Any]], 130 | max_tokens: int = 512, 131 | **generation_kwargs, 132 | ) -> str: 133 | """ 134 | Generate response supporting both text and multimodal input. 135 | 136 | Args: 137 | prompt: Either text string or multimodal dict 138 | max_tokens: Maximum tokens to generate 139 | **generation_kwargs: Additional generation parameters like temperature, top_p, etc. 
140 | 141 | Returns: 142 | Generated response string 143 | """ 144 | messages = [] 145 | 146 | # Handle multimodal vs text-only prompts 147 | if isinstance(prompt, dict) and "images" in prompt: 148 | # Multimodal prompt 149 | content = [] 150 | 151 | content.append({"type": "text", "text": prompt["text"]}) 152 | 153 | for image_data in prompt["images"]: 154 | content.append(image_data) 155 | 156 | messages.append({"role": "user", "content": content}) 157 | else: 158 | # Text-only prompt 159 | text_content = prompt if isinstance(prompt, str) else prompt.get("text", "") 160 | messages.append({"role": "user", "content": text_content}) 161 | 162 | # Prepare API call parameters 163 | api_params = { 164 | "model": self.model_name, 165 | "messages": messages, 166 | "max_tokens": max_tokens, 167 | } 168 | 169 | # Add any additional generation parameters 170 | api_params.update(generation_kwargs) 171 | 172 | try: 173 | response = self.client.chat.completions.create(**api_params) 174 | return response.choices[0].message.content 175 | except Exception as e: 176 | raise RuntimeError(f"Qwen-2.5-VL-32B API call failed: {e}") 177 | 178 | 179 | class Qwen_2_5_VL_72B(BaseAPIModel): 180 | def __init__( 181 | self, 182 | api_key: Optional[str] = None, 183 | base_url: Optional[str] = None, 184 | model_name: Optional[str] = None, 185 | **kwargs, 186 | ): 187 | config = Config() 188 | 189 | # Use provided parameters or fall back to config 190 | self.api_key = api_key or config.qwen_2_5_vl_72b_api_key 191 | self.base_url = base_url or config.qwen_2_5_vl_72b_base_url 192 | self.model_name = model_name or config.qwen_2_5_vl_72b_model_name 193 | 194 | # Validate that we have required configuration 195 | if not self.api_key: 196 | raise ValueError( 197 | "Qwen-2.5-VL-72B API key not found. Please set QWEN_2_5_VL_72B_API_KEY in your .env file " 198 | "or provide api_key parameter." 199 | ) 200 | 201 | # Ensure base_url has proper protocol for OpenRouter 202 | if self.base_url and not self.base_url.startswith(("http://", "https://")): 203 | self.base_url = f"https://{self.base_url}" 204 | 205 | self.client = OpenAI( 206 | api_key=self.api_key, 207 | base_url=self.base_url, 208 | ) 209 | 210 | # Initialize parent class 211 | super().__init__(model_name=self.model_name, **kwargs) 212 | 213 | def generate( 214 | self, 215 | prompt: Union[str, Dict[str, Any]], 216 | max_tokens: int = 512, 217 | **generation_kwargs, 218 | ) -> str: 219 | """ 220 | Generate response supporting both text and multimodal input. 221 | 222 | Args: 223 | prompt: Either text string or multimodal dict 224 | max_tokens: Maximum tokens to generate 225 | **generation_kwargs: Additional generation parameters like temperature, top_p, etc. 
226 | 227 | Returns: 228 | Generated response string 229 | """ 230 | messages = [] 231 | 232 | # Handle multimodal vs text-only prompts 233 | if isinstance(prompt, dict) and "images" in prompt: 234 | # Multimodal prompt 235 | content = [] 236 | 237 | content.append({"type": "text", "text": prompt["text"]}) 238 | 239 | for image_data in prompt["images"]: 240 | content.append(image_data) 241 | 242 | messages.append({"role": "user", "content": content}) 243 | else: 244 | # Text-only prompt 245 | text_content = prompt if isinstance(prompt, str) else prompt.get("text", "") 246 | messages.append({"role": "user", "content": text_content}) 247 | 248 | # Prepare API call parameters 249 | api_params = { 250 | "model": self.model_name, 251 | "messages": messages, 252 | "max_tokens": max_tokens, 253 | } 254 | 255 | # Add any additional generation parameters 256 | api_params.update(generation_kwargs) 257 | 258 | try: 259 | response = self.client.chat.completions.create(**api_params) 260 | return response.choices[0].message.content 261 | except Exception as e: 262 | raise RuntimeError(f"Qwen-2.5-VL-72B API call failed: {e}") 263 | --------------------------------------------------------------------------------