├── spectrumlab
│   ├── cli
│   │   ├── py.typed
│   │   ├── __init__.py
│   │   ├── api.py
│   │   └── main.py
│   ├── config
│   │   ├── __init__.py
│   │   └── base_config.py
│   ├── benchmark
│   │   ├── signal_group.py
│   │   ├── semantic_group.py
│   │   ├── generation_group.py
│   │   ├── perception_group.py
│   │   ├── __init__.py.bak
│   │   ├── __init__.py
│   │   └── base.py
│   ├── evaluator
│   │   ├── __init__.py
│   │   ├── choice_evaluator.py
│   │   └── open_evaluator.py
│   ├── utils
│   │   ├── __init__.py
│   │   └── image_utils.py
│   └── models
│       ├── base.py
│       ├── __init__.py
│       ├── base_api.py
│       ├── claude_api.py
│       ├── deepseek_api.py
│       ├── deepseek_vl.py
│       ├── gpt4o_api.py
│       ├── grok_api.py
│       ├── internvl_api.py
│       ├── README.md
│       ├── llama_api.py
│       ├── gpt4_v_api.py
│       ├── doubao_api.py
│       └── qwen_vl_api.py
├── leaderboard
│   ├── gradio
│   │   └── requirements.txt
│   ├── vue
│   │   ├── env.d.ts
│   │   ├── .vscode
│   │   │   └── extensions.json
│   │   ├── public
│   │   │   └── favicon.ico
│   │   ├── tsconfig.json
│   │   ├── src
│   │   │   ├── views
│   │   │   │   ├── HomeView.vue
│   │   │   │   └── AboutView.vue
│   │   │   ├── assets
│   │   │   │   ├── logo.svg
│   │   │   │   ├── main.css
│   │   │   │   └── base.css
│   │   │   ├── main.ts
│   │   │   ├── components
│   │   │   │   ├── icons
│   │   │   │   │   ├── IconSupport.vue
│   │   │   │   │   ├── IconTooling.vue
│   │   │   │   │   ├── IconCommunity.vue
│   │   │   │   │   ├── IconDocumentation.vue
│   │   │   │   │   └── IconEcosystem.vue
│   │   │   │   ├── HelloWorld.vue
│   │   │   │   ├── WelcomeItem.vue
│   │   │   │   └── TheWelcome.vue
│   │   │   ├── stores
│   │   │   │   └── counter.ts
│   │   │   ├── router
│   │   │   │   └── index.ts
│   │   │   └── App.vue
│   │   ├── index.html
│   │   ├── tsconfig.app.json
│   │   ├── .gitignore
│   │   ├── vite.config.ts
│   │   ├── tsconfig.node.json
│   │   ├── package.json
│   │   └── README.md
│   ├── batch_import_models.py
│   └── manage_leaderboard.py
├── docs
│   ├── .gitignore
│   ├── package.json
│   ├── .vitepress
│   │   ├── theme
│   │   │   ├── index.ts
│   │   │   └── custom.css
│   │   ├── config.mts
│   │   ├── en.ts
│   │   └── zh.ts
│   ├── index.md
│   ├── zh
│   │   ├── index.md
│   │   ├── benchmark.md
│   │   ├── tutorial.md
│   │   └── api.md
│   ├── en
│   │   ├── index.md
│   │   ├── benchmark.md
│   │   ├── tutorial.md
│   │   └── api.md
│   └── README.md
├── .gitattributes
├── .vscode
│   ├── extensions.json
│   ├── python.code-snippets
│   └── settings.json
├── tests
│   └── models
│       ├── test_deepseek.py
│       ├── test_gpt_4_1.py
│       ├── test_claude_opus_4.py
│       ├── test_claude_sonnet_4.py
│       ├── test_qwen_vl_max.py
│       ├── test_gpt4o.py
│       ├── test_gpt_4_v.py
│       ├── test_claude_haiku_3_5.py
│       ├── test_claude_sonnet_3_5.py
│       ├── test_deepseek_vl_2.py
│       ├── test_internvl.py
│       ├── test_grok_2_v.py
│       ├── test_qwen_vl_2_5_32b.py
│       ├── test_qwen_vl_2_5_72b.py
│       ├── test_llama_3_2_vision_11b.py
│       ├── test_llama_3_2_vision_90b.py
│       ├── test_doubao_1_5_vision_pro.py
│       └── test_doubao_1_5_vision_pro_thinking.py
├── .pre-commit-config.yaml
├── .gitignore
├── scripts
│   └── start_leaderboard.sh
├── pyproject.toml
├── .github
│   └── workflows
│       └── docs-deploy.yml
├── CONTRIBUTING.md
├── run_evaluation.py
├── README.md
├── run_ablation_experiments.py
└── .env.example
/spectrumlab/cli/py.typed:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/leaderboard/gradio/requirements.txt:
--------------------------------------------------------------------------------
1 | gradio==5.35.0
--------------------------------------------------------------------------------
/leaderboard/vue/env.d.ts:
--------------------------------------------------------------------------------
1 | /// <reference types="vite/client" />
2 |
--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | /.vitepress/dist
3 | /.vitepress/cache
4 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/leaderboard/vue/.vscode/extensions.json:
--------------------------------------------------------------------------------
1 | {
2 | "recommendations": ["Vue.volar"]
3 | }
4 |
--------------------------------------------------------------------------------
/spectrumlab/config/__init__.py:
--------------------------------------------------------------------------------
1 | from .base_config import Config
2 |
3 |
4 | __all__ = ["Config"]
5 |
--------------------------------------------------------------------------------
/leaderboard/vue/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/little1d/SpectrumLab/HEAD/leaderboard/vue/public/favicon.ico
--------------------------------------------------------------------------------
/spectrumlab/cli/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.0.1"
2 |
3 |
4 | def hello() -> str:
5 | return "Hello from SpectrumLab!"
6 |
--------------------------------------------------------------------------------
/spectrumlab/benchmark/signal_group.py:
--------------------------------------------------------------------------------
1 | from .base import BaseGroup
2 |
3 |
4 | class SignalGroup(BaseGroup):
5 | def __init__(self, path: str = "./data"):
6 | super().__init__(level="Signal", path=path)
7 |
--------------------------------------------------------------------------------
/leaderboard/vue/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "files": [],
3 | "references": [
4 | {
5 | "path": "./tsconfig.node.json"
6 | },
7 | {
8 | "path": "./tsconfig.app.json"
9 | }
10 | ]
11 | }
12 |
--------------------------------------------------------------------------------
/spectrumlab/benchmark/semantic_group.py:
--------------------------------------------------------------------------------
1 | from .base import BaseGroup
2 |
3 |
4 | class SemanticGroup(BaseGroup):
5 | def __init__(self, path: str = "./data"):
6 | super().__init__(level="Semantic", path=path)
7 |
--------------------------------------------------------------------------------
/spectrumlab/benchmark/generation_group.py:
--------------------------------------------------------------------------------
1 | from .base import BaseGroup
2 |
3 |
4 | class GenerationGroup(BaseGroup):
5 | def __init__(self, path: str = "./data"):
6 | super().__init__(level="Generation", path=path)
7 |
--------------------------------------------------------------------------------
/spectrumlab/benchmark/perception_group.py:
--------------------------------------------------------------------------------
1 | from .base import BaseGroup
2 |
3 |
4 | class PerceptionGroup(BaseGroup):
5 | def __init__(self, path: str = "./data"):
6 | super().__init__(level="Perception", path=path)
7 |
--------------------------------------------------------------------------------
/leaderboard/vue/src/views/HomeView.vue:
--------------------------------------------------------------------------------
1 | <script setup lang="ts">
2 | import TheWelcome from '../components/TheWelcome.vue'
3 | </script>
4 |
5 | <template>
6 |   <main>
7 |     <TheWelcome />
8 |   </main>
9 | </template>
10 |
--------------------------------------------------------------------------------
/.vscode/extensions.json:
--------------------------------------------------------------------------------
1 | {
2 | "recommendations": [
3 | "ms-python.black-formatter",
4 | "editorconfig.editorconfig",
5 | "eamodio.gitlens",
6 | // Code markers such as TODO, FIXME
7 | "gruntfuggly.todo-tree",
8 | ]
9 | }
--------------------------------------------------------------------------------
/spectrumlab/evaluator/__init__.py:
--------------------------------------------------------------------------------
1 | from .choice_evaluator import ChoiceEvaluator
2 |
3 |
4 | def get_evaluator(level: str):
5 | """
6 | Get the evaluator for a given level.
7 | All levels currently use ChoiceEvaluator; different evaluators can be returned per level later.
8 | """
9 | return ChoiceEvaluator()
10 |
--------------------------------------------------------------------------------
/docs/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "devDependencies": {
3 | "markdown-it-mathjax3": "^4.3.2",
4 | "vitepress": "^1.6.3"
5 | },
6 | "scripts": {
7 | "docs:dev": "vitepress dev .",
8 | "docs:build": "vitepress build .",
9 | "docs:preview": "vitepress preview ."
10 | }
11 | }
--------------------------------------------------------------------------------
/spectrumlab/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .image_utils import (
2 | encode_image_to_base64,
3 | get_image_mime_type,
4 | prepare_images_for_prompt,
5 | )
6 |
7 | __all__ = [
8 | "encode_image_to_base64",
9 | "get_image_mime_type",
10 | "prepare_images_for_prompt",
11 | ]
12 |
--------------------------------------------------------------------------------
/leaderboard/vue/src/assets/logo.svg:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/spectrumlab/benchmark/__init__.py.bak:
--------------------------------------------------------------------------------
1 | from .signal_group import SignalGroup
2 | from .perception_group import PerceptionGroup
3 | from .generation_group import GenerationGroup
4 | from .semantic_group import SemanticGroup
5 |
6 | __all__ = ["SignalGroup", "PerceptionGroup", "GenerationGroup", "SemanticGroup"]
7 |
--------------------------------------------------------------------------------
/tests/models/test_deepseek.py:
--------------------------------------------------------------------------------
1 | from spectrumlab.models import DeepSeek
2 |
3 |
4 | def test_deepseek_text_generation():
5 | model = DeepSeek()
6 | prompt = "What is spectroscopy?"
7 | response = model.generate(prompt)
8 | assert isinstance(response, str)
9 | assert len(response) > 0
10 |
--------------------------------------------------------------------------------
/leaderboard/vue/src/views/AboutView.vue:
--------------------------------------------------------------------------------
1 | <template>
2 |   <div class="about">
3 |     <h1>This is an about page</h1>
4 |   </div>
5 | </template>
6 |
7 | <style>
8 | @media (min-width: 1024px) {
9 |   .about {
10 |     min-height: 100vh;
11 |     display: flex;
12 |     align-items: center;
13 |   }
14 | }
15 | </style>
16 |
--------------------------------------------------------------------------------
/leaderboard/vue/src/main.ts:
--------------------------------------------------------------------------------
1 | import './assets/main.css'
2 |
3 | import { createApp } from 'vue'
4 | import { createPinia } from 'pinia'
5 |
6 | import App from './App.vue'
7 | import router from './router'
8 |
9 | const app = createApp(App)
10 |
11 | app.use(createPinia())
12 | app.use(router)
13 |
14 | app.mount('#app')
15 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | # .pre-commit-config.yaml
2 | repos:
3 | - repo: https://github.com/astral-sh/ruff-pre-commit
4 | rev: v0.7.4
5 | hooks:
6 | - id: ruff
7 | # auto fix
8 | args: [--fix, --exit-non-zero-on-fix, --show-fixes]
9 | exclude: ^docs
10 | - id: ruff-format
11 | exclude: ^docs
--------------------------------------------------------------------------------
/leaderboard/vue/src/components/icons/IconSupport.vue:
--------------------------------------------------------------------------------
1 |
2 |
7 |
8 |
--------------------------------------------------------------------------------
/leaderboard/vue/src/stores/counter.ts:
--------------------------------------------------------------------------------
1 | import { ref, computed } from 'vue'
2 | import { defineStore } from 'pinia'
3 |
4 | export const useCounterStore = defineStore('counter', () => {
5 | const count = ref(0)
6 | const doubleCount = computed(() => count.value * 2)
7 | function increment() {
8 | count.value++
9 | }
10 |
11 | return { count, doubleCount, increment }
12 | })
13 |
--------------------------------------------------------------------------------
/leaderboard/vue/index.html:
--------------------------------------------------------------------------------
1 | <!DOCTYPE html>
2 | <html lang="">
3 |   <head>
4 |     <meta charset="UTF-8">
5 |     <link rel="icon" href="/favicon.ico">
6 |     <meta name="viewport" content="width=device-width, initial-scale=1.0">
7 |     <title>Vite App</title>
8 |   </head>
9 |   <body>
10 |     <div id="app"></div>
11 |     <script type="module" src="/src/main.ts"></script>
12 |   </body>
13 | </html>
14 |
--------------------------------------------------------------------------------
/leaderboard/vue/tsconfig.app.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": "@vue/tsconfig/tsconfig.dom.json",
3 | "include": [
4 | "env.d.ts",
5 | "src/**/*",
6 | "src/**/*.vue"
7 | ],
8 | "exclude": [
9 | "src/**/__tests__/*"
10 | ],
11 | "compilerOptions": {
12 | "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
13 | "paths": {
14 | "@/*": [
15 | "./src/*"
16 | ]
17 | }
18 | }
19 | }
--------------------------------------------------------------------------------
/docs/.vitepress/theme/index.ts:
--------------------------------------------------------------------------------
1 | import type { Theme } from 'vitepress'
2 | import DefaultTheme from 'vitepress/theme'
3 | import './custom.css'
4 |
5 | export default {
6 | extends: DefaultTheme,
7 | enhanceApp({ app, router, siteData }) {
8 | // Register global components
9 | // app.component('MyGlobalComponent', MyGlobalComponent)
10 |
11 | // Global properties
12 | // app.config.globalProperties.$myGlobalProperty = () => {}
13 | }
14 | } satisfies Theme
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Build artifacts
2 | /dist/
3 |
4 | # Virtual environments & Python cache
5 | *.venv/
6 | *__pycache__/
7 | .ruff_cache/
8 |
9 | # System files
10 | .DS_Store
11 |
12 | # Environment configuration
13 | *.gradio/
14 | .python-version
15 |
16 | # Local data & playground
17 | data/
18 | data_test/
19 | playground/
20 | swanlog/
21 | log
22 | /ablation_internvl3_78b_baselines_evaluation_results
23 | /ablation_internvl3_78b_temp_0.5_evaluation_results
24 |
25 |
26 | .env
--------------------------------------------------------------------------------
/leaderboard/vue/.gitignore:
--------------------------------------------------------------------------------
1 | # Logs
2 | logs
3 | *.log
4 | npm-debug.log*
5 | yarn-debug.log*
6 | yarn-error.log*
7 | pnpm-debug.log*
8 | lerna-debug.log*
9 |
10 | node_modules
11 | .DS_Store
12 | dist
13 | dist-ssr
14 | coverage
15 | *.local
16 |
17 | /cypress/videos/
18 | /cypress/screenshots/
19 |
20 | # Editor directories and files
21 | .vscode/*
22 | !.vscode/extensions.json
23 | .idea
24 | *.suo
25 | *.ntvs*
26 | *.njsproj
27 | *.sln
28 | *.sw?
29 |
30 | *.tsbuildinfo
31 |
--------------------------------------------------------------------------------
/leaderboard/vue/vite.config.ts:
--------------------------------------------------------------------------------
1 | import { fileURLToPath, URL } from 'node:url'
2 |
3 | import { defineConfig } from 'vite'
4 | import vue from '@vitejs/plugin-vue'
5 | import vueDevTools from 'vite-plugin-vue-devtools'
6 |
7 | // https://vite.dev/config/
8 | export default defineConfig({
9 | plugins: [
10 | vue(),
11 | vueDevTools(),
12 | ],
13 | resolve: {
14 | alias: {
15 | '@': fileURLToPath(new URL('./src', import.meta.url))
16 | },
17 | },
18 | })
19 |
--------------------------------------------------------------------------------
/leaderboard/vue/tsconfig.node.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": "@tsconfig/node22/tsconfig.json",
3 | "include": [
4 | "vite.config.*",
5 | "vitest.config.*",
6 | "cypress.config.*",
7 | "nightwatch.conf.*",
8 | "playwright.config.*",
9 | "eslint.config.*"
10 | ],
11 | "compilerOptions": {
12 | "noEmit": true,
13 | "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo",
14 |
15 | "module": "ESNext",
16 | "moduleResolution": "Bundler",
17 | "types": ["node"]
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/scripts/start_leaderboard.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 |
5 | SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
6 | PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
7 |
8 | echo "🚀 Starting Spectral Hub Leaderboard..."
9 | echo "📁 Project root: $PROJECT_ROOT"
10 |
11 | cd "$PROJECT_ROOT"
12 |
13 | echo "📦 Checking Gradio..."
14 | if ! python -c "import gradio" 2>/dev/null; then
15 | echo "❌ Gradio not found. Installing..."
16 | pip install gradio pandas
17 | echo "✅ Dependencies installed"
18 | else
19 | echo "✅ Gradio found"
20 | fi
21 |
22 | cd leaderboard/gradio
23 | python app.py
--------------------------------------------------------------------------------
/docs/.vitepress/theme/custom.css:
--------------------------------------------------------------------------------
1 | /* Custom CSS variables */
2 | :root {
3 | --vp-c-brand-1: #646cff;
4 | --vp-c-brand-2: #747bff;
5 | --vp-c-brand-3: #535bf2;
6 | }
7 |
8 | /* Brand colors in dark mode */
9 | .dark {
10 | --vp-c-brand-1: #c9cbff;
11 | --vp-c-brand-2: #a6a9ff;
12 | --vp-c-brand-3: #8285f4;
13 | }
14 |
15 | /* Custom styles */
16 | .VPHero .name {
17 | background: linear-gradient(120deg, #bd34fe 30%, #41d1ff);
18 | background-clip: text;
19 | -webkit-background-clip: text;
20 | -webkit-text-fill-color: transparent;
21 | }
22 |
23 | .VPNavBarTitle .VPImage {
24 | width: 40px !important;
25 | height: 40px !important;
26 | }
--------------------------------------------------------------------------------
/leaderboard/vue/src/assets/main.css:
--------------------------------------------------------------------------------
1 | @import './base.css';
2 |
3 | #app {
4 | max-width: 1280px;
5 | margin: 0 auto;
6 | padding: 2rem;
7 | font-weight: normal;
8 | }
9 |
10 | a,
11 | .green {
12 | text-decoration: none;
13 | color: hsla(160, 100%, 37%, 1);
14 | transition: 0.4s;
15 | padding: 3px;
16 | }
17 |
18 | @media (hover: hover) {
19 | a:hover {
20 | background-color: hsla(160, 100%, 37%, 0.2);
21 | }
22 | }
23 |
24 | @media (min-width: 1024px) {
25 | body {
26 | display: flex;
27 | place-items: center;
28 | }
29 |
30 | #app {
31 | display: grid;
32 | grid-template-columns: 1fr 1fr;
33 | padding: 0 2rem;
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/leaderboard/vue/src/router/index.ts:
--------------------------------------------------------------------------------
1 | import { createRouter, createWebHistory } from 'vue-router'
2 | import HomeView from '../views/HomeView.vue'
3 |
4 | const router = createRouter({
5 | history: createWebHistory(import.meta.env.BASE_URL),
6 | routes: [
7 | {
8 | path: '/',
9 | name: 'home',
10 | component: HomeView,
11 | },
12 | {
13 | path: '/about',
14 | name: 'about',
15 | // route level code-splitting
16 | // this generates a separate chunk (About.[hash].js) for this route
17 | // which is lazy-loaded when the route is visited.
18 | component: () => import('../views/AboutView.vue'),
19 | },
20 | ],
21 | })
22 |
23 | export default router
24 |
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: home
3 |
4 | hero:
5 | name: "SpectrumLab"
6 | text: ""
7 | tagline: "A pioneering unified platform designed to systematize and accelerate deep learning research in spectroscopy"
8 | actions:
9 | - theme: brand
10 | text: Quick Start
11 | link: /zh/tutorial
12 | - theme: alt
13 | text: View Source Code
14 | link: https://github.com/little1d/SpectrumLab
15 |
16 | features:
17 | - title: 🔬 Multimodal Evaluation
18 | details: Supports multimodal spectral data evaluation combining images and text, compatible with a wide range of deep learning models.
19 | - title: 🤖 Model Integration
20 | details: Integrates API interfaces of advanced models such as GPT-4o, Claude, DeepSeek, and Qwen-VL.
21 | - title: 📊 Benchmark
22 | details: Provides standardized evaluation workflows and metrics, supporting multiple types of spectroscopy tasks.
23 | - title: 🏆 Leaderboard
24 | details: A model performance leaderboard updated in real time, supporting multi-dimensional comparative analysis.
25 | - title: 🚀 Command-line Tool
26 | details: A concise command-line interface that supports batch evaluation and result management.
27 | - title: 🔧 Easy to Extend
28 | details: Modular design that supports rapid integration of custom evaluators and models.
--------------------------------------------------------------------------------
/docs/zh/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: home
3 |
4 | hero:
5 | name: "SpectrumLab"
6 | text: ""
7 | tagline: "A pioneering unified platform designed to systematize and accelerate deep learning research in spectroscopy"
8 | actions:
9 | - theme: brand
10 | text: Quick Start
11 | link: /tutorial
12 | - theme: alt
13 | text: View Source Code
14 | link: https://github.com/little1d/SpectrumLab
15 |
16 | features:
17 | - title: 🔬 Multimodal Evaluation
18 | details: Supports multimodal spectral data evaluation combining images and text, compatible with a wide range of deep learning models.
19 | - title: 🤖 Model Integration
20 | details: Integrates API interfaces of advanced models such as GPT-4o, Claude, DeepSeek, and Qwen-VL.
21 | - title: 📊 Benchmark
22 | details: Provides standardized evaluation workflows and metrics, supporting multiple types of spectroscopy tasks.
23 | - title: 🏆 Leaderboard
24 | details: A model performance leaderboard updated in real time, supporting multi-dimensional comparative analysis.
25 | - title: 🚀 Command-line Tool
26 | details: A concise command-line interface that supports batch evaluation and result management.
27 | - title: 🔧 Easy to Extend
28 | details: Modular design that supports rapid integration of custom evaluators and models.
29 | ---
--------------------------------------------------------------------------------
/leaderboard/vue/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "leaderboard",
3 | "version": "0.0.0",
4 | "private": true,
5 | "type": "module",
6 | "scripts": {
7 | "dev": "vite",
8 | "build": "run-p type-check \"build-only {@}\" --",
9 | "preview": "vite preview",
10 | "build-only": "vite build",
11 | "type-check": "vue-tsc --build"
12 | },
13 | "dependencies": {
14 | "pinia": "^3.0.3",
15 | "vue": "^3.5.17",
16 | "vue-router": "^4.5.1"
17 | },
18 | "devDependencies": {
19 | "@tsconfig/node22": "^22.0.2",
20 | "@types/node": "^22.15.32",
21 | "@vitejs/plugin-vue": "^6.0.0",
22 | "@vue/tsconfig": "^0.7.0",
23 | "npm-run-all2": "^8.0.4",
24 | "typescript": "~5.8.0",
25 | "vite": "^5.4.10",
26 | "vite-plugin-vue-devtools": "^7.7.7",
27 | "vue-tsc": "^2.2.10"
28 | }
29 | }
--------------------------------------------------------------------------------
/spectrumlab/benchmark/__init__.py:
--------------------------------------------------------------------------------
1 | from .signal_group import SignalGroup
2 | from .perception_group import PerceptionGroup
3 | from .generation_group import GenerationGroup
4 | from .semantic_group import SemanticGroup
5 |
6 | __all__ = [
7 | "SignalGroup",
8 | "PerceptionGroup",
9 | "GenerationGroup",
10 | "SemanticGroup",
11 | "get_benchmark_group",
12 | ]
13 |
14 |
15 | def get_benchmark_group(level: str, path: str = "./data"):
16 | level_map = {
17 | "signal": SignalGroup,
18 | "perception": PerceptionGroup,
19 | "semantic": SemanticGroup,
20 | "generation": GenerationGroup,
21 | }
22 |
23 | level_lower = level.lower()
24 | if level_lower not in level_map:
25 | raise ValueError(f"不支持的评估级别: {level}. 可选值: {list(level_map.keys())}")
26 |
27 | return level_map[level_lower](path=path)
28 |
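29 | # Example usage (illustrative):
30 | #   group = get_benchmark_group("signal")           # -> SignalGroup(path="./data")
31 | #   data = group.get_data_by_subcategories("all")   # as called from spectrumlab.cli.api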
--------------------------------------------------------------------------------
/leaderboard/vue/src/components/HelloWorld.vue:
--------------------------------------------------------------------------------
1 | <script setup lang="ts">
2 | defineProps<{
3 |   msg: string
4 | }>()
5 | </script>
6 |
7 | <template>
8 |   <div class="greetings">
9 |     <h1 class="green">{{ msg }}</h1>
10 |     <h3>
11 |       You’ve successfully created a project with
12 |       <a href="https://vite.dev/" target="_blank" rel="noopener">Vite</a> +
13 |       <a href="https://vuejs.org/" target="_blank" rel="noopener">Vue 3</a>. What's next?
14 |     </h3>
15 |   </div>
16 | </template>
17 |
18 | <style scoped>
19 | h1 {
20 |   font-weight: 500;
21 |   font-size: 2.6rem;
22 |   position: relative;
23 |   top: -10px;
24 | }
25 |
26 | h3 {
27 |   font-size: 1.2rem;
28 | }
29 |
30 | .greetings h1,
31 | .greetings h3 {
32 |   text-align: center;
33 | }
34 |
35 | @media (min-width: 1024px) {
36 |   .greetings h1,
37 |   .greetings h3 {
38 |     text-align: left;
39 |   }
40 | }
41 | </style>
42 |
--------------------------------------------------------------------------------
/.vscode/python.code-snippets:
--------------------------------------------------------------------------------
1 | {
2 | "python-comment": {
3 | "prefix": "comm",
4 | "scope": "python,notebook",
5 | "description": "python文件块注释",
6 | "body": [
7 | "# =============================================",
8 | "# ${1:SECTION TITLE} ",
9 | "# =============================================",
10 | ]
11 | },
12 | "adapted-from": {
13 | "prefix": "adapted",
14 | "scope": "python,notebook",
15 | "description": "添加代码改编来源注释",
16 | "body": [
17 | "# Adapted from: ${1:source_description}",
18 | "# Source: ${2:https://github.com/example/repo}",
19 | ]
20 | },
21 | "model-doc-link": { // <<< 新增片段
22 | "prefix": "doclink", // 触发关键字
23 | "scope": "python,notebook",
24 | "description": "插入外部模型/库的文档链接",
25 | "body": [
26 | "# Link: ${1:https://example.com}"
27 | ]
28 | }
29 | }
--------------------------------------------------------------------------------
/leaderboard/vue/README.md:
--------------------------------------------------------------------------------
1 | # This template should help get you started developing with Vue 3 in Vite
2 |
3 | ## Recommended IDE Setup
4 |
5 | [VSCode](https://code.visualstudio.com/) + [Volar](https://marketplace.visualstudio.com/items?itemName=Vue.volar) (and disable Vetur).
6 |
7 | ## Type Support for `.vue` Imports in TS
8 |
9 | TypeScript cannot handle type information for `.vue` imports by default, so we replace the `tsc` CLI with `vue-tsc` for type checking. In editors, we need [Volar](https://marketplace.visualstudio.com/items?itemName=Vue.volar) to make the TypeScript language service aware of `.vue` types.
10 |
11 | ## Customize configuration
12 |
13 | See [Vite Configuration Reference](https://vite.dev/config/).
14 |
15 | ## Project Setup
16 |
17 | ```sh
18 | npm install
19 | ```
20 |
21 | ### Compile and Hot-Reload for Development
22 |
23 | ```sh
24 | npm run dev
25 | ```
26 |
27 | ### Type-Check, Compile and Minify for Production
28 |
29 | ```sh
30 | npm run build
31 | ```
32 |
--------------------------------------------------------------------------------
/leaderboard/vue/src/components/icons/IconTooling.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
19 |
20 |
--------------------------------------------------------------------------------
/spectrumlab/models/base.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from typing import Union, Any, Dict
3 |
4 |
5 | class BaseModel(ABC):
6 | is_api: bool = False
7 |
8 | def __init__(self, path: str, max_seq_len: int = 2048):
9 | self.path = path
10 | self.max_seq_len = max_seq_len
11 |
12 | @abstractmethod
13 | def generate(
14 | self, prompt: Union[str, Dict[str, Any]], max_out_len: int = 512
15 | ) -> str:
16 | """
17 | Generate response for a single prompt.
18 |
19 | Args:
20 | prompt: Input prompt, can be:
21 | - str: Simple text prompt
22 | - Dict: Multimodal prompt with format:
23 | {
24 | "text": "question text",
25 | "images": [{"type": "image_url", "image_url": {"url": "data:..."}}]
26 | }
27 | max_out_len: Maximum output length
28 |
29 | Returns:
30 | Generated response string
31 | """
32 | pass
33 |
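34 | # Illustrative multimodal prompt dict, matching the format documented above:
35 | # prompt = {
36 | #     "text": "Please explain this spectroscopy image.",
37 | #     "images": [
38 | #         {"type": "image_url", "image_url": {"url": "data:image/png;base64,<...>"}},
39 | #     ],
40 | # }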
--------------------------------------------------------------------------------
/leaderboard/vue/src/components/icons/IconCommunity.vue:
--------------------------------------------------------------------------------
1 |
2 |
7 |
8 |
--------------------------------------------------------------------------------
/spectrumlab/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .deepseek_api import DeepSeek
2 | from .gpt4o_api import GPT4o
3 | from .internvl_api import InternVL
4 | from .claude_api import (
5 | Claude_Sonnet_3_5,
6 | Claude_Opus_4,
7 | Claude_Haiku_3_5,
8 | Claude_Sonnet_4,
9 | )
10 | from .gpt4_v_api import GPT4_1, GPT4_Vision
11 | from .grok_api import Grok_2_Vision
12 | from .deepseek_vl import DeepSeek_VL2
13 | from .qwen_vl_api import Qwen_VL_Max, Qwen_2_5_VL_32B, Qwen_2_5_VL_72B
14 | from .llama_api import Llama_Vision_11B, Llama_Vision_90B
15 | from .doubao_api import Doubao_1_5_Vision_Pro, Doubao_1_5_Vision_Pro_Thinking
16 |
17 | __all__ = [
18 | "DeepSeek",
19 | "GPT4o",
20 | "InternVL",
21 | "Claude_Sonnet_3_5",
22 | "Claude_Opus_4",
23 | "Claude_Haiku_3_5",
24 | "Claude_Sonnet_4",
25 | "GPT4_1",
26 | "GPT4_Vision",
27 | "Grok_2_Vision",
28 | "Qwen_VL_Max",
29 | "DeepSeek_VL2",
30 | "Qwen_2_5_VL_32B",
31 | "Qwen_2_5_VL_72B",
32 | "Llama_Vision_11B",
33 | "Llama_Vision_90B",
34 | "Doubao_1_5_Vision_Pro",
35 | "Doubao_1_5_Vision_Pro_Thinking",
36 | ]
37 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["hatchling"]
3 | build-backend = "hatchling.build"
4 |
5 | [project]
6 | name = "spectrumlab"
7 | version = "0.0.1"
8 | description = "A pioneering unified platform designed to systematize and accelerate deep learning research in spectroscopy."
9 | readme = "README.md"
10 | requires-python = ">=3.10"
11 | authors = [
12 | { name = "Zhuo Yang", email = "yzachary1551@gmail.com" },
13 | { name = "Tianfan Fu", email = "futianfan@gmail.com" },
14 | ]
15 | keywords = ["benchmark", "chemistry", "spectroscopy", "evaluation"]
16 |
17 | dependencies = ["dotenv>=0.9.9", "openai>=1.93.0"]
18 |
19 | [project.optional-dependencies]
20 | dev = ["pytest>=7.4.0", "black>=23.0.0", "ruff>=0.1.0", "pre-commit>=4.2.0"]
21 | test = ["pytest>=8.4.1", "pytest-asyncio>=1.1.0"]
22 |
23 | [project.scripts]
24 | spectrumlab = "spectrumlab.cli.main:main"
25 |
26 | [tool.hatch.build.targets.wheel]
27 | packages = ["spectrumlab"]
28 |
29 | [tool.hatch.metadata]
30 | allow-direct-references = true
31 |
32 | [tool.black]
33 | line-length = 120
34 | skip-string-normalization = true
35 |
36 | [tool.pytest.ini_options]
37 | testpaths = ["tests"]
38 | python_files = ["test_*.py"]
39 | python_classes = ["Test*"]
40 | python_functions = ["test_*"]
41 | addopts = "-v --tb=short"
42 |
--------------------------------------------------------------------------------
/leaderboard/vue/src/components/icons/IconDocumentation.vue:
--------------------------------------------------------------------------------
1 |
2 |
7 |
8 |
--------------------------------------------------------------------------------
/spectrumlab/models/base_api.py:
--------------------------------------------------------------------------------
1 | from abc import abstractmethod
2 | from typing import Dict, Any, Union
3 | from .base import BaseModel
4 |
5 |
6 | class BaseAPIModel(BaseModel):
7 | is_api: bool = True
8 |
9 | def __init__(self, model_name: str = "api_model", max_seq_len: int = 2048):
10 | """
11 | Initialize API model.
12 |
13 | Args:
14 | model_name: Name of the model
15 | max_seq_len: Maximum sequence length
16 | """
17 | super().__init__(path=model_name, max_seq_len=max_seq_len)
18 |
19 | @abstractmethod
20 | def generate(
21 | self, prompt: Union[str, Dict[str, Any]], max_out_len: int = 512
22 | ) -> str:
23 | """
24 | Generate response for a single prompt.
25 |
26 | Args:
27 | prompt: Input prompt, can be:
28 | - str: Simple text prompt
29 | - Dict: Multimodal prompt with format:
30 | {
31 | "text": "question text",
32 | "images": [{"type": "image_url", "image_url": {"url": "data:..."}}]
33 | }
34 | max_out_len: Maximum output length
35 |
36 | Returns:
37 | Generated response string
38 | """
39 | pass
40 |
--------------------------------------------------------------------------------
/docs/en/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: home
3 |
4 | hero:
5 | name: "SpectrumLab"
6 | text: ""
7 | tagline: "A pioneering unified platform designed to systematize and accelerate deep learning research in spectroscopy"
8 | actions:
9 | - theme: brand
10 | text: Quick Start
11 | link: /en/tutorial
12 | - theme: alt
13 | text: View Source Code
14 | link: https://github.com/little1d/SpectrumLab
15 |
16 | features:
17 | - title: 🔬 Multimodal evaluation
18 | details: Supports multimodal spectral data evaluation combining images and text, and is compatible with various deep learning models.
19 | - title: 🤖 Model Integration
20 | details: Integrates API interfaces of advanced models such as GPT-4o, Claude, DeepSeek, and Qwen-VL.
21 | - title: 📊 Benchmark
22 | details: Provides standardized evaluation processes and metrics, supporting multiple types of spectroscopy tasks.
23 | - title: 🏆 Leaderboard
24 | details: A real-time updated model performance leaderboard that supports multi-dimensional comparative analysis.
25 | - title: 🚀 Command-line tool
26 | details: A concise command-line interface that supports batch evaluation and result management.
27 | - title: 🔧 Easy to extend
28 | details: Modular design that supports the rapid integration of custom evaluators and models.
29 | ---
--------------------------------------------------------------------------------
/spectrumlab/cli/api.py:
--------------------------------------------------------------------------------
1 | from typing import List, Dict, Optional, Any
2 | from ..benchmark import get_benchmark_group
3 | from ..evaluator import get_evaluator
4 |
5 |
6 | def run_evaluation(
7 | model,
8 | level: str,
9 | subcategories: Optional[List[str]] = None,
10 | data_path: str = "./data",
11 | save_path: str = "./results",
12 | max_out_len: int = 512,
13 | ) -> Dict[str, Any]:
14 | print("🚀 Starting evaluation")
15 | print(f"📊 Model: {model.__class__.__name__}")
16 | print(f"📁 Level: {level}")
17 | print(f"📂 Data path: {data_path}")
18 | print(f"💾 Save path: {save_path}")
19 |
20 | print("\n📥 Loading benchmark data...")
21 | benchmark = get_benchmark_group(level, data_path)
22 |
23 | if subcategories:
24 | data = benchmark.get_data_by_subcategories(subcategories)
25 | print(f"📋 Subcategories: {subcategories}")
26 | else:
27 | data = benchmark.get_data_by_subcategories("all")
28 | print("📋 Subcategories: all")
29 |
30 | print(f"📊 Total data items: {len(data)}")
31 |
32 | print("\n⚙️ Getting evaluator...")
33 | evaluator = get_evaluator(level)
34 |
35 | print("\n🔄 Running evaluation...")
36 | results = evaluator.evaluate(
37 | data_items=data,
38 | model=model,
39 | max_out_len=max_out_len,
40 | save_path=save_path,
41 | )
42 |
43 | return results
44 |
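45 | # Example usage (illustrative; any model from spectrumlab.models works the same way):
46 | #   from spectrumlab.models import GPT4o
47 | #   results = run_evaluation(
48 | #       model=GPT4o(),
49 | #       level="signal",
50 | #       subcategories=["Spectrum Type Classification"],
51 | #   )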
--------------------------------------------------------------------------------
/docs/zh/benchmark.md:
--------------------------------------------------------------------------------
1 | # Benchmark
2 |
3 | ## Benchmark Overview
4 |
5 | The SpectrumLab benchmark uses a layered architecture to comprehensively evaluate model capabilities on spectroscopy tasks, from signal processing up to high-level semantic understanding. It contains four main levels, each with multiple subtasks covering different types of spectrum analysis.
6 |
7 | ## Benchmark Details
8 |
9 | ### 1. Signal Level
10 |
11 | Basic spectrum signal processing and analysis, including the following subtasks:
12 |
13 | - **Spectrum Type Classification**: identify different types of spectra (IR, NMR, Raman, etc.).
14 | - **Spectrum Quality Assessment**: judge whether a spectrum is clear and complete and whether it contains obvious noise.
15 | - **Basic Feature Extraction**: identify basic features in a spectrum such as the baseline, peaks, peak positions, and peak intensities.
16 | - **Impurity Peak Detection**: identify impurity peaks and anomalous signals in a spectrum.
17 |
18 | ### 2. Perception Level
19 |
20 | Further visual understanding and pattern recognition of spectra, covering:
21 |
22 | - **Basic Property Prediction**: predict directly related properties such as the molecular ion peak, solubility, and acidity/basicity from spectral features.
23 | - **Elemental Compositional Prediction**: identify elemental composition and isotope patterns from mass spectra and similar data.
24 | - **Functional Group Recognition**: predict the functional groups a molecule may contain from spectral features (especially characteristic peak positions).
25 | - **Peak Assignment**: make preliminary chemical assignments for the main peaks in a spectrum.
26 |
27 | ### 3. Semantic Level
28 |
29 | Deep semantic understanding of spectra and chemical knowledge reasoning, including:
30 |
31 | - **Fusing Spectroscopic Modalities**: combine multiple kinds of spectral or molecular information for an integrated judgment.
32 | - **Molecular Structure Elucidation**: match the correct molecular structure from multiple candidates based on spectral information.
33 | - **Multimodal Molecular Reasoning**: answer complex chemical reasoning questions based on spectral and textual information.
34 |
35 | ### 4. Generation Level
36 |
37 | Creative generation of new chemical information; the main tasks are:
38 |
39 | - **Forward Problems**: infer molecular structure from spectra, SMILES, or a combination of the two.
40 | - **Inverse Problems**: generate spectra, SMILES, and similar outputs from a molecular structure.
41 | - **De Novo Generation**: generate novel, diverse, and reasonable molecular structures (SMILES, 2D graphs) and/or predicted multimodal information (spectra, properties) from scratch, guided by specific goals such as molecules with given properties or ligands for a given target.
42 |
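43 | ## Usage Sketch
44 |
45 | A minimal sketch of evaluating one subtask of the Signal level through the Python API (paths and the model choice are placeholders; `GPT4o` is one of the bundled model wrappers):
46 |
47 | ```python
48 | from spectrumlab.models import GPT4o
49 | from spectrumlab.benchmark import get_benchmark_group
50 | from spectrumlab.evaluator import get_evaluator
51 |
52 | # Load the Signal level and pick a single subtask
53 | group = get_benchmark_group("signal", path="./data")
54 | data = group.get_data_by_subcategories(["Spectrum Type Classification"])
55 |
56 | # All levels currently resolve to ChoiceEvaluator
57 | evaluator = get_evaluator("signal")
58 | results = evaluator.evaluate(data_items=data, model=GPT4o(), save_path="./results")
59 | print(results["metrics"]["overall"])
60 | ```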
--------------------------------------------------------------------------------
/leaderboard/batch_import_models.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Batch import models to leaderboard
4 | Usage: python batch_import_models.py models_data.json
5 | """
6 |
7 | import json
8 | import sys
9 | from pathlib import Path
10 |
11 | # Add the leaderboard directory to Python path
12 | sys.path.insert(0, str(Path(__file__).parent))
13 |
14 | from manage_leaderboard import LeaderboardManager
15 |
16 |
17 | def batch_import(models_file: str):
18 | """Batch import models from JSON file"""
19 |
20 | with open(models_file, "r", encoding="utf-8") as f:
21 | models_data = json.load(f)
22 |
23 | manager = LeaderboardManager()
24 |
25 | # Clear existing models if specified
26 | if models_data.get("clear_existing", False):
27 | print("🗑️ Clearing existing models...")
28 | manager.data["models"] = []
29 |
30 | # Import models
31 | imported_count = 0
32 | for model_data in models_data["models"]:
33 | model_info = model_data["model_info"]
34 | subcategory_scores = model_data["scores"]
35 |
36 | success = manager.add_model(model_info, subcategory_scores)
37 | if success:
38 | imported_count += 1
39 |
40 | print(
41 | f"\n✅ Successfully imported {imported_count}/{len(models_data['models'])} models"
42 | )
43 |
44 |
45 | if __name__ == "__main__":
46 | if len(sys.argv) != 2:
47 | print("Usage: python batch_import_models.py models_data.json")
48 | sys.exit(1)
49 |
50 | batch_import(sys.argv[1])
51 |
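52 | # Illustrative shape of models_data.json, inferred from the keys read above:
53 | # {
54 | #     "clear_existing": false,
55 | #     "models": [
56 | #         {"model_info": {...}, "scores": {...}}
57 | #     ]
58 | # }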
--------------------------------------------------------------------------------
/.github/workflows/docs-deploy.yml:
--------------------------------------------------------------------------------
1 | name: Deploy VitePress Docs Site to Pages
2 |
3 | on:
4 | push:
5 | branches: [main]
6 | paths:
7 | - 'docs/**'
8 | - '.github/workflows/docs-deploy.yml'
9 | workflow_dispatch:
10 |
11 | permissions:
12 | contents: read
13 | pages: write
14 | id-token: write
15 |
16 | concurrency:
17 | group: pages
18 | cancel-in-progress: false
19 |
20 | jobs:
21 | build:
22 | runs-on: ubuntu-latest
23 | steps:
24 | - name: Checkout
25 | uses: actions/checkout@v4
26 | with:
27 | fetch-depth: 0 # not needed if lastUpdated is not enabled
28 |
29 | - name: Setup Node
30 | uses: actions/setup-node@v4
31 | with:
32 | node-version: 22
33 | cache: npm
34 | cache-dependency-path: docs/package-lock.json
35 |
36 | - name: Setup Pages
37 | uses: actions/configure-pages@v4
38 |
39 | - name: Install dependencies
40 | working-directory: docs
41 | run: npm ci
42 |
43 | - name: Build with VitePress
44 | working-directory: docs
45 | run: npm run docs:build
46 |
47 | - name: Upload artifact
48 | uses: actions/upload-pages-artifact@v3
49 | with:
50 | path: docs/.vitepress/dist
51 |
52 | deploy:
53 | environment:
54 | name: github-pages
55 | url: ${{ steps.deployment.outputs.page_url }}
56 | needs: build
57 | runs-on: ubuntu-latest
58 | steps:
59 | - name: Deploy to GitHub Pages
60 | id: deployment
61 | uses: actions/deploy-pages@v4
--------------------------------------------------------------------------------
/leaderboard/vue/src/App.vue:
--------------------------------------------------------------------------------
1 | <script setup lang="ts">
2 | import { RouterLink, RouterView } from 'vue-router'
3 | import HelloWorld from './components/HelloWorld.vue'
4 | </script>
5 |
6 | <template>
7 |   <header>
8 |     <img alt="Vue logo" class="logo" src="@/assets/logo.svg" width="125" height="125" />
9 |
10 |     <div class="wrapper">
11 |       <HelloWorld msg="You did it!" />
12 |
13 |       <nav>
14 |         <RouterLink to="/">Home</RouterLink>
15 |         <RouterLink to="/about">About</RouterLink>
16 |       </nav>
17 |     </div>
18 |   </header>
19 |
20 |   <RouterView />
21 | </template>
22 |
23 | <style scoped>
86 |
--------------------------------------------------------------------------------
/leaderboard/vue/src/components/WelcomeItem.vue:
--------------------------------------------------------------------------------
1 | <template>
2 |   <div class="item">
3 |     <i>
4 |       <slot name="icon"></slot>
5 |     </i>
6 |     <div class="details">
7 |       <h3>
8 |         <slot name="heading"></slot>
9 |       </h3>
10 |       <slot></slot>
11 |     </div>
12 |   </div>
13 | </template>
14 |
15 | <style scoped>
88 |
--------------------------------------------------------------------------------
/tests/models/test_gpt_4_1.py:
--------------------------------------------------------------------------------
1 | from spectrumlab.models import GPT4_1
2 | from spectrumlab.utils.image_utils import encode_image_to_base64
3 | from spectrumlab.benchmark.signal_group import SignalGroup
4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator
5 |
6 |
7 | def test_gpt_4_1_text_generation():
8 | model = GPT4_1()
9 | prompt = "What is spectroscopy?"
10 | response = model.generate(prompt)
11 | assert isinstance(response, str)
12 | assert len(response) > 0
13 |
14 |
15 | def test_gpt_4_1_multimodal_generation():
16 | model = GPT4_1()
17 | image_path = "playground/models/test.jpg"
18 | image_base64 = encode_image_to_base64(image_path)
19 | prompt = {
20 | "text": "Please explain this spectroscopy image.",
21 | "images": [
22 | {
23 | "type": "image_url",
24 | "image_url": {"url": f"data:image/jpg;base64,{image_base64}"},
25 | }
26 | ],
27 | }
28 | response = model.generate(prompt)
29 | assert isinstance(response, str)
30 | assert len(response) > 0
31 |
32 |
33 | def test_gpt_4_1_signalgroup_evaluation():
34 | model = GPT4_1()
35 | signal_group = SignalGroup("data")
36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
37 | evaluator = ChoiceEvaluator()
38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None)
39 | assert "metrics" in results
40 | assert "overall" in results["metrics"]
41 |
42 |
43 | def test_gpt_4_1_signalgroup_evaluation_parallel():
44 | model = GPT4_1()
45 | signal_group = SignalGroup("data")
46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
47 | evaluator = ChoiceEvaluator()
48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None)
49 | assert "metrics" in results
50 | assert "overall" in results["metrics"]
51 |
--------------------------------------------------------------------------------
/tests/models/test_claude_opus_4.py:
--------------------------------------------------------------------------------
1 | from spectrumlab.models import Claude_Opus_4
2 | from spectrumlab.utils.image_utils import encode_image_to_base64
3 | from spectrumlab.benchmark.signal_group import SignalGroup
4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator
5 |
6 |
7 | def test_claude_text_generation():
8 | model = Claude_Opus_4()
9 | prompt = "What is spectroscopy?"
10 | response = model.generate(prompt)
11 | assert isinstance(response, str)
12 | assert len(response) > 0
13 |
14 |
15 | def test_claude_multimodal_generation():
16 | model = Claude_Opus_4()
17 | image_path = "playground/models/test.jpg"
18 | image_base64 = encode_image_to_base64(image_path)
19 | prompt = {
20 | "text": "Please explain this spectroscopy image.",
21 | "images": [
22 | {
23 | "type": "image_url",
24 | "image_url": {"url": f"data:image/jpg;base64,{image_base64}"},
25 | }
26 | ],
27 | }
28 | response = model.generate(prompt)
29 | assert isinstance(response, str)
30 | assert len(response) > 0
31 |
32 |
33 | def test_claude_signalgroup_evaluation():
34 | model = Claude_Opus_4()
35 | signal_group = SignalGroup("data")
36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
37 | evaluator = ChoiceEvaluator()
38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None)
39 | assert "metrics" in results
40 | assert "overall" in results["metrics"]
41 |
42 |
43 | def test_claude_signalgroup_evaluation_parallel():
44 | model = Claude_Opus_4()
45 | signal_group = SignalGroup("data")
46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
47 | evaluator = ChoiceEvaluator()
48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None)
49 | assert "metrics" in results
50 | assert "overall" in results["metrics"]
--------------------------------------------------------------------------------
/tests/models/test_claude_sonnet_4.py:
--------------------------------------------------------------------------------
1 | from spectrumlab.models import Claude_Sonnet_4
2 | from spectrumlab.utils.image_utils import encode_image_to_base64
3 | from spectrumlab.benchmark.signal_group import SignalGroup
4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator
5 |
6 |
7 | def test_claude_text_generation():
8 | model = Claude_Sonnet_4()
9 | prompt = "What is spectroscopy?"
10 | response = model.generate(prompt)
11 | assert isinstance(response, str)
12 | assert len(response) > 0
13 |
14 |
15 | def test_claude_multimodal_generation():
16 | model = Claude_Sonnet_4()
17 | image_path = "playground/models/test.jpg"
18 | image_base64 = encode_image_to_base64(image_path)
19 | prompt = {
20 | "text": "Please explain this spectroscopy image.",
21 | "images": [
22 | {
23 | "type": "image_url",
24 | "image_url": {"url": f"data:image/jpg;base64,{image_base64}"},
25 | }
26 | ],
27 | }
28 | response = model.generate(prompt)
29 | assert isinstance(response, str)
30 | assert len(response) > 0
31 |
32 |
33 | def test_claude_signalgroup_evaluation():
34 | model = Claude_Sonnet_4()
35 | signal_group = SignalGroup("data")
36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
37 | evaluator = ChoiceEvaluator()
38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None)
39 | assert "metrics" in results
40 | assert "overall" in results["metrics"]
41 |
42 |
43 | def test_claude_signalgroup_evaluation_parallel():
44 | model = Claude_Sonnet_4()
45 | signal_group = SignalGroup("data")
46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
47 | evaluator = ChoiceEvaluator()
48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None)
49 | assert "metrics" in results
50 | assert "overall" in results["metrics"]
--------------------------------------------------------------------------------
/tests/models/test_qwen_vl_max.py:
--------------------------------------------------------------------------------
1 | from spectrumlab.models import Qwen_VL_Max
2 | from spectrumlab.utils.image_utils import encode_image_to_base64
3 | from spectrumlab.benchmark.signal_group import SignalGroup
4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator
5 |
6 |
7 | def test_qwen_vl_max_text_generation():
8 | model = Qwen_VL_Max()
9 | prompt = "What is spectroscopy?"
10 | response = model.generate(prompt)
11 | assert isinstance(response, str)
12 | assert len(response) > 0
13 |
14 |
15 | def test_qwen_vl_max_multimodal_generation():
16 | model = Qwen_VL_Max()
17 | image_path = "playground/models/test.jpg"
18 | image_base64 = encode_image_to_base64(image_path)
19 | prompt = {
20 | "text": "Please explain this spectroscopy image.",
21 | "images": [
22 | {
23 | "type": "image_url",
24 | "image_url": {"url": f"data:image/jpg;base64,{image_base64}"},
25 | }
26 | ],
27 | }
28 | response = model.generate(prompt)
29 | assert isinstance(response, str)
30 | assert len(response) > 0
31 |
32 |
33 | def test_qwen_vl_max_signalgroup_evaluation():
34 | model = Qwen_VL_Max()
35 | signal_group = SignalGroup("data")
36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
37 | evaluator = ChoiceEvaluator()
38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None)
39 | assert "metrics" in results
40 | assert "overall" in results["metrics"]
41 |
42 |
43 | def test_qwen_vl_max_signalgroup_evaluation_parallel():
44 | model = Qwen_VL_Max()
45 | signal_group = SignalGroup("data")
46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
47 | evaluator = ChoiceEvaluator()
48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None)
49 | assert "metrics" in results
50 | assert "overall" in results["metrics"]
51 |
--------------------------------------------------------------------------------
/tests/models/test_gpt4o.py:
--------------------------------------------------------------------------------
1 | from spectrumlab.models import GPT4o
2 | from spectrumlab.utils.image_utils import encode_image_to_base64
3 | from spectrumlab.benchmark.signal_group import SignalGroup
4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator
5 |
6 |
7 | def test_gpt4o_text_generation():
8 | model = GPT4o()
9 | prompt = "What is spectroscopy?"
10 | response = model.generate(prompt)
11 | assert isinstance(response, str)
12 | assert len(response) > 0
13 |
14 |
15 | def test_gpt4o_multimodal_generation():
16 | model = GPT4o()
17 | image_path = "/Users/little1d/Desktop/Code/SpectrumLab/playground/models/test.jpg"
18 | image_base64 = encode_image_to_base64(image_path)
19 | prompt = {
20 | "text": "Please explain this spectroscopy image.",
21 | "images": [
22 | {
23 | "type": "image_url",
24 | "image_url": {"url": f"data:image/png;base64,{image_base64}"},
25 | }
26 | ],
27 | }
28 | response = model.generate(prompt)
29 | assert isinstance(response, str)
30 | assert len(response) > 0
31 |
32 |
33 | def test_gpt4o_signalgroup_evaluation():
34 | model = GPT4o()
35 | signal_group = SignalGroup("data")
36 | # Pick only one subtask to keep the test fast
37 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
38 | evaluator = ChoiceEvaluator()
39 | results = evaluator.evaluate(data_items=data, model=model, save_path=None)
40 | assert "metrics" in results
41 | assert "overall" in results["metrics"]
42 |
43 |
44 | def test_gpt4o_signalgroup_evaluation_parallel():
45 | model = GPT4o()
46 | signal_group = SignalGroup("data")
47 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
48 | evaluator = ChoiceEvaluator()
49 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None)
50 | assert "metrics" in results
51 | assert "overall" in results["metrics"]
52 |
--------------------------------------------------------------------------------
/tests/models/test_gpt_4_v.py:
--------------------------------------------------------------------------------
1 | from spectrumlab.models import GPT4_Vision
2 | from spectrumlab.utils.image_utils import encode_image_to_base64
3 | from spectrumlab.benchmark.signal_group import SignalGroup
4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator
5 |
6 |
7 | def test_gpt_4_vision_text_generation():
8 | model = GPT4_Vision()
9 | prompt = "What is spectroscopy?"
10 | response = model.generate(prompt)
11 | assert isinstance(response, str)
12 | assert len(response) > 0
13 |
14 |
15 | def test_gpt_4_vision_multimodal_generation():
16 | model = GPT4_Vision()
17 | image_path = "playground/models/test.jpg"
18 | image_base64 = encode_image_to_base64(image_path)
19 | prompt = {
20 | "text": "Please explain this spectroscopy image.",
21 | "images": [
22 | {
23 | "type": "image_url",
24 | "image_url": {"url": f"data:image/jpg;base64,{image_base64}"},
25 | }
26 | ],
27 | }
28 | response = model.generate(prompt)
29 | assert isinstance(response, str)
30 | assert len(response) > 0
31 |
32 |
33 | def test_gpt_4_vision_signalgroup_evaluation():
34 | model = GPT4_Vision()
35 | signal_group = SignalGroup("data")
36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
37 | evaluator = ChoiceEvaluator()
38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None)
39 | assert "metrics" in results
40 | assert "overall" in results["metrics"]
41 |
42 |
43 | def test_gpt_4_vision_signalgroup_evaluation_parallel():
44 | model = GPT4_Vision()
45 | signal_group = SignalGroup("data")
46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
47 | evaluator = ChoiceEvaluator()
48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None)
49 | assert "metrics" in results
50 | assert "overall" in results["metrics"]
51 |
--------------------------------------------------------------------------------
/spectrumlab/models/deepseek_api.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 | from .base_api import BaseAPIModel
3 | from spectrumlab.config import Config
4 | from openai import OpenAI
5 |
6 |
7 | class DeepSeek(BaseAPIModel):
8 | def __init__(
9 | self,
10 | api_key: Optional[str] = None,
11 | base_url: Optional[str] = None,
12 | model_name: Optional[str] = None,
13 | **kwargs,
14 | ):
15 | config = Config()
16 |
17 | # Use provided parameters or fall back to config
18 | self.api_key = api_key or config.deepseek_api_key
19 | self.base_url = base_url or config.deepseek_base_url
20 | self.model_name = model_name or config.deepseek_model_name
21 |
22 | # Validate that we have required configuration
23 | if not self.api_key:
24 | raise ValueError(
25 | "DeepSeek API key not found. Please set DEEPSEEK_API_KEY in your .env file "
26 | "or provide api_key parameter."
27 | )
28 |
29 | self.client = OpenAI(
30 | api_key=self.api_key,
31 | base_url=self.base_url,
32 | )
33 |
34 | # Initialize parent class
35 | super().__init__(model_name=self.model_name, **kwargs)
36 |
37 | def generate(self, prompt: str, max_out_len: int = 512, json_output: bool = False) -> str:
38 | messages = []
39 |
40 | if json_output:
41 | messages.append({"role": "system", "content": "response in JSON format"})
42 |
43 | messages.append({"role": "user", "content": prompt})
44 |
45 | try:
46 | response = self.client.chat.completions.create(
47 | model=self.model_name,
48 | messages=messages,
49 | max_tokens=max_out_len,
50 | stream=False,
51 | )
52 | return response.choices[0].message.content
53 | except Exception as e:
54 | raise RuntimeError(f"DeepSeek API call failed: {e}")
55 |
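56 | # Example usage (illustrative): credentials fall back to the .env-backed Config.
57 | #   model = DeepSeek()  # reads DEEPSEEK_API_KEY (and related settings) from .env
58 | #   reply = model.generate("What is spectroscopy?")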
--------------------------------------------------------------------------------
/tests/models/test_claude_haiku_3_5.py:
--------------------------------------------------------------------------------
1 | from spectrumlab.models import Claude_Haiku_3_5
2 | from spectrumlab.utils.image_utils import encode_image_to_base64
3 | from spectrumlab.benchmark.signal_group import SignalGroup
4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator
5 |
6 |
7 | def test_claude_text_generation():
8 | model = Claude_Haiku_3_5()
9 | prompt = "What is spectroscopy?"
10 | response = model.generate(prompt)
11 | assert isinstance(response, str)
12 | assert len(response) > 0
13 |
14 |
15 | def test_claude_multimodal_generation():
16 | model = Claude_Haiku_3_5()
17 | image_path = "playground/models/test.jpg"
18 | image_base64 = encode_image_to_base64(image_path)
19 | prompt = {
20 | "text": "Please explain this spectroscopy image.",
21 | "images": [
22 | {
23 | "type": "image_url",
24 | "image_url": {"url": f"data:image/jpg;base64,{image_base64}"},
25 | }
26 | ],
27 | }
28 | response = model.generate(prompt)
29 | assert isinstance(response, str)
30 | assert len(response) > 0
31 |
32 |
33 | def test_claude_signalgroup_evaluation():
34 | model = Claude_Haiku_3_5()
35 | signal_group = SignalGroup("data")
36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
37 | evaluator = ChoiceEvaluator()
38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None)
39 | assert "metrics" in results
40 | assert "overall" in results["metrics"]
41 |
42 |
43 | def test_claude_signalgroup_evaluation_parallel():
44 | model = Claude_Haiku_3_5()
45 | signal_group = SignalGroup("data")
46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
47 | evaluator = ChoiceEvaluator()
48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None)
49 | assert "metrics" in results
50 | assert "overall" in results["metrics"]
51 |
--------------------------------------------------------------------------------
/tests/models/test_claude_sonnet_3_5.py:
--------------------------------------------------------------------------------
1 | from spectrumlab.models import Claude_Sonnet_3_5
2 | from spectrumlab.utils.image_utils import encode_image_to_base64
3 | from spectrumlab.benchmark.signal_group import SignalGroup
4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator
5 |
6 |
7 | def test_claude_text_generation():
8 | model = Claude_Sonnet_3_5()
9 | prompt = "What is spectroscopy?"
10 | response = model.generate(prompt)
11 | assert isinstance(response, str)
12 | assert len(response) > 0
13 |
14 |
15 | def test_claude_multimodal_generation():
16 | model = Claude_Sonnet_3_5()
17 | image_path = "playground/models/test.jpg"
18 | image_base64 = encode_image_to_base64(image_path)
19 | prompt = {
20 | "text": "Please explain this spectroscopy image.",
21 | "images": [
22 | {
23 | "type": "image_url",
24 | "image_url": {"url": f"data:image/jpg;base64,{image_base64}"},
25 | }
26 | ],
27 | }
28 | response = model.generate(prompt)
29 | assert isinstance(response, str)
30 | assert len(response) > 0
31 |
32 |
33 | def test_claude_signalgroup_evaluation():
34 | model = Claude_Sonnet_3_5()
35 | signal_group = SignalGroup("data")
36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
37 | evaluator = ChoiceEvaluator()
38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None)
39 | assert "metrics" in results
40 | assert "overall" in results["metrics"]
41 |
42 |
43 | def test_claude_signalgroup_evaluation_parallel():
44 | model = Claude_Sonnet_3_5()
45 | signal_group = SignalGroup("data")
46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
47 | evaluator = ChoiceEvaluator()
48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None)
49 | assert "metrics" in results
50 | assert "overall" in results["metrics"]
--------------------------------------------------------------------------------
/tests/models/test_deepseek_vl_2.py:
--------------------------------------------------------------------------------
1 | from spectrumlab.models import DeepSeek_VL2
2 | from spectrumlab.utils.image_utils import encode_image_to_base64
3 | from spectrumlab.benchmark.signal_group import SignalGroup
4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator
5 |
6 |
7 | def test_deepseek_vl_2_text_generation():
8 | model = DeepSeek_VL2()
9 | prompt = "What is spectroscopy?"
10 | response = model.generate(prompt)
11 | assert isinstance(response, str)
12 | assert len(response) > 0
13 |
14 |
15 | def test_deepseek_vl_2_multimodal_generation():
16 | model = DeepSeek_VL2()
17 | image_path = "playground/models/test.jpg"
18 | image_base64 = encode_image_to_base64(image_path)
19 | prompt = {
20 | "text": "Please explain this spectroscopy image.",
21 | "images": [
22 | {
23 | "type": "image_url",
24 |                 "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
25 | }
26 | ],
27 | }
28 | response = model.generate(prompt)
29 | assert isinstance(response, str)
30 | assert len(response) > 0
31 |
32 |
33 | def test_deepseek_vl_2_signalgroup_evaluation():
34 | model = DeepSeek_VL2()
35 | signal_group = SignalGroup("data")
36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
37 | evaluator = ChoiceEvaluator()
38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None)
39 | assert "metrics" in results
40 | assert "overall" in results["metrics"]
41 |
42 |
43 | def test_deepseek_vl_2_signalgroup_evaluation_parallel():
44 | model = DeepSeek_VL2()
45 | signal_group = SignalGroup("data")
46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
47 | evaluator = ChoiceEvaluator()
48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None)
49 | assert "metrics" in results
50 | assert "overall" in results["metrics"]
--------------------------------------------------------------------------------
/tests/models/test_internvl.py:
--------------------------------------------------------------------------------
1 | from spectrumlab.models import InternVL
2 | from spectrumlab.utils.image_utils import encode_image_to_base64
3 | from spectrumlab.benchmark.signal_group import SignalGroup
4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator
5 |
6 |
7 | def test_internvl_text_generation():
8 | model = InternVL()
9 | prompt = "What is spectroscopy?"
10 | response = model.generate(prompt)
11 | assert isinstance(response, str)
12 | assert len(response) > 0
13 |
14 |
15 | def test_internvl_multimodal_generation():
16 | model = InternVL()
17 |     image_path = "playground/models/test.jpg"
18 | image_base64 = encode_image_to_base64(image_path)
19 | prompt = {
20 | "text": "Please explain this spectroscopy image.",
21 | "images": [
22 | {
23 | "type": "image_url",
24 |                 "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
25 | }
26 | ],
27 | }
28 | response = model.generate(prompt)
29 | assert isinstance(response, str)
30 | assert len(response) > 0
31 |
32 |
33 | def test_internvl_signalgroup_evaluation():
34 | model = InternVL()
35 | signal_group = SignalGroup("data")
36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
37 | evaluator = ChoiceEvaluator()
38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None)
39 | assert "metrics" in results
40 | assert "overall" in results["metrics"]
41 |
42 |
43 | def test_internvl_signalgroup_evaluation_parallel():
44 | model = InternVL()
45 | signal_group = SignalGroup("data")
46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
47 | evaluator = ChoiceEvaluator()
48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None)
49 | assert "metrics" in results
50 | assert "overall" in results["metrics"]
51 |
--------------------------------------------------------------------------------
/leaderboard/vue/src/components/icons/IconEcosystem.vue:
--------------------------------------------------------------------------------
1 |
2 |
7 |
8 |
--------------------------------------------------------------------------------
/tests/models/test_grok_2_v.py:
--------------------------------------------------------------------------------
1 | from spectrumlab.models import Grok_2_Vision
2 | from spectrumlab.utils.image_utils import encode_image_to_base64
3 | from spectrumlab.benchmark.signal_group import SignalGroup
4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator
5 |
6 |
7 | def test_grok_2_vision_text_generation():
8 | model = Grok_2_Vision()
9 | prompt = "What is spectroscopy?"
10 | response = model.generate(prompt)
11 | assert isinstance(response, str)
12 | assert len(response) > 0
13 |
14 |
15 | def test_grok_2_vision_multimodal_generation():
16 | model = Grok_2_Vision()
17 | image_path = "playground/models/test.jpg"
18 | image_base64 = encode_image_to_base64(image_path)
19 | prompt = {
20 | "text": "Please explain this spectroscopy image.",
21 | "images": [
22 | {
23 | "type": "image_url",
24 |                 "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
25 | }
26 | ],
27 | }
28 | response = model.generate(prompt)
29 | assert isinstance(response, str)
30 | assert len(response) > 0
31 |
32 |
33 | def test_grok_2_vision_signalgroup_evaluation():
34 | model = Grok_2_Vision()
35 | signal_group = SignalGroup("data")
36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
37 | evaluator = ChoiceEvaluator()
38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None)
39 | assert "metrics" in results
40 | assert "overall" in results["metrics"]
41 |
42 |
43 | def test_grok_2_vision_signalgroup_evaluation_parallel():
44 | model = Grok_2_Vision()
45 | signal_group = SignalGroup("data")
46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
47 | evaluator = ChoiceEvaluator()
48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None)
49 | assert "metrics" in results
50 | assert "overall" in results["metrics"]
51 |
--------------------------------------------------------------------------------
/tests/models/test_qwen_vl_2_5_32b.py:
--------------------------------------------------------------------------------
1 | from spectrumlab.models import Qwen_2_5_VL_32B
2 | from spectrumlab.utils.image_utils import encode_image_to_base64
3 | from spectrumlab.benchmark.signal_group import SignalGroup
4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator
5 |
6 |
7 | def test_qwen_2_5_vl_32b_text_generation():
8 | model = Qwen_2_5_VL_32B()
9 | prompt = "What is spectroscopy?"
10 | response = model.generate(prompt)
11 | assert isinstance(response, str)
12 | assert len(response) > 0
13 |
14 |
15 | def test_qwen_2_5_vl_32b_multimodal_generation():
16 | model = Qwen_2_5_VL_32B()
17 | image_path = "playground/models/test.jpg"
18 | image_base64 = encode_image_to_base64(image_path)
19 | prompt = {
20 | "text": "Please explain this spectroscopy image.",
21 | "images": [
22 | {
23 | "type": "image_url",
24 |                 "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
25 | }
26 | ],
27 | }
28 | response = model.generate(prompt)
29 | assert isinstance(response, str)
30 | assert len(response) > 0
31 |
32 |
33 | def test_qwen_2_5_vl_32b_signalgroup_evaluation():
34 | model = Qwen_2_5_VL_32B()
35 | signal_group = SignalGroup("data")
36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
37 | evaluator = ChoiceEvaluator()
38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None)
39 | assert "metrics" in results
40 | assert "overall" in results["metrics"]
41 |
42 |
43 | def test_qwen_2_5_vl_32b_signalgroup_evaluation_parallel():
44 | model = Qwen_2_5_VL_32B()
45 | signal_group = SignalGroup("data")
46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
47 | evaluator = ChoiceEvaluator()
48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None)
49 | assert "metrics" in results
50 | assert "overall" in results["metrics"]
51 |
--------------------------------------------------------------------------------
/tests/models/test_qwen_vl_2_5_72b.py:
--------------------------------------------------------------------------------
1 | from spectrumlab.models import Qwen_2_5_VL_72B
2 | from spectrumlab.utils.image_utils import encode_image_to_base64
3 | from spectrumlab.benchmark.signal_group import SignalGroup
4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator
5 |
6 |
7 | def test_qwen_2_5_vl_72b_text_generation():
8 | model = Qwen_2_5_VL_72B()
9 | prompt = "What is spectroscopy?"
10 | response = model.generate(prompt)
11 | assert isinstance(response, str)
12 | assert len(response) > 0
13 |
14 |
15 | def test_qwen_2_5_vl_72b_multimodal_generation():
16 | model = Qwen_2_5_VL_72B()
17 | image_path = "playground/models/test.jpg"
18 | image_base64 = encode_image_to_base64(image_path)
19 | prompt = {
20 | "text": "Please explain this spectroscopy image.",
21 | "images": [
22 | {
23 | "type": "image_url",
24 |                 "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
25 | }
26 | ],
27 | }
28 | response = model.generate(prompt)
29 | assert isinstance(response, str)
30 | assert len(response) > 0
31 |
32 |
33 | def test_qwen_2_5_vl_72b_signalgroup_evaluation():
34 | model = Qwen_2_5_VL_72B()
35 | signal_group = SignalGroup("data")
36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
37 | evaluator = ChoiceEvaluator()
38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None)
39 | assert "metrics" in results
40 | assert "overall" in results["metrics"]
41 |
42 |
43 | def test_qwen_2_5_vl_72b_signalgroup_evaluation_parallel():
44 | model = Qwen_2_5_VL_72B()
45 | signal_group = SignalGroup("data")
46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
47 | evaluator = ChoiceEvaluator()
48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None)
49 | assert "metrics" in results
50 | assert "overall" in results["metrics"]
51 |
--------------------------------------------------------------------------------
/tests/models/test_llama_3_2_vision_11b.py:
--------------------------------------------------------------------------------
1 | from spectrumlab.models import Llama_Vision_11B
2 | from spectrumlab.utils.image_utils import encode_image_to_base64
3 | from spectrumlab.benchmark.signal_group import SignalGroup
4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator
5 |
6 |
7 | def test_llama_vision_11b_text_generation():
8 | model = Llama_Vision_11B()
9 | prompt = "What is spectroscopy?"
10 | response = model.generate(prompt)
11 | assert isinstance(response, str)
12 | assert len(response) > 0
13 |
14 |
15 | def test_llama_vision_11b_multimodal_generation():
16 | model = Llama_Vision_11B()
17 | image_path = "playground/models/test.jpg"
18 | image_base64 = encode_image_to_base64(image_path)
19 | prompt = {
20 | "text": "Please explain this spectroscopy image.",
21 | "images": [
22 | {
23 | "type": "image_url",
24 |                 "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
25 | }
26 | ],
27 | }
28 | response = model.generate(prompt)
29 | assert isinstance(response, str)
30 | assert len(response) > 0
31 |
32 |
33 | def test_llama_vision_11b_signalgroup_evaluation():
34 | model = Llama_Vision_11B()
35 | signal_group = SignalGroup("data")
36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
37 | evaluator = ChoiceEvaluator()
38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None)
39 | assert "metrics" in results
40 | assert "overall" in results["metrics"]
41 |
42 |
43 | def test_llama_vision_11b_signalgroup_evaluation_parallel():
44 | model = Llama_Vision_11B()
45 | signal_group = SignalGroup("data")
46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
47 | evaluator = ChoiceEvaluator()
48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None)
49 | assert "metrics" in results
50 | assert "overall" in results["metrics"]
51 |
--------------------------------------------------------------------------------
/tests/models/test_llama_3_2_vision_90b.py:
--------------------------------------------------------------------------------
1 | from spectrumlab.models import Llama_Vision_90B
2 | from spectrumlab.utils.image_utils import encode_image_to_base64
3 | from spectrumlab.benchmark.signal_group import SignalGroup
4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator
5 |
6 |
7 | def test_llama_vision_90b_text_generation():
8 | model = Llama_Vision_90B()
9 | prompt = "What is spectroscopy?"
10 | response = model.generate(prompt)
11 | assert isinstance(response, str)
12 | assert len(response) > 0
13 |
14 |
15 | def test_llama_vision_90b_multimodal_generation():
16 | model = Llama_Vision_90B()
17 | image_path = "playground/models/test.jpg"
18 | image_base64 = encode_image_to_base64(image_path)
19 | prompt = {
20 | "text": "Please explain this spectroscopy image.",
21 | "images": [
22 | {
23 | "type": "image_url",
24 |                 "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
25 | }
26 | ],
27 | }
28 | response = model.generate(prompt)
29 | assert isinstance(response, str)
30 | assert len(response) > 0
31 |
32 |
33 | def test_llama_vision_90b_signalgroup_evaluation():
34 | model = Llama_Vision_90B()
35 | signal_group = SignalGroup("data")
36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
37 | evaluator = ChoiceEvaluator()
38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None)
39 | assert "metrics" in results
40 | assert "overall" in results["metrics"]
41 |
42 |
43 | def test_llama_vision_90b_signalgroup_evaluation_parallel():
44 | model = Llama_Vision_90B()
45 | signal_group = SignalGroup("data")
46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
47 | evaluator = ChoiceEvaluator()
48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None)
49 | assert "metrics" in results
50 | assert "overall" in results["metrics"]
51 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute to This Project
2 |
3 | ## Standard Development Workflow
4 |
5 | 1. Browse the Issues on GitHub for a feature you would like to add or a bug you would like to fix, and check whether it is already covered by a Pull Request.
6 |    - If not, please create a new Issue. Unless your PR is very small, it should point to a concrete Issue; this avoids duplicated work and makes code review more efficient.
7 | 2. If this is your first contribution to the project, go to the repository homepage and click the "Fork" button in the upper-right corner; this creates the copy of the repository you will develop in.
8 |
9 |    - Clone the forked project to your computer and add a remote pointing to this project:
10 |
11 |    ```bash
12 |    git clone https://github.com/<your-username>/hello.git
13 |    cd hello
14 |    git remote add upstream https://github.com/hello.git
15 |    ```
16 |
17 | 3. Develop your contribution
18 |
19 |    - Make sure your fork is in sync with the main repository:
20 |
21 |    ```bash
22 |    git checkout main
23 |    git pull upstream main
24 |    ```
25 |
26 |    - Create a `git` branch on which you will develop your contribution, and give it a sensible name, for example:
27 |
28 |    ```bash
29 |    git checkout -b <category>/<branch-name>
30 |    ```
31 |
32 |    - As you make progress, commit your changes locally, for example:
33 |
34 |    ```bash
35 |    git add changed-file.py test/test-changed-file.py
36 |    git commit -m "feat(integrations): Add integration with the `awesome` library"
37 |    ```
38 |
39 | 4. Submit your contribution:
40 |
41 |    - [GitHub Pull Request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/about-pull-requests)
42 |    - When your contribution is ready, push your branch to GitHub:
43 |
44 |    ```bash
45 |    git push origin <category>/<branch-name>
46 |    ```
47 |
48 |    - Once the branch is uploaded, GitHub prints a URL for submitting your contribution as a pull request. Open that URL in your browser, write an informative title and a detailed description for your pull request, and submit it.
49 |
50 |    - Please link the related Issue (an existing one or one you created) to your PR; see the right-hand column of the PR page. Alternatively, mention "fixes <issue-link>" in the PR description and GitHub will create the link automatically.
51 |
52 |    - We will review your contribution and provide feedback. To merge changes suggested by reviewers, commit the edits to your branch and push it again (there is no need to re-create the pull request; it automatically tracks modifications to the branch), for example:
53 |
54 |    ```bash
55 |    git add tests/test-changed-file.py
56 |    git commit -m "test(sdk): Add a test case to address reviewer feedback"
57 |    git push origin <category>/<branch-name>
58 |    ```
59 |
60 |    - Once your pull request is approved by a reviewer, it will be merged into the repository's main branch.
61 |
62 | ## Setting Up the Environment
63 |
64 | Activate the Python environment you use and run the following commands in the project root:
65 |
66 | ```bash
67 | pip install uv
68 |
69 | uv pip install -e .
70 | ```
71 |
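72 | The repository also ships a `.pre-commit-config.yaml`. If you want the same checks to run locally before every commit (an optional step; the exact hooks are whatever that file defines), installing the hooks should look like this:
73 |
74 | ```bash
75 | pip install pre-commit
76 | pre-commit install
77 | ```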
--------------------------------------------------------------------------------
/docs/zh/tutorial.md:
--------------------------------------------------------------------------------
1 | # Tutorial
2 |
3 | Welcome to SpectrumLab! This tutorial helps you quickly get familiar with spectroscopy, the SpectrumLab platform, and how to use the platform to evaluate large language models on spectroscopy tasks.
4 |
5 | ## What Is Spectroscopy?
6 |
7 | Spectroscopy is the branch of science that studies the interaction between matter and electromagnetic radiation. By analyzing the spectra a substance absorbs, emits, or scatters, we can obtain detailed information about its structure, composition, and properties.
8 |
9 | ## Why Spectroscopy Matters
10 |
11 | Spectroscopy occupies a central place in modern science: by analyzing how matter interacts with electromagnetic radiation, it provides a key means of understanding the composition, structure, and properties of substances. In chemistry, it is used for molecular structure elucidation and the study of reaction mechanisms; in materials science, it characterizes nanomaterials and supports surface analysis; in biology, it is used to study protein folding and to detect metabolites. Spectroscopy is also widely applied in clinical medicine, for example enabling non-invasive diagnosis and early disease detection, making it an indispensable tool in modern research and applications.
12 |
13 | ## Common Spectroscopic Techniques
14 |
15 | - **Infrared spectroscopy (IR)**: analyzes molecular vibrations to identify functional groups. Characteristic IR absorption peaks (e.g., C=O, O–H, C–H) are diagnostic within their signature frequency ranges, making IR a core tool for identifying functional groups
16 | - **Nuclear magnetic resonance (NMR)**: provides information on atomic environments and structural connectivity through chemical shifts, signal intensities, and coupling constants; commonly used to determine molecular structures, especially of organic compounds
17 | - **Ultraviolet-visible spectroscopy (UV-Vis)**: probes electronic transitions and conjugated systems, in particular electronic structure, conjugation length, and optical properties; it does not directly provide structural connectivity information
18 | - **Mass spectrometry (MS)**: measures molecular weight and infers molecular structure from fragmentation patterns; an important tool for determining molecular composition and substructure
19 | - **Raman spectroscopy**: provides molecular vibration information and detects chemical-bond vibrations similar to IR; it is especially sensitive to symmetric molecules and nonpolar bonds, and often complements IR
20 | - **HSQC spectra**: a two-dimensional NMR (^1H–^13C or ^1H–^15N) experiment in which each cross-peak represents a directly bonded proton–heteronucleus pair; used to establish one-bond ^1H–^13C (or ^15N) assignments and to assist peak assignment and structure elucidation
21 |
22 | ## What Is SpectrumLab?
23 |
24 | ### Overview
25 |
26 | SpectrumLab is a pioneering unified platform and comprehensive toolkit designed to accelerate and systematize deep learning research in chemical spectroscopy. It streamlines the entire AI-driven spectroscopy research lifecycle, from data preprocessing to model evaluation, and provides researchers and developers with a modular, extensible, and easy-to-use Python library and tool ecosystem to advance AI research and applications in spectroscopy.
27 |
28 | ### Core Features
29 |
30 | #### Modular, Extensible Architecture
31 |
32 | SpectrumLab follows a flexible modular design. Its core components are listed below; the sketch after the list shows how they fit together:
33 |
34 | - **Benchmark groups**: organize the SpectrumBench dataset hierarchically, support multiple spectral modalities and task types, and let users combine them flexibly to build customized evaluation tasks
35 | - **Model integration**: a unified framework and standardized API for seamlessly integrating and evaluating external models, whether commercial closed-source models (such as GPT-4o) or locally deployed open-source models
36 | - **Evaluators**: the core of the evaluation engine, supporting task-specific metrics and protocols (e.g., multiple-choice vs. open-ended tasks) to ensure rigorous, task-appropriate evaluation
37 |
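38 | These three components compose directly. A minimal end-to-end sketch (assuming the benchmark data has been downloaded to the `data/` directory and the relevant API key is configured in `.env`; `SignalGroup`, `ChoiceEvaluator`, and `GPT4o` are classes shipped by this repository):
39 |
40 | ```python
41 | from spectrumlab.models import GPT4o
42 | from spectrumlab.benchmark.signal_group import SignalGroup
43 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator
44 |
45 | # Benchmark group: pick one sub-task from the Signal level
46 | data = SignalGroup("data").get_data_by_subcategories(["Spectrum Type Classification"])
47 |
48 | # Model integration: every registered model exposes the same generate() API
49 | model = GPT4o()
50 |
51 | # Evaluator: multiple-choice tasks use the ChoiceEvaluator
52 | results = ChoiceEvaluator().evaluate(data_items=data, model=model, save_path=None)
53 | print(f"Accuracy: {results['metrics']['overall']['accuracy']:.2f}%")
54 | ```
55 |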
56 | #### A Comprehensive Toolchain Ecosystem
57 |
58 | A Python library distributed via PyPI integrates core modules for data processing, model development, automated evaluation, and visualization, greatly simplifying the entire research workflow.
59 |
60 | #### Automated Benchmark Generation (SpectrumAnnotator)
61 |
62 | SpectrumLab tightly integrates the innovative SpectrumAnnotator component, which leverages the reasoning capabilities of advanced multimodal models to automatically generate high-quality, diverse benchmark data from seed datasets, enabling efficient construction of evaluation tasks.
63 |
64 | #### Public Leaderboards
65 |
66 | To ensure transparency and reproducibility, SpectrumLab maintains a public leaderboard system that systematically tracks and compares model performance across all 14 tasks, fostering fair competition and collective progress in the field.
67 |
68 | ## Related Links
69 |
70 | - [API Reference](/zh/api) - detailed interface documentation and code examples
71 | - [Benchmark](/zh/benchmark) - evaluation metrics and dataset details
72 | - [Leaderboard](https://huggingface.co/spaces/SpectrumWorld/SpectrumLeaderboard) - model performance comparison
73 |
--------------------------------------------------------------------------------
/tests/models/test_doubao_1_5_vision_pro.py:
--------------------------------------------------------------------------------
1 | from spectrumlab.models import Doubao_1_5_Vision_Pro
2 | from spectrumlab.utils.image_utils import encode_image_to_base64
3 | from spectrumlab.benchmark.signal_group import SignalGroup
4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator
5 |
6 |
7 | def test_doubao_1_5_vision_pro_text_generation():
8 | model = Doubao_1_5_Vision_Pro()
9 | prompt = "What is spectroscopy?"
10 | response = model.generate(prompt)
11 | assert isinstance(response, str)
12 | assert len(response) > 0
13 |
14 |
15 | def test_doubao_1_5_vision_pro_multimodal_generation():
16 | model = Doubao_1_5_Vision_Pro()
17 | image_path = "playground/models/test.jpg"
18 | image_base64 = encode_image_to_base64(image_path)
19 | prompt = {
20 | "text": "Please explain this spectroscopy image.",
21 | "images": [
22 | {
23 | "type": "image_url",
24 |                 "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
25 | }
26 | ],
27 | }
28 | response = model.generate(prompt)
29 | assert isinstance(response, str)
30 | assert len(response) > 0
31 |
32 |
33 | def test_doubao_1_5_vision_pro_signalgroup_evaluation():
34 | model = Doubao_1_5_Vision_Pro()
35 | signal_group = SignalGroup("data")
36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
37 | evaluator = ChoiceEvaluator()
38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None)
39 | assert "metrics" in results
40 | assert "overall" in results["metrics"]
41 |
42 |
43 | def test_doubao_1_5_vision_pro_signalgroup_evaluation_parallel():
44 | model = Doubao_1_5_Vision_Pro()
45 | signal_group = SignalGroup("data")
46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
47 | evaluator = ChoiceEvaluator()
48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None)
49 | assert "metrics" in results
50 | assert "overall" in results["metrics"]
51 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 |     /** Material icon theme for folders */
3 | "workbench.iconTheme": "material-icon-theme",
4 |     /* Custom icon associations */
5 | "material-icon-theme.folders.associations": {
6 | "leaderboard": "Secure",
7 | "evaluator": "Plastic",
8 | "spectrumlab": "Api",
9 | },
10 | "material-icon-theme.files.associations": {},
11 | "editor.codeActionsOnSave": {
12 | "source.fixAll.eslint": "explicit"
13 | },
14 | "editor.formatOnSave": true,
15 | "eslint.format.enable": true,
16 | "[python]": {
17 | "editor.defaultFormatter": "ms-python.black-formatter",
18 | "editor.formatOnSave": true
19 | },
20 |     /** TODO Tree configuration */
21 |     "todo-tree.general.tags": [
22 |         "TODO", // to-do items
23 |         "FIXME", // needs fixing
24 |         "COMPAT", // compatibility issues
25 |         "WARNING" // warnings
26 |     ],
27 | "todo-tree.highlights.customHighlight": {
28 | "TODO": {
29 | "icon": "check",
30 | "type": "tag",
31 | "foreground": "#ffff00",
32 | "iconColour": "#ffff"
33 | },
34 | "WARNING": {
35 | "icon": "alert",
36 | "type": "tag",
37 | "foreground": "#ff0000",
38 | "iconColour": "#ff0000"
39 | },
40 | "FIXME": {
41 | "icon": "flame",
42 | "type": "tag",
43 | "foreground": "#ff0000",
44 | "iconColour": "#ff0000"
45 | },
46 | "COMPAT": {
47 | "icon": "flame",
48 | "type": "tag",
49 | "foreground": "#00ff00",
50 | "iconColour": "#ffff"
51 | }
52 | },
53 |     /** Python docstring format */
54 |     "autoDocstring.docstringFormat": "numpy",
55 |     /** Markdown lint checks */
56 |     "markdownlint.config": {
57 |         // allow HTML tags
58 |         "MD033": false,
59 |         // allow the first line not to be a level-1 heading
60 |         "MD041": false
61 |     },
62 |     /** Files and folders hidden from the explorer */
63 | "files.exclude": {
64 | "**/.git": true,
65 | "**/.DS_Store": true,
66 | "**/__pycache__": true,
67 | ".idea": true
68 | },
69 | "python.testing.pytestEnabled": true
70 | }
--------------------------------------------------------------------------------
/tests/models/test_doubao_1_5_vision_pro_thinking.py:
--------------------------------------------------------------------------------
1 | from spectrumlab.models import Doubao_1_5_Vision_Pro_Thinking
2 | from spectrumlab.utils.image_utils import encode_image_to_base64
3 | from spectrumlab.benchmark.signal_group import SignalGroup
4 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator
5 |
6 |
7 | def test_doubao_1_5_vision_pro_thinking_text_generation():
8 | model = Doubao_1_5_Vision_Pro_Thinking()
9 | prompt = "What is spectroscopy?"
10 | response = model.generate(prompt)
11 | assert isinstance(response, str)
12 | assert len(response) > 0
13 |
14 |
15 | def test_doubao_1_5_vision_pro_thinking_multimodal_generation():
16 | model = Doubao_1_5_Vision_Pro_Thinking()
17 | image_path = "playground/models/test.jpg"
18 | image_base64 = encode_image_to_base64(image_path)
19 | prompt = {
20 | "text": "Please explain this spectroscopy image.",
21 | "images": [
22 | {
23 | "type": "image_url",
24 |                 "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
25 | }
26 | ],
27 | }
28 | response = model.generate(prompt)
29 | assert isinstance(response, str)
30 | assert len(response) > 0
31 |
32 |
33 | def test_doubao_1_5_vision_pro_thinking_signalgroup_evaluation():
34 | model = Doubao_1_5_Vision_Pro_Thinking()
35 | signal_group = SignalGroup("data")
36 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
37 | evaluator = ChoiceEvaluator()
38 | results = evaluator.evaluate(data_items=data, model=model, save_path=None)
39 | assert "metrics" in results
40 | assert "overall" in results["metrics"]
41 |
42 |
43 | def test_doubao_1_5_vision_pro_thinking_signalgroup_evaluation_parallel():
44 | model = Doubao_1_5_Vision_Pro_Thinking()
45 | signal_group = SignalGroup("data")
46 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
47 | evaluator = ChoiceEvaluator()
48 | results = evaluator.evaluate_many(data_items=data, model=model, save_path=None)
49 | assert "metrics" in results
50 | assert "overall" in results["metrics"]
51 |
--------------------------------------------------------------------------------
/docs/.vitepress/config.mts:
--------------------------------------------------------------------------------
1 | import { defineConfig } from 'vitepress'
2 | import { en } from './en'
3 | import { zh } from './zh'
4 |
5 | // https://vitepress.dev/reference/site-config
6 | export default defineConfig({
7 | base: '/SpectrumLab/',
8 |
9 | rewrites: {
10 | 'zh/:rest*': ':rest*'
11 | },
12 |
13 | cleanUrls: true,
14 |
15 |   // Globally shared theme configuration
16 | themeConfig: {
17 | socialLinks: [
18 | { icon: 'github', link: 'https://github.com/little1d/spectrumlab' }
19 | ],
20 |
21 | lastUpdated: {
22 | text: "最后更新于",
23 | formatOptions: {
24 | dateStyle: 'full',
25 | timeStyle: 'medium',
26 | },
27 | },
28 |
29 | search: {
30 | provider: 'local',
31 | options: {
32 | locales: {
33 | root: {
34 | translations: {
35 | button: {
36 | buttonText: '搜索文档',
37 | buttonAriaLabel: '搜索文档',
38 | },
39 | modal: {
40 | noResultsText: '无法找到相关结果',
41 | resetButtonTitle: '清除查询条件',
42 | footer: {
43 | selectText: '选择',
44 | navigateText: '切换',
45 | closeText: '关闭',
46 | }
47 | }
48 | }
49 | },
50 | en: {
51 | translations: {
52 | button: {
53 | buttonText: 'Search',
54 | buttonAriaLabel: 'Search',
55 | },
56 | modal: {
57 | noResultsText: 'No results found',
58 | resetButtonTitle: 'Clear search criteria',
59 | footer: {
60 | selectText: 'to select',
61 | navigateText: 'to navigate',
62 | closeText: 'to close',
63 | }
64 | }
65 | }
66 | }
67 | }
68 | }
69 | },
70 | },
71 |
72 | markdown: {
73 | image: {
74 | lazyLoading: true,
75 | }
76 | },
77 |
78 |   // i18n locale configuration
79 | locales: {
80 | root: { label: '简体中文', lang: 'zh-CN', ...zh },
81 | en: { label: 'English', lang: 'en-US', ...en },
82 | },
83 | })
84 |
--------------------------------------------------------------------------------
/docs/.vitepress/en.ts:
--------------------------------------------------------------------------------
1 | import { defineConfig } from 'vitepress'
2 |
3 | // https://vitepress.dev/reference/site-config
4 | export const en = defineConfig({
5 | lang: 'en-US',
6 | title: "SpectrumLab",
7 | description: "A pioneering unified platform designed to systematize and accelerate deep learning research in spectroscopy",
8 | head: [
9 | ['link', { rel: 'icon', type: 'image/svg+xml', href: '/logo.svg' }]
10 | ],
11 | themeConfig: {
12 | siteTitle: "SpectrumLab",
13 | logo: {
14 | src: '/logo.svg',
15 | },
16 | nav: [
17 | { text: 'Tutorial', link: '/en/tutorial' },
18 | { text: 'API', link: '/en/api' },
19 | { text: 'Benchmark', link: '/en/benchmark' },
20 | { text: 'Leaderboard', link: 'https://huggingface.co/spaces/SpectrumWorld/SpectrumLeaderboard' },
21 | ],
22 | sidebar: {
23 | '/en/': [
24 | {
25 | text: 'Getting Started',
26 | items: [
27 | { text: 'Introduction', link: '/en/' },
28 | { text: 'Tutorial', link: '/en/tutorial' },
29 | ]
30 | },
31 | {
32 | text: 'Documentation',
33 | items: [
34 | { text: 'API Reference', link: '/en/api' },
35 | { text: 'Benchmark', link: '/en/benchmark' },
36 | { text: 'Leaderboard', link: 'https://huggingface.co/spaces/SpectrumWorld/SpectrumLeaderboard' },
37 | ]
38 | }
39 | ]
40 | },
41 | footer: {
42 | message: 'Released under the MIT License',
43 | copyright: 'Copyright © 2025 SpectrumLab'
44 | },
45 | docFooter: {
46 | prev: 'Previous page',
47 | next: 'Next page'
48 | },
49 |
50 | outline: {
51 | label: 'On this page'
52 | },
53 |
54 | lastUpdated: {
55 | text: 'Last updated'
56 | },
57 |
58 | darkModeSwitchLabel: 'Appearance',
59 | lightModeSwitchTitle: 'Switch to light theme',
60 | darkModeSwitchTitle: 'Switch to dark theme',
61 | }
62 | })
--------------------------------------------------------------------------------
/leaderboard/vue/src/assets/base.css:
--------------------------------------------------------------------------------
1 | /* color palette from <https://github.com/vuejs/theme> */
2 | :root {
3 | --vt-c-white: #ffffff;
4 | --vt-c-white-soft: #f8f8f8;
5 | --vt-c-white-mute: #f2f2f2;
6 |
7 | --vt-c-black: #181818;
8 | --vt-c-black-soft: #222222;
9 | --vt-c-black-mute: #282828;
10 |
11 | --vt-c-indigo: #2c3e50;
12 |
13 | --vt-c-divider-light-1: rgba(60, 60, 60, 0.29);
14 | --vt-c-divider-light-2: rgba(60, 60, 60, 0.12);
15 | --vt-c-divider-dark-1: rgba(84, 84, 84, 0.65);
16 | --vt-c-divider-dark-2: rgba(84, 84, 84, 0.48);
17 |
18 | --vt-c-text-light-1: var(--vt-c-indigo);
19 | --vt-c-text-light-2: rgba(60, 60, 60, 0.66);
20 | --vt-c-text-dark-1: var(--vt-c-white);
21 | --vt-c-text-dark-2: rgba(235, 235, 235, 0.64);
22 | }
23 |
24 | /* semantic color variables for this project */
25 | :root {
26 | --color-background: var(--vt-c-white);
27 | --color-background-soft: var(--vt-c-white-soft);
28 | --color-background-mute: var(--vt-c-white-mute);
29 |
30 | --color-border: var(--vt-c-divider-light-2);
31 | --color-border-hover: var(--vt-c-divider-light-1);
32 |
33 | --color-heading: var(--vt-c-text-light-1);
34 | --color-text: var(--vt-c-text-light-1);
35 |
36 | --section-gap: 160px;
37 | }
38 |
39 | @media (prefers-color-scheme: dark) {
40 | :root {
41 | --color-background: var(--vt-c-black);
42 | --color-background-soft: var(--vt-c-black-soft);
43 | --color-background-mute: var(--vt-c-black-mute);
44 |
45 | --color-border: var(--vt-c-divider-dark-2);
46 | --color-border-hover: var(--vt-c-divider-dark-1);
47 |
48 | --color-heading: var(--vt-c-text-dark-1);
49 | --color-text: var(--vt-c-text-dark-2);
50 | }
51 | }
52 |
53 | *,
54 | *::before,
55 | *::after {
56 | box-sizing: border-box;
57 | margin: 0;
58 | font-weight: normal;
59 | }
60 |
61 | body {
62 | min-height: 100vh;
63 | color: var(--color-text);
64 | background: var(--color-background);
65 | transition:
66 | color 0.5s,
67 | background-color 0.5s;
68 | line-height: 1.6;
69 | font-family:
70 | Inter,
71 | -apple-system,
72 | BlinkMacSystemFont,
73 | 'Segoe UI',
74 | Roboto,
75 | Oxygen,
76 | Ubuntu,
77 | Cantarell,
78 | 'Fira Sans',
79 | 'Droid Sans',
80 | 'Helvetica Neue',
81 | sans-serif;
82 | font-size: 15px;
83 | text-rendering: optimizeLegibility;
84 | -webkit-font-smoothing: antialiased;
85 | -moz-osx-font-smoothing: grayscale;
86 | }
87 |
--------------------------------------------------------------------------------
/run_evaluation.py:
--------------------------------------------------------------------------------
1 | # import swanlab
2 | from spectrumlab.models import GPT4_1
3 | from spectrumlab.benchmark.signal_group import SignalGroup
4 | from spectrumlab.benchmark.generation_group import GenerationGroup
5 | from spectrumlab.benchmark.perception_group import PerceptionGroup
6 | from spectrumlab.benchmark.semantic_group import SemanticGroup
7 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator
8 | from spectrumlab.evaluator.open_evaluator import OpenEvaluator
9 |
10 | # export your swanlab api-key
11 |
12 | # Change your model!
13 | # MODEL = GPT4o()
14 | MODEL = GPT4_1()
15 |
16 |
17 | # Change this! e.g. "./gpt4o_evaluation_results"
18 | SAVE_DIR = "./gpt4_1_generation_evaluation_results"
19 |
20 | # Define each group, its subcategories, and its evaluator
21 | GROUPS = [
22 | {
23 | "name": "Signal",
24 | "group": SignalGroup("data"),
25 | "evaluator": ChoiceEvaluator(),
26 |         "subcategories": None,  # None means all subcategories
27 | },
28 | {
29 | "name": "Perception",
30 | "group": PerceptionGroup("data"),
31 | "evaluator": ChoiceEvaluator(),
32 | "subcategories": None,
33 | },
34 | {
35 | "name": "Semantic",
36 | "group": SemanticGroup("data"),
37 | "evaluator": ChoiceEvaluator(),
38 | "subcategories": None,
39 | },
40 | {
41 | "name": "Generation",
42 | "group": GenerationGroup("data"),
43 | "evaluator": OpenEvaluator(),
44 | "subcategories": None,
45 | },
46 | ]
47 |
48 | # Change the experiment_name to your model name!!!
49 | # swanlab.init(
50 | # workspace="SpectrumLab",
51 | # project="spectrumlab-eval",
52 | # experiment_name="gpt_4_1_generation_evaluation_results",
53 | # config={"model": MODEL.model_name},
54 | # )
55 |
56 | for group_info in GROUPS:
57 | name = group_info["name"]
58 | group = group_info["group"]
59 | evaluator = group_info["evaluator"]
60 | subcategories = group_info["subcategories"]
61 | print(f"\n===== Evaluating {name} Group =====")
62 | data = group.get_data_by_subcategories(subcategories or "all")
63 | results = evaluator.evaluate(data_items=data, model=MODEL, save_path=SAVE_DIR)
64 | accuracy = results["metrics"]["overall"]["accuracy"]
65 | print(f"{name} Group evaluation completed! Overall accuracy: {accuracy:.2f}%\n")
66 | # swanlab.log({f"{name}_accuracy": accuracy})
67 |
68 | # swanlab.finish()
69 |
70 | # use nohup in the terminal to start the evaluation
71 | # nohup python run_evaluation.py > run_eval.log 2>&1 &
72 |
--------------------------------------------------------------------------------
/docs/.vitepress/zh.ts:
--------------------------------------------------------------------------------
1 | import { defineConfig } from 'vitepress'
2 |
3 | // https://vitepress.dev/reference/site-config
4 | export const zh = defineConfig({
5 | lang: 'zh-CN',
6 | title: "SpectrumLab",
7 | description: "A pioneering unified platform designed to systematize and accelerate deep learning research in spectroscopy.",
8 |
9 | head: [
10 | ['link', { rel: 'icon', type: 'image/svg+xml', href: '/logo.svg' }]
11 | // ['link', { rel: 'preconnect', href: 'https://fonts.googleapis.com' }],
12 | // [
13 | // 'link',
14 | // { rel: 'preconnect', href: 'https://fonts.gstatic.com', crossorigin: '' }
15 | // ],
16 | // [
17 | // 'link',
18 | // { href: 'https://fonts.googleapis.com/css2?family=Roboto&display=swap', rel: 'stylesheet' }
19 | // ]
20 | ],
21 |
22 | themeConfig: {
23 | siteTitle: "SpectrumLab",
24 | logo: {
25 | src: '/logo.svg',
26 | },
27 | nav: [
28 | { text: '教程', link: '/tutorial' },
29 | { text: 'API', link: '/api' },
30 | { text: '基准测试', link: '/benchmark' },
31 | { text: '排行榜', link: 'https://huggingface.co/spaces/SpectrumWorld/SpectrumLeaderboard' },
32 | ],
33 |
34 | sidebar: {
35 | '/': [
36 | {
37 | text: '开始使用',
38 | items: [
39 | { text: '介绍', link: '/' },
40 | { text: '教程', link: '/tutorial' },
41 | ]
42 | },
43 | {
44 | text: '文档',
45 | items: [
46 | { text: 'API 参考', link: '/api' },
47 | { text: '基准测试', link: '/benchmark' },
48 | { text: '排行榜', link: 'https://huggingface.co/spaces/SpectrumWorld/SpectrumLeaderboard' },
49 | ]
50 | }
51 | ]
52 | },
53 | footer: {
54 | message: '基于 MIT 许可发布',
55 | copyright: 'Copyright © 2025 SpectrumLab'
56 | },
57 | docFooter: {
58 | prev: '上一页',
59 | next: '下一页'
60 | },
61 | outline: {
62 | label: '页面导航'
63 | },
64 | lastUpdated: {
65 | text: '最后更新于'
66 | },
67 | darkModeSwitchLabel: '主题',
68 | lightModeSwitchTitle: '切换到浅色模式',
69 | darkModeSwitchTitle: '切换到深色模式',
70 | }
71 | })
72 |
--------------------------------------------------------------------------------
/spectrumlab/utils/image_utils.py:
--------------------------------------------------------------------------------
1 | import base64
2 | from pathlib import Path
3 | from typing import List, Dict, Optional, Any, Union
4 |
5 |
6 | def encode_image_to_base64(image_path: str) -> str:
7 | try:
8 | with open(image_path, "rb") as image_file:
9 | return base64.b64encode(image_file.read()).decode("utf-8")
10 | except Exception as e:
11 | raise ValueError(f"Failed to encode image to base64: {e}")
12 |
13 |
14 | def get_image_mime_type(image_path: str) -> str:
15 | path = Path(image_path)
16 | extension = path.suffix.lower()
17 |
18 | mime_type = {
19 | ".png": "image/png",
20 | ".jpg": "image/jpeg",
21 | ".jpeg": "image/jpeg",
22 | ".gif": "image/gif",
23 | ".bmp": "image/bmp",
24 | ".webp": "image/webp",
25 | }
26 |
27 | return mime_type.get(extension, "image/jpeg")
28 |
29 |
30 | def prepare_images_for_prompt(
31 | image_paths: Union[str, List[str], None],
32 | ) -> List[Dict[str, Any]]:
33 | if not image_paths:
34 | return []
35 |
36 | # Ensure it's a list format
37 | if isinstance(image_paths, str):
38 | image_paths = [image_paths]
39 |
40 | image_data = []
41 | for image_path in image_paths:
42 | if not image_path or not image_path.strip():
43 | continue
44 |
45 | path = Path(image_path)
46 | if not path.exists():
47 | print(f"⚠️ Warning: Image file not found: {image_path}")
48 | continue
49 |
50 | try:
51 | base64_image = encode_image_to_base64(image_path)
52 | mime_type = get_image_mime_type(image_path)
53 |
54 | image_info = {
55 | "type": "image_url",
56 | "image_url": {"url": f"data:{mime_type};base64,{base64_image}"},
57 | }
58 | image_data.append(image_info)
59 |
60 | except Exception as e:
61 | print(f"⚠️ Warning: Failed to process image {image_path}: {e}")
62 | continue
63 |
64 | return image_data
65 |
66 |
67 | def normalize_image_paths(image_paths_field: Any) -> Optional[List[str]]:
68 | if not image_paths_field:
69 | return None
70 | if isinstance(image_paths_field, str):
71 | if image_paths_field.strip() == "":
72 | return None
73 | return [image_paths_field.strip()]
74 | if isinstance(image_paths_field, list):
75 | # 递归处理每个元素,保证都是字符串
76 | paths = []
77 | for p in image_paths_field:
78 | if isinstance(p, str) and p.strip():
79 | paths.append(p.strip())
80 | return paths if paths else None
81 | return None
82 |
--------------------------------------------------------------------------------
/spectrumlab/models/deepseek_vl.py:
--------------------------------------------------------------------------------
1 | from typing import Optional, Union, Dict, Any
2 | from .base_api import BaseAPIModel
3 | from spectrumlab.config import Config
4 | from openai import OpenAI
5 |
6 |
7 | class DeepSeek_VL2(BaseAPIModel):
8 | def __init__(
9 | self,
10 | api_key: Optional[str] = None,
11 | base_url: Optional[str] = None,
12 | model_name: Optional[str] = None,
13 | **kwargs,
14 | ):
15 | config = Config()
16 |
17 | # Use provided parameters or fall back to config
18 | self.api_key = api_key or config.deepseek_vl_2_api_key
19 | self.base_url = base_url or config.deepseek_vl_2_base_url
20 | self.model_name = model_name or config.deepseek_vl_2_model_name
21 |
22 | # Validate that we have required configuration
23 | if not self.api_key:
24 | raise ValueError(
25 |                 "DeepSeek-VL2 API key not found. Please set DEEPSEEK_VL_2_API_KEY in your .env file "
26 | "or provide api_key parameter."
27 | )
28 |
29 | self.client = OpenAI(
30 | api_key=self.api_key,
31 | base_url=self.base_url,
32 | )
33 |
34 | # Initialize parent class
35 | super().__init__(model_name=self.model_name, **kwargs)
36 |
37 | def generate(
38 | self, prompt: Union[str, Dict[str, Any]], max_tokens: int = 512
39 | ) -> str:
40 | """
41 | Generate response supporting both text and multimodal input.
42 |
43 | Args:
44 | prompt: Either text string or multimodal dict
45 | max_tokens: Maximum tokens to generate
46 |
47 | Returns:
48 | Generated response string
49 | """
50 |
51 |         # Build OpenAI-style chat messages from the prompt
52 | messages = []
53 |
54 | if isinstance(prompt, dict) and "images" in prompt:
55 | content = []
56 |
57 | content.append({"type": "text", "text": prompt["text"]})
58 |
59 | for image_data in prompt["images"]:
60 | content.append(image_data)
61 |
62 | messages.append({"role": "user", "content": content})
63 | else:
64 | text_content = prompt if isinstance(prompt, str) else prompt.get("text", "")
65 | messages.append({"role": "user", "content": text_content})
66 |
67 | try:
68 | response = self.client.chat.completions.create(
69 | model=self.model_name,
70 | messages=messages,
71 | max_tokens=max_tokens,
72 | )
73 | return response.choices[0].message.content
74 | except Exception as e:
75 |             raise RuntimeError(f"DeepSeek-VL2 API call failed: {e}")
76 |
--------------------------------------------------------------------------------
/spectrumlab/models/gpt4o_api.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Any, Optional, Union
2 | from .base_api import BaseAPIModel
3 | from spectrumlab.config import Config
4 | from openai import OpenAI
5 |
6 |
7 | class GPT4o(BaseAPIModel):
8 | def __init__(
9 | self,
10 | api_key: Optional[str] = None,
11 | base_url: Optional[str] = None,
12 | model_name: Optional[str] = None,
13 | **kwargs,
14 | ):
15 | config = Config()
16 |
17 | # Use provided parameters or fall back to config
18 | self.api_key = api_key or config.gpt4o_api_key
19 | self.base_url = base_url or config.gpt4o_base_url
20 | self.model_name = model_name or config.gpt4o_model_name
21 |
22 | # Validate that we have required configuration
23 | if not self.api_key:
24 | raise ValueError(
25 | "GPT-4o API key not found. Please set GPT4O_API_KEY in your .env file "
26 | "or provide api_key parameter."
27 | )
28 |
29 | self.client = OpenAI(
30 | api_key=self.api_key,
31 | base_url=self.base_url,
32 | )
33 |
34 | # Initialize parent class
35 | super().__init__(model_name=self.model_name, **kwargs)
36 |
37 | def generate(
38 | self, prompt: Union[str, Dict[str, Any]], max_tokens: int = 512
39 | ) -> str:
40 | """
41 | Generate response supporting both text and multimodal input.
42 |
43 | Args:
44 | prompt: Either text string or multimodal dict
45 | max_tokens: Maximum tokens to generate
46 |
47 | Returns:
48 | Generated response string
49 | """
50 | messages = []
51 |
52 | # Handle multimodal vs text-only prompts
53 | if isinstance(prompt, dict) and "images" in prompt:
54 | # Multimodal prompt
55 | content = []
56 |
57 | content.append({"type": "text", "text": prompt["text"]})
58 |
59 | for image_data in prompt["images"]:
60 | content.append(image_data)
61 |
62 | messages.append({"role": "user", "content": content})
63 | else:
64 | # Text-only prompt
65 | text_content = prompt if isinstance(prompt, str) else prompt.get("text", "")
66 | messages.append({"role": "user", "content": text_content})
67 |
68 | try:
69 | response = self.client.chat.completions.create(
70 | model=self.model_name,
71 | messages=messages,
72 | max_tokens=max_tokens,
73 | )
74 | return response.choices[0].message.content
75 | except Exception as e:
76 | raise RuntimeError(f"GPT-4o API call failed: {e}")
77 |
--------------------------------------------------------------------------------
/spectrumlab/models/grok_api.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Any, Optional, Union
2 | from .base_api import BaseAPIModel
3 | from spectrumlab.config import Config
4 | from openai import OpenAI
5 |
6 |
7 | class Grok_2_Vision(BaseAPIModel):
8 | def __init__(
9 | self,
10 | api_key: Optional[str] = None,
11 | base_url: Optional[str] = None,
12 | model_name: Optional[str] = None,
13 | **kwargs,
14 | ):
15 | config = Config()
16 |
17 | # Use provided parameters or fall back to config
18 | self.api_key = api_key or config.grok_2_vision_api_key
19 | self.base_url = base_url or config.grok_2_vision_base_url
20 | self.model_name = model_name or config.grok_2_vision_model_name
21 |
22 | # Validate that we have required configuration
23 | if not self.api_key:
24 | raise ValueError(
25 | "Grok-2-Vision API key not found. Please set GROK_2_VISION_API_KEY in your .env file "
26 | "or provide api_key parameter."
27 | )
28 |
29 | self.client = OpenAI(
30 | api_key=self.api_key,
31 | base_url=self.base_url,
32 | )
33 |
34 | # Initialize parent class
35 | super().__init__(model_name=self.model_name, **kwargs)
36 |
37 | def generate(
38 | self, prompt: Union[str, Dict[str, Any]], max_tokens: int = 512
39 | ) -> str:
40 | """
41 | Generate response supporting both text and multimodal input.
42 |
43 | Args:
44 | prompt: Either text string or multimodal dict
45 | max_tokens: Maximum tokens to generate
46 |
47 | Returns:
48 | Generated response string
49 | """
50 | messages = []
51 |
52 | # Handle multimodal vs text-only prompts
53 | if isinstance(prompt, dict) and "images" in prompt:
54 | # Multimodal prompt
55 | content = []
56 |
57 | content.append({"type": "text", "text": prompt["text"]})
58 |
59 | for image_data in prompt["images"]:
60 | content.append(image_data)
61 |
62 | messages.append({"role": "user", "content": content})
63 | else:
64 | # Text-only prompt
65 | text_content = prompt if isinstance(prompt, str) else prompt.get("text", "")
66 | messages.append({"role": "user", "content": text_content})
67 |
68 | try:
69 | response = self.client.chat.completions.create(
70 | model=self.model_name,
71 | messages=messages,
72 | max_tokens=max_tokens,
73 | )
74 | return response.choices[0].message.content
75 | except Exception as e:
76 | raise RuntimeError(f"Grok-2-Vision API call failed: {e}")
77 |
--------------------------------------------------------------------------------
/docs/en/benchmark.md:
--------------------------------------------------------------------------------
1 | # Benchmark
2 | ## Benchmark Overview
3 |
4 | The SpectrumLab benchmark adopts a hierarchical architecture, comprehensively evaluating a model's capabilities on spectroscopy tasks, from signal processing up to advanced semantic understanding. It consists of four main levels, each containing multiple sub-tasks suited to different types of spectral analysis.
5 |
6 | ## Benchmark Details
7 |
8 | ### 1. Signal Level
9 |
10 | Basic spectral signal processing and analysis, including the following subtasks:
11 |
12 | - **Spectrum Type Classification**: Identify different types of spectra (infrared, nuclear magnetic resonance, Raman, etc.).
13 | - **Spectrum Quality Assessment**: Determine whether a spectrogram is clear and complete, and whether it contains obvious noise.
14 | - **Basic Feature Extraction**: Identify basic features such as baselines, peaks, peak positions, and peak intensities in a spectrogram.
15 | - **Impurity Peak Detection**: Identify impurity peaks and anomalous signals in a spectrogram.
16 |
17 | ### 2. Perception Level
18 |
19 | Further spectral visual understanding and pattern recognition, covering:
20 |
21 | - **Basic Property Prediction**: Predict properties directly related to molecular ion peaks, solubility, and acidity/basicity from spectral features.
22 | - **Elemental Compositional Prediction**: Identify elemental composition and isotope patterns, e.g., from mass spectrometry.
23 | - **Functional Group Recognition**: Predict the likely functional groups of a molecule from spectral characteristics (especially characteristic peak positions).
24 | - **Peak Assignment**: Preliminarily assign the main peaks in a spectrum to the corresponding chemical groups.
25 |
26 | ### 3. Semantic Level
27 |
28 | Deep spectral semantic understanding and chemical knowledge reasoning, including:
29 |
30 | - **Fusing Spectroscopic Modalities**: Make comprehensive judgments by combining multiple spectral or molecular sources of information.
31 | - **Molecular Structure Elucidation**: Match the correct molecular structure from multiple candidates based on spectral information.
32 | - **Multimodal Molecular Reasoning**: Perform complex chemical reasoning and question answering based on spectral and textual information.
33 |
34 | ### 4. Generation Level
35 |
36 | Generate new chemical information creatively. The main tasks are:
37 |
38 | - **Forward Problems**: Infer the molecular structure from spectra, SMILES, or a combination of both.
39 | - **Inverse Problems**: Generate spectra, SMILES, etc. for a given molecular structure.
40 | - **De Novo Generation**: Generate novel, diverse, and chemically reasonable molecular structures (SMILES, 2D diagrams) and/or predicted multimodal information (spectra, properties) from scratch for a given objective, such as molecules with specific properties or ligands for a specific target.
41 |
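41 | In the Python library, each level corresponds to a benchmark group class, and each sub-task above is addressable by its subcategory name. A minimal sketch of loading data for evaluation (assuming the benchmark data has been extracted to `data/`; the class and method names below are the ones used in this repository):
42 |
43 | ```python
44 | from spectrumlab.benchmark.signal_group import SignalGroup
45 | from spectrumlab.benchmark.perception_group import PerceptionGroup
46 |
47 | # Signal level: load a single sub-task by its subcategory name
48 | signal_data = SignalGroup("data").get_data_by_subcategories(
49 |     ["Spectrum Type Classification"]
50 | )
51 |
52 | # Perception level: load every sub-task in the level at once
53 | perception_data = PerceptionGroup("data").get_data_by_subcategories("all")
54 | ```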
--------------------------------------------------------------------------------
/spectrumlab/models/internvl_api.py:
--------------------------------------------------------------------------------
1 | from typing import Optional, Union, Dict, Any
2 | from .base_api import BaseAPIModel
3 | from spectrumlab.config import Config
4 | from openai import OpenAI
5 |
6 |
7 | class InternVL(BaseAPIModel):
8 | def __init__(
9 | self,
10 | api_key: Optional[str] = None,
11 | base_url: Optional[str] = None,
12 | model_name: Optional[str] = None,
13 | **kwargs,
14 | ):
15 | config = Config()
16 |
17 | # Use provided parameters or fall back to config
18 | self.api_key = api_key or config.internvl_api_key
19 | self.base_url = base_url or config.internvl_base_url
20 | self.model_name = model_name or config.internvl_model_name
21 |
22 | # Validate that we have required configuration
23 | if not self.api_key:
24 | raise ValueError(
25 | "InternVL API key not found. Please set INTERNVL_API_KEY in your .env file "
26 | "or provide api_key parameter."
27 | )
28 |
29 | # Ensure base_url has proper protocol for OpenRouter/API services
30 | if self.base_url and not self.base_url.startswith(("http://", "https://")):
31 | self.base_url = f"https://{self.base_url}"
32 |
33 | self.client = OpenAI(
34 | api_key=self.api_key,
35 | base_url=self.base_url,
36 | )
37 |
38 | # Initialize parent class
39 | super().__init__(model_name=self.model_name, **kwargs)
40 |
41 | def generate(
42 | self,
43 | prompt: Union[str, Dict[str, Any]],
44 | max_tokens: int = 512,
45 | **generation_kwargs,
46 | ) -> str:
47 | """
48 | Generate response supporting both text and multimodal input.
49 |
50 | Args:
51 | prompt: Either text string or multimodal dict
52 | max_tokens: Maximum tokens to generate
53 | **generation_kwargs: Additional generation parameters like temperature, top_p, etc.
54 |
55 | Returns:
56 | Generated response string
57 | """
58 |
59 | # Link: https://internlm.intern-ai.org.cn/api/document
60 | messages = []
61 |
62 | if isinstance(prompt, dict) and "images" in prompt:
63 | content = []
64 |
65 | content.append({"type": "text", "text": prompt["text"]})
66 |
67 | for image_data in prompt["images"]:
68 | content.append(image_data)
69 |
70 | messages.append({"role": "user", "content": content})
71 | else:
72 | text_content = prompt if isinstance(prompt, str) else prompt.get("text", "")
73 | messages.append({"role": "user", "content": text_content})
74 |
75 | # Prepare API call parameters
76 | api_params = {
77 | "model": self.model_name,
78 | "messages": messages,
79 | "max_tokens": max_tokens,
80 | }
81 |
82 | # Add any additional generation parameters
83 | api_params.update(generation_kwargs)
84 |
85 | try:
86 | response = self.client.chat.completions.create(**api_params)
87 | return response.choices[0].message.content
88 | except Exception as e:
89 | raise RuntimeError(f"InternVL API call failed: {e}")
90 |
--------------------------------------------------------------------------------
/spectrumlab/models/README.md:
--------------------------------------------------------------------------------
1 | # Model Integration & Testing Pipeline
2 |
3 | This guide explains how to quickly adapt and test a new multimodal model in SpectrumLab.
4 |
5 | ## 1. Environment Configuration (`.env`)
6 |
7 | Add your model's API keys and endpoints to the `.env` file at the project root. Example:
8 |
9 | ```
10 | DEEPSEEK_API_KEY=your_deepseek_key
11 | DEEPSEEK_BASE_URL=https://api.deepseek.com
12 | DEEPSEEK_MODEL_NAME=deepseek-model
13 |
14 | GPT4O_API_KEY=your_gpt4o_key
15 | GPT4O_BASE_URL=https://api.gpt4o.com
16 | GPT4O_MODEL_NAME=gpt-4o
17 |
18 | INTERNVL_API_KEY=your_internvl_key
19 | INTERNVL_BASE_URL=https://api.internvl.com
20 | INTERNVL_MODEL_NAME=internvl-model
21 | ```
22 |
23 | ## 2. Config Class (`@config`)
24 |
25 | Ensure your model's config is added to `spectrumlab/config/base_config.py`:
26 |
27 | ```python
28 | @dataclass
29 | class Config:
30 | ...
31 | yourmodel_api_key: str = os.getenv("YOURMODEL_API_KEY")
32 | yourmodel_base_url: str = os.getenv("YOURMODEL_BASE_URL")
33 | yourmodel_model_name: str = os.getenv("YOURMODEL_MODEL_NAME")
34 | ```
35 |
36 | ## 3. Model Registration
37 |
38 | Implement your model in `spectrumlab/models/yourmodel_api.py` (inherit from `BaseAPIModel` or `BaseModel`); a minimal skeleton is sketched at the end of this guide.
39 |
40 | Register it in `spectrumlab/models/__init__.py`:
41 |
42 | ```python
43 | from .yourmodel_api import YourModel
44 | __all__ = [ ..., "YourModel" ]
45 | ```
46 |
47 | ## 4. Add Test File
48 |
49 | Create a test file in `tests/models/test_yourmodel.py`. Example:
50 |
51 | ```python
52 | import pytest
53 | from spectrumlab.models import YourModel
54 | from spectrumlab.utils.image_utils import encode_image_to_base64
55 | from spectrumlab.benchmark.signal_group import SignalGroup
56 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator
57 |
58 | def test_yourmodel_text_generation():
59 | model = YourModel()
60 | response = model.generate("What is spectroscopy?")
61 | assert isinstance(response, str)
62 | assert len(response) > 0
63 |
64 | def test_yourmodel_multimodal_generation():
65 | model = YourModel()
66 | image_path = "playground/models/test.png"
67 | image_base64 = encode_image_to_base64(image_path)
68 | prompt = {
69 | "text": "Please explain this spectroscopy image.",
70 | "images": [
71 | {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}}
72 | ],
73 | }
74 | response = model.generate(prompt)
75 | assert isinstance(response, str)
76 | assert len(response) > 0
77 |
78 | def test_yourmodel_signalgroup_evaluation():
79 | model = YourModel()
80 | signal_group = SignalGroup("data")
81 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
82 | evaluator = ChoiceEvaluator()
83 | results = evaluator.evaluate(data_items=data, model=model)
84 | assert "metrics" in results
85 | assert "overall" in results["metrics"]
86 | ```
87 |
88 | ## 5. Run Tests
89 |
90 | From the project root, run:
91 |
92 | ```
93 | pytest -s -v tests/models/test_yourmodel.py
94 | ```
95 |
96 | Or run all model tests:
97 |
98 | ```
99 | pytest -s -v tests/models/
100 | ```
101 |
102 | ---
103 |
104 | **Tip:**
105 |
106 | - Each model has its own test file for easy debugging and extension.
107 | - Add new models by following steps 1-4 above.
108 |
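109 | ---
110 |
111 | For reference, the model class from step 3 can start from a skeleton like the one below. This is only a sketch mirroring the existing `GPT4o` and `Grok_2_Vision` implementations in this folder; `YourModel` and the `yourmodel_*` config fields are placeholders to replace with your own names (they must match the fields you added to the `Config` class in step 2):
112 |
113 | ```python
114 | from typing import Any, Dict, Optional, Union
115 |
116 | from openai import OpenAI
117 |
118 | from spectrumlab.config import Config
119 | from .base_api import BaseAPIModel
120 |
121 |
122 | class YourModel(BaseAPIModel):
123 |     def __init__(
124 |         self,
125 |         api_key: Optional[str] = None,
126 |         base_url: Optional[str] = None,
127 |         model_name: Optional[str] = None,
128 |         **kwargs,
129 |     ):
130 |         config = Config()
131 |         # Use provided parameters or fall back to the .env-backed config
132 |         self.api_key = api_key or config.yourmodel_api_key
133 |         self.base_url = base_url or config.yourmodel_base_url
134 |         self.model_name = model_name or config.yourmodel_model_name
135 |         if not self.api_key:
136 |             raise ValueError("YourModel API key not found. Set YOURMODEL_API_KEY in .env.")
137 |         self.client = OpenAI(api_key=self.api_key, base_url=self.base_url)
138 |         super().__init__(model_name=self.model_name, **kwargs)
139 |
140 |     def generate(self, prompt: Union[str, Dict[str, Any]], max_tokens: int = 512) -> str:
141 |         # Multimodal prompts are dicts with "text" plus pre-encoded "images" entries
142 |         if isinstance(prompt, dict) and "images" in prompt:
143 |             content = [{"type": "text", "text": prompt["text"]}, *prompt["images"]]
144 |             messages = [{"role": "user", "content": content}]
145 |         else:
146 |             text = prompt if isinstance(prompt, str) else prompt.get("text", "")
147 |             messages = [{"role": "user", "content": text}]
148 |         response = self.client.chat.completions.create(
149 |             model=self.model_name, messages=messages, max_tokens=max_tokens
150 |         )
151 |         return response.choices[0].message.content
152 | ```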
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |

5 |
6 | A pioneering unified platform designed to systematize and accelerate deep learning research in spectroscopy.
7 |
8 |
9 | ## 🚀 Quick Start
10 |
11 | ### Environment Setup
12 |
13 | We recommend using conda and uv for environment management:
14 |
15 | ```bash
16 | # Clone the repository
17 | git clone https://github.com/little1d/SpectrumLab.git
18 | cd SpectrumLab
19 |
20 | # Create conda environment
21 | conda create -n spectrumlab python=3.10
22 | conda activate spectrumlab
23 |
24 | pip install uv
25 | uv pip install -e .
26 | ```
27 |
28 | ### Data Setup
29 |
30 | Download benchmark data from Hugging Face:
31 |
32 | - [SpectrumBench v1.0](https://huggingface.co/SpectrumWorld/spectrumbench_v_1.0/tree/main)
33 |
34 | Extract the data to the `data` directory in the project root.
35 |
36 | ### API Keys Configuration
37 |
38 | ```bash
39 | # Copy and edit environment configuration
40 | cp .env.example .env
41 | # Configure your API keys in the .env file
42 | ```
43 |
44 | ## 💻 Usage
45 |
46 | ### Python API
47 |
48 | ```python
49 | from spectrumlab.benchmark import get_benchmark_group
50 | from spectrumlab.models import GPT4o
51 | from spectrumlab.evaluator import get_evaluator
52 |
53 | # Load benchmark data
54 | benchmark = get_benchmark_group("perception")
55 | data = benchmark.get_data_by_subcategories("all")
56 |
57 | # Initialize model
58 | model = GPT4o()
59 |
60 | # Get evaluator
61 | evaluator = get_evaluator("perception")
62 |
63 | # Run evaluation
64 | results = evaluator.evaluate(
65 | data_items=data,
66 | model=model,
67 | save_path="./results"
68 | )
69 |
70 | print(f"Overall accuracy: {results['metrics']['overall']['accuracy']:.2f}%")
71 | ```
72 |
73 | ### Command Line Interface
74 |
75 | The CLI provides a simple way to run evaluations:
76 |
77 | ```bash
78 | # Basic evaluation
79 | spectrumlab eval --model gpt4o --level perception
80 |
81 | # Specify data path and output directory
82 | spectrumlab eval --model claude --level signal --data-path ./data --output ./my_results
83 |
84 | # Evaluate specific subcategories
85 | spectrumlab eval --model deepseek --level semantic --subcategories "IR_spectroscopy" "Raman_spectroscopy"
86 |
87 | # Customize output length
88 | spectrumlab eval --model internvl --level generation --max-length 1024
89 |
90 | # Get help
91 | spectrumlab eval --help
92 | ```
93 |
94 | ## 🤝 Contributing
95 |
96 | We welcome community contributions! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for detailed guidelines.
97 |
98 | ## Citation
99 |
100 | If you use SpectrumLab in your research, please cite our paper:
101 |
102 | ```bibtex
103 | @article{yang2025spectrumworldartificialintelligencefoundation,
104 | title={SpectrumWorld: Artificial Intelligence Foundation for Spectroscopy},
105 | author={Zhuo Yang and Jiaqing Xie and Shuaike Shen and Daolang Wang and Yeyun Chen and Ben Gao and Shuzhou Sun and Biqing Qi and Dongzhan Zhou and Lei Bai and Linjiang Chen and Shufei Zhang and Qinying Gu and Jun Jiang and Tianfan Fu and Yuqiang Li},
106 | year={2025},
107 | eprint={2508.01188},
108 | archivePrefix={arXiv},
109 | primaryClass={cs.LG},
110 | url={https://arxiv.org/abs/2508.01188},
111 | }
112 | ```
113 |
114 | ## Acknowledgments
115 |
116 | - **Experiment Tracking**: [SwanLab](https://github.com/SwanHubX/SwanLab/) for experiment management and visualization
117 | - **Choice Evaluator Framework**: Inspired by [MMAR](https://github.com/ddlBoJack/MMAR)
118 |
--------------------------------------------------------------------------------
/leaderboard/vue/src/components/TheWelcome.vue:
--------------------------------------------------------------------------------
1 | <script setup lang="ts">
2 | import WelcomeItem from './WelcomeItem.vue'
3 | import DocumentationIcon from './icons/IconDocumentation.vue'
4 | import ToolingIcon from './icons/IconTooling.vue'
5 | import EcosystemIcon from './icons/IconEcosystem.vue'
6 | import CommunityIcon from './icons/IconCommunity.vue'
7 | import SupportIcon from './icons/IconSupport.vue'
8 |
9 | const openReadmeInEditor = () => fetch('/__open-in-editor?file=README.md')
10 | </script>
11 |
12 | <template>
13 |   <WelcomeItem>
14 |     <template #icon>
15 |       <DocumentationIcon />
16 |     </template>
17 |     <template #heading>Documentation</template>
18 |
19 |     Vue’s
20 |     <a href="https://vuejs.org/" target="_blank" rel="noopener">official documentation</a>
21 |     provides you with all information you need to get started.
22 |   </WelcomeItem>
23 |
24 |   <WelcomeItem>
25 |     <template #icon>
26 |       <ToolingIcon />
27 |     </template>
28 |     <template #heading>Tooling</template>
29 |
30 |     This project is served and bundled with
31 |     <a href="https://vite.dev/guide/features.html" target="_blank" rel="noopener">Vite</a>. The
32 |     recommended IDE setup is
33 |     <a href="https://code.visualstudio.com/" target="_blank" rel="noopener">VSCode</a>
34 |     +
35 |     <a href="https://marketplace.visualstudio.com/items?itemName=Vue.volar" target="_blank" rel="noopener">Vue - Official</a>. If
36 |     you need to test your components and web pages, check out
37 |     <a href="https://vitest.dev/" target="_blank" rel="noopener">Vitest</a>
38 |     and
39 |     <a href="https://www.cypress.io/" target="_blank" rel="noopener">Cypress</a>
40 |     /
41 |     <a href="https://playwright.dev/" target="_blank" rel="noopener">Playwright</a>.
42 |
43 |     <br />
44 |
45 |     More instructions are available in
46 |     <a href="javascript:void(0)" @click.prevent="openReadmeInEditor"><code>README.md</code></a>.
47 |   </WelcomeItem>
48 |
49 |   <WelcomeItem>
50 |     <template #icon>
51 |       <EcosystemIcon />
52 |     </template>
53 |     <template #heading>Ecosystem</template>
54 |
55 |     Get official tools and libraries for your project:
56 |     <a href="https://pinia.vuejs.org/" target="_blank" rel="noopener">Pinia</a>,
57 |     <a href="https://router.vuejs.org/" target="_blank" rel="noopener">Vue Router</a>,
58 |     <a href="https://test-utils.vuejs.org/" target="_blank" rel="noopener">Vue Test Utils</a>, and
59 |     <a href="https://github.com/vuejs/devtools" target="_blank" rel="noopener">Vue Dev Tools</a>. If
60 |     you need more resources, we suggest paying
61 |     <a href="https://github.com/vuejs/awesome-vue" target="_blank" rel="noopener">Awesome Vue</a>
62 |     a visit.
63 |   </WelcomeItem>
64 |
65 |   <WelcomeItem>
66 |     <template #icon>
67 |       <CommunityIcon />
68 |     </template>
69 |     <template #heading>Community</template>
70 |
71 |     Got stuck? Ask your question on
72 |     <a href="https://chat.vuejs.org" target="_blank" rel="noopener">Vue Land</a>
73 |     (our official Discord server), or
74 |     <a href="https://stackoverflow.com/questions/tagged/vue.js" target="_blank" rel="noopener">StackOverflow</a>. You should also follow the official
75 |     <a href="https://bsky.app/profile/vuejs.org" target="_blank" rel="noopener">@vuejs.org</a>
76 |     Bluesky account or the
77 |     <a href="https://x.com/vuejs" target="_blank" rel="noopener">@vuejs</a>
78 |     X account for latest news in the Vue world.
79 |   </WelcomeItem>
80 |
81 |   <WelcomeItem>
82 |     <template #icon>
83 |       <SupportIcon />
84 |     </template>
85 |     <template #heading>Support Vue</template>
86 |
87 |     As an independent project, Vue relies on community backing for its sustainability. You can help
88 |     us by
89 |     <a href="https://vuejs.org/sponsor/" target="_blank" rel="noopener">becoming a sponsor</a>.
90 |   </WelcomeItem>
91 | </template>
92 |
--------------------------------------------------------------------------------
/spectrumlab/evaluator/choice_evaluator.py:
--------------------------------------------------------------------------------
1 | import re
2 | from typing import List, Dict, Union
3 | from .base import BaseEvaluator
4 | from spectrumlab.utils.image_utils import (
5 | prepare_images_for_prompt,
6 | normalize_image_paths,
7 | )
8 |
9 |
10 | class ChoiceEvaluator(BaseEvaluator):
11 | def __init__(self, prediction_key: str = "model_prediction"):
12 | super().__init__(prediction_key)
13 |
14 |     def _build_prompt(self, item: Dict) -> Union[str, Dict]:
15 | question = item.get("question", "")
16 | choices = item.get("choices", [])
17 | image_paths_field = item.get("image_path")
18 |
19 | option_lines = [f"{chr(65 + i)}. {choice}" for i, choice in enumerate(choices)]
20 | options_block = "\n".join(option_lines)
21 |
22 | text_parts = [
23 | f"Question: {question}",
24 | "",
25 | "Available options:",
26 | options_block,
27 | "",
28 | "Please analyze the question and options carefully. Your answer must be exactly one of the provided options, and must be copied verbatim from the options above.",
29 | "Return your answer using the format \\answer{...}, where the content inside the braces is exactly the text of your chosen option (not the option letter or number, and do not use \\box{} or any other wrapper).",
30 | "For example, if you choose the option '~1700 cm⁻¹', you should return: \\answer{~1700 cm⁻¹}",
31 | "Do not return just a value like '~1700 cm' or any partial/incomplete answer. The answer must match one of the options exactly.",
32 | "",
33 | "Your response:",
34 | ]
35 |
36 | text_content = "\n".join(text_parts)
37 |
38 | # Check if there are images
39 | image_paths = normalize_image_paths(image_paths_field)
40 |
41 | if image_paths:
42 | assert all(
43 | isinstance(p, str) for p in image_paths
44 | ), f"image_paths should be List[str], got {image_paths}"
45 | # Prepare image data
46 | image_data = prepare_images_for_prompt(image_paths)
47 |
48 | if image_data:
49 | # Return multimodal format
50 | return {"text": text_content, "images": image_data}
51 |
52 | # Return pure text format
53 | return text_content
54 |
55 | def _extract_prediction(self, response: str, item: Dict) -> str:
56 |         """Extract only the content inside \\answer{...}."""
57 | if not response:
58 | return ""
59 | answer_pattern = r"\\answer\{([^}]+)\}"
60 | matches = re.findall(answer_pattern, response)
61 | if matches:
62 | return matches[-1].strip()
63 | return ""
64 |
65 | def _calculate_accuracy(self, answer: str, prediction: str, item: Dict) -> bool:
66 | """Calculate accuracy using string matching from MMAR."""
67 | choices = item.get("choices", [])
68 | return self._string_match(answer, prediction, choices)
69 |
70 | def _string_match(self, answer: str, prediction: str, choices: List[str]) -> bool:
71 | # Adapted from: MMAR
72 | # Source: https://github.com/ddlBoJack/MMAR/blob/main/code/evaluation.py#L8
73 |
74 | def tokenize(text):
75 | return set(re.findall(r"\b\w+\b", text.lower()))
76 |
77 | prediction_tokens = tokenize(prediction)
78 | answer_tokens = tokenize(answer)
79 |
80 | if not prediction_tokens:
81 | return False
82 |
83 | # Get tokens from incorrect choices
84 | incorrect_tokens = set()
85 | for choice in choices:
86 | choice_tokens = tokenize(choice)
87 | if choice_tokens != answer_tokens:
88 | incorrect_tokens.update(choice_tokens - answer_tokens)
89 |
90 | # Two conditions for correct match
91 | cond1 = answer_tokens.issubset(
92 | prediction_tokens
93 | ) # All answer tokens in prediction
94 | cond2 = prediction_tokens.isdisjoint(
95 | incorrect_tokens
96 | ) # No incorrect choice tokens
97 |
98 | return cond1 and cond2
99 |
--------------------------------------------------------------------------------
/spectrumlab/config/base_config.py:
--------------------------------------------------------------------------------
1 | import os
2 | from dataclasses import dataclass
3 | from dotenv import load_dotenv
4 | from pathlib import Path
5 |
6 | # Load .env from project root directory
7 | project_root = Path(__file__).parent.parent.parent
8 | env_path = project_root / ".env"
9 | load_dotenv(env_path)
10 |
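11 | # Note: os.getenv returns None for any unset variable. The model wrappers in
12 | # spectrumlab/models validate their required keys at construction time and
13 | # raise a ValueError with setup instructions when a key is missing.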
11 |
12 | @dataclass
13 | class Config:
14 | # DeepSeek API Configuration
15 | deepseek_api_key: str = os.getenv("DEEPSEEK_API_KEY")
16 | deepseek_base_url: str = os.getenv("DEEPSEEK_BASE_URL")
17 | deepseek_model_name: str = os.getenv("DEEPSEEK_MODEL_NAME")
18 |
19 | # GPT-4o API Configuration
20 | gpt4o_api_key: str = os.getenv("GPT4O_API_KEY")
21 | gpt4o_base_url: str = os.getenv("GPT4O_BASE_URL")
22 | gpt4o_model_name: str = os.getenv("GPT4O_MODEL_NAME")
23 |
24 | # InternVL API Configuration
25 | internvl_api_key: str = os.getenv("INTERNVL_API_KEY")
26 | internvl_base_url: str = os.getenv("INTERNVL_BASE_URL")
27 | internvl_model_name: str = os.getenv("INTERNVL_MODEL_NAME")
28 |
29 | # Claude API Configuration
30 | claude_api_key: str = os.getenv("CLAUDE_API_KEY")
31 | claude_base_url: str = os.getenv("CLAUDE_BASE_URL")
32 | claude_sonnet_3_5_model_name: str = os.getenv("CLAUDE_SONNET_3_5")
33 | claude_opus_4_model_name: str = os.getenv("CLAUDE_OPUS_4")
34 | claude_haiku_3_5_model_name: str = os.getenv("CLAUDE_HAIKU_3_5")
35 | claude_sonnet_4_model_name: str = os.getenv("CLAUDE_SONNET_4")
36 |
37 | # GPT-4.1, GPT-4-Vision
38 | gpt4_1_api_key: str = os.getenv("GPT4_1_API_KEY")
39 | gpt4_1_base_url: str = os.getenv("GPT4_1_BASE_URL")
40 | gpt4_1_model_name: str = os.getenv("GPT4_1")
41 | gpt4_vision_api_key: str = os.getenv("GPT4_VISION_API_KEY")
42 | gpt4_vision_base_url: str = os.getenv("GPT4_VISION_BASE_URL")
43 | gpt4_vision_model_name: str = os.getenv("GPT4_VISION")
44 |
45 | # Grok-2-Vision
46 | grok_2_vision_api_key: str = os.getenv("GROK_2_VISION_API_KEY")
47 | grok_2_vision_base_url: str = os.getenv("GROK_2_VISION_BASE_URL")
48 | grok_2_vision_model_name: str = os.getenv("GROK_2_VISION")
49 |
50 | # Qwen-VL-Max
51 | qwen_vl_api_key: str = os.getenv("QWEN_VL_API_KEY")
52 | qwen_vl_base_url: str = os.getenv("QWEN_VL_BASE_URL")
53 | qwen_vl_model_name: str = os.getenv("QWEN_VL_MAX")
54 |
55 | # DeepSeek-VL-2
56 | deepseek_vl_2_api_key: str = os.getenv("DEEPSEEK_VL_2_API_KEY")
57 | deepseek_vl_2_base_url: str = os.getenv("DEEPSEEK_VL_2_BASE_URL")
58 | deepseek_vl_2_model_name: str = os.getenv("DEEPSEEK_VL_2")
59 |
60 | # Qwen-2.5-VL-32B
61 | qwen_2_5_vl_32b_api_key: str = os.getenv("QWEN_VL_API_KEY")
62 | qwen_2_5_vl_32b_base_url: str = os.getenv("QWEN_VL_BASE_URL")
63 | qwen_2_5_vl_32b_model_name: str = os.getenv("QWEN_2_5_VL_32B")
64 |
65 | # Qwen-2.5-VL-72B
66 | qwen_2_5_vl_72b_api_key: str = os.getenv("QWEN_VL_API_KEY")
67 | qwen_2_5_vl_72b_base_url: str = os.getenv("QWEN_VL_BASE_URL")
68 | qwen_2_5_vl_72b_model_name: str = os.getenv("QWEN_2_5_VL_72B")
69 |
70 | # Llama-Vision-11B
71 | llama_vision_11b_api_key: str = os.getenv("LLAMA_VISION_API_KEY")
72 | llama_vision_11b_base_url: str = os.getenv("LLAMA_VISION_BASE_URL")
73 | llama_vision_11b_model_name: str = os.getenv("LLAMA_VISION_11B")
74 |
75 | # Llama-Vision-90B
76 | llama_vision_90b_api_key: str = os.getenv("LLAMA_VISION_API_KEY")
77 | llama_vision_90b_base_url: str = os.getenv("LLAMA_VISION_BASE_URL")
78 | llama_vision_90b_model_name: str = os.getenv("LLAMA_VISION_90B")
79 |
80 | # Doubao-1.5-Vision-Pro
81 | doubao_1_5_vision_pro_api_key: str = os.getenv("DOUBAO_1_5_VISION_PRO_API_KEY")
82 | doubao_1_5_vision_pro_base_url: str = os.getenv("DOUBAO_1_5_VISION_PRO_BASE_URL")
83 | doubao_1_5_vision_pro_model_name: str = os.getenv("DOUBAO_1_5_VISION_PRO")
84 |
85 | # Doubao-1.5-Vision-Pro-Thinking
86 | doubao_1_5_vision_pro_thinking_api_key: str = os.getenv(
87 | "DOUBAO_1_5_VISION_PRO_THINKING_API_KEY"
88 | )
89 | doubao_1_5_vision_pro_thinking_base_url: str = os.getenv(
90 | "DOUBAO_1_5_VISION_PRO_THINKING_BASE_URL"
91 | )
92 | doubao_1_5_vision_pro_thinking_model_name: str = os.getenv(
93 | "DOUBAO_1_5_VISION_PRO_THINKING"
94 | )
95 |
--------------------------------------------------------------------------------
/run_ablation_experiments.py:
--------------------------------------------------------------------------------
1 | import swanlab
2 | from spectrumlab.models import Qwen_2_5_VL_32B
3 | from spectrumlab.benchmark.signal_group import SignalGroup
4 | from spectrumlab.benchmark.generation_group import GenerationGroup
5 | from spectrumlab.benchmark.perception_group import PerceptionGroup
6 | from spectrumlab.benchmark.semantic_group import SemanticGroup
7 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator
8 | from spectrumlab.evaluator.open_evaluator import OpenEvaluator
9 |
10 | # ABLATION_CONFIGS = [
11 | # {
12 | # "model_class": Qwen_2_5_VL_72B,
13 | # "model_name": "Qwen-2.5-VL-72B",
14 | # "temperature": 0,
15 | # "top_p": 1.0,
16 | # "save_dir": "./ablation_qwen_2_5_vl_72b_temp_0_evaluation_results",
17 | # },
18 | # ]
19 |
20 | ABLATION_CONFIGS = [
21 | {
22 | "model_class": Qwen_2_5_VL_32B,
23 | "model_name": "Qwen-2.5-VL-32B",
24 | "temperature": 1,
25 | "top_p": 1,
26 | "save_dir": "./ablation_qwen_2_5_vl_32b_baselines_evaluation_results",
27 | },
28 | ]
29 |
30 | # Define each group with its subcategories and evaluator (the Signal group is listed first for quick testing)
31 | GROUPS = [
32 | {
33 | "name": "Signal",
34 | "group": SignalGroup("data"),
35 | "evaluator": ChoiceEvaluator(),
36 |         "subcategories": None,  # None means all subcategories
37 | },
38 | {
39 | "name": "Perception",
40 | "group": PerceptionGroup("data"),
41 | "evaluator": ChoiceEvaluator(),
42 | "subcategories": None,
43 | },
44 | {
45 | "name": "Semantic",
46 | "group": SemanticGroup("data"),
47 | "evaluator": ChoiceEvaluator(),
48 | "subcategories": None,
49 | },
50 | {
51 | "name": "Generation",
52 | "group": GenerationGroup("data"),
53 | "evaluator": OpenEvaluator(),
54 | "subcategories": None,
55 | },
56 | ]
57 |
58 | for config in ABLATION_CONFIGS:
59 | print(f"\n{'='*60}")
60 | print(
61 | f"开始消融实验: {config['model_name']} (temperature={config['temperature']}, top_p={config['top_p']})"
62 | )
63 | print(f"{'='*60}")
64 |
65 | model = config["model_class"]()
66 |
67 |     # Initialize SwanLab experiment tracking
68 | swanlab.init(
69 | workspace="SpectrumLab",
70 | project="spectrumlab-ablation",
71 | experiment_name=f"{config['model_name']}_temp_{config['temperature']}_top_p_{config['top_p']}",
72 | config=config,
73 | )
74 |
75 |     # Iterate over each evaluation group
76 | for group_info in GROUPS:
77 | name = group_info["name"]
78 | group = group_info["group"]
79 | evaluator = group_info["evaluator"]
80 | subcategories = group_info["subcategories"]
81 | print(f"\n===== Evaluating {name} Group =====")
82 | data = group.get_data_by_subcategories(subcategories or "all")
83 |
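84 |         # Thin wrapper that pins temperature and top_p on every generate()
85 |         # call, so the evaluator can invoke the model without knowing about
86 |         # sampling parameters.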
84 | class ModelWithSamplingParams:
85 | def __init__(self, base_model, temperature, top_p):
86 | self.base_model = base_model
87 | self.temperature = temperature
88 | self.top_p = top_p
89 | self.model_name = base_model.model_name
90 |
91 | def generate(self, prompt, max_tokens=512):
92 | return self.base_model.generate(
93 | prompt,
94 | max_tokens=max_tokens,
95 | temperature=self.temperature,
96 | top_p=self.top_p,
97 | )
98 |
99 | wrapped_model = ModelWithSamplingParams(
100 | model, config["temperature"], config["top_p"]
101 | )
102 |
103 | results = evaluator.evaluate(
104 | data_items=data, model=wrapped_model, save_path=config["save_dir"]
105 | )
106 | accuracy = results["metrics"]["overall"]["accuracy"]
107 | print(f"{name} Group evaluation completed! Overall accuracy: {accuracy:.2f}%\n")
108 | swanlab.log({f"{name}_accuracy": accuracy})
109 |
110 | swanlab.finish()
111 | print(f"\n消融实验 {config['model_name']} 完成!")
112 | print(f"结果保存在: {config['save_dir']}")
113 |
114 | # use nohup in the terminal to start the evaluation
115 | # nohup python run_ablation_experiments.py > run_ablation.log 2>&1 &
116 |
--------------------------------------------------------------------------------
/spectrumlab/cli/main.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import sys
3 | from typing import Optional, List
4 |
5 | from .api import run_evaluation
6 | from spectrumlab.models import (
7 | GPT4o,
8 | Claude_Sonnet_3_5,
9 | DeepSeek_VL2,
10 | InternVL,
11 | Qwen_2_5_VL_32B,
12 | )
13 |
14 | AVAILABLE_MODELS = {
15 | "gpt4o": GPT4o,
16 | "claude": Claude_Sonnet_3_5,
17 | "deepseek": DeepSeek_VL2,
18 | "internvl": InternVL,
19 | "qwen-vl": Qwen_2_5_VL_32B,
20 | }
21 |
22 |
23 | def main(argv: Optional[List[str]] = None) -> int:
24 | parser = argparse.ArgumentParser(
25 | prog="spectrumlab",
26 | description="A pioneering unified platform designed to systematize and accelerate deep learning research in spectroscopy",
27 | )
28 |
29 | parser.add_argument("--version", action="version", version="%(prog)s 0.1.0")
30 |
31 | subparsers = parser.add_subparsers(dest="command", help="Available commands")
32 |
33 | eval_parser = subparsers.add_parser("eval", help="Run model evaluation")
34 |
35 | eval_parser.add_argument(
36 | "--model",
37 | "-m",
38 | required=True,
39 | choices=list(AVAILABLE_MODELS.keys()),
40 | help=f"Model name, options: {', '.join(AVAILABLE_MODELS.keys())}",
41 | )
42 |
43 | eval_parser.add_argument(
44 | "--level",
45 | "-l",
46 | required=True,
47 | choices=["signal", "perception", "semantic", "generation"],
48 | help="Evaluation level",
49 | )
50 |
51 | eval_parser.add_argument(
52 | "--subcategories",
53 | "-s",
54 | nargs="*",
55 | help="Specify subcategories (optional, default: all)",
56 | )
57 |
58 | eval_parser.add_argument(
59 | "--data-path", "-d", default="./data", help="Data path (default: ./data)"
60 | )
61 |
62 | eval_parser.add_argument(
63 | "--output", "-o", default="./results", help="Output path (default: ./results)"
64 | )
65 |
66 | eval_parser.add_argument(
67 | "--max-length", type=int, default=512, help="Max output length (default: 512)"
68 | )
69 |
70 | args = parser.parse_args(argv)
71 |
72 | if args.command == "eval":
73 | try:
74 | # Initialize the model
75 | if args.model not in AVAILABLE_MODELS:
76 | available = ", ".join(AVAILABLE_MODELS.keys())
77 | raise ValueError(
78 | f"Unsupported model: {args.model}. Available: {available}"
79 | )
80 |
81 | model_class = AVAILABLE_MODELS[args.model]
82 | model_instance = model_class()
83 |
84 | results = run_evaluation(
85 | model=model_instance,
86 | level=args.level,
87 | subcategories=args.subcategories,
88 | data_path=args.data_path,
89 | save_path=args.output,
90 | max_out_len=args.max_length,
91 | )
92 |
93 | print("\n" + "=" * 50)
94 | print("📊 Evaluation Results")
95 | print("=" * 50)
96 |
97 | if "error" in results:
98 | print(f"❌ Evaluation failed: {results['error']}")
99 | return 1
100 |
101 | metrics = results.get("metrics", {})
102 | overall = metrics.get("overall", {})
103 |
104 | print("✅ Evaluation completed!")
105 | print(f"📈 Overall accuracy: {overall.get('accuracy', 0):.2f}%")
106 | print(f"✅ Correct answers: {overall.get('correct', 0)}")
107 | print(f"📝 Total questions: {overall.get('total', 0)}")
108 |
109 | subcategory_metrics = metrics.get("subcategory_metrics", {})
110 | if subcategory_metrics:
111 | print("\n📋 Subcategory details:")
112 | for subcategory, sub_metrics in subcategory_metrics.items():
113 | acc = sub_metrics.get("accuracy", 0)
114 | correct = sub_metrics.get("correct", 0)
115 | total = sub_metrics.get("total", 0)
116 | print(f" {subcategory}: {acc:.2f}% ({correct}/{total})")
117 |
118 | print(f"\n💾 Results saved to: {args.output}")
119 | return 0
120 |
121 | except Exception as e:
122 | print(f"❌ Evaluation failed: {e}")
123 | return 1
124 |
125 | elif args.command is None:
126 | parser.print_help()
127 | return 0
128 | else:
129 | print(f"❌ Unknown command: {args.command}")
130 | parser.print_help()
131 | return 1
132 |
133 |
134 | if __name__ == "__main__":
135 | sys.exit(main())
136 |
--------------------------------------------------------------------------------
/spectrumlab/models/llama_api.py:
--------------------------------------------------------------------------------
1 | from typing import Optional, Union, Dict, Any
2 | from .base_api import BaseAPIModel
3 | from spectrumlab.config import Config
4 | from openai import OpenAI
5 |
6 |
7 | class Llama_Vision_11B(BaseAPIModel):
8 | def __init__(
9 | self,
10 | api_key: Optional[str] = None,
11 | base_url: Optional[str] = None,
12 | model_name: Optional[str] = None,
13 | **kwargs,
14 | ):
15 | config = Config()
16 |
17 | # Use provided parameters or fall back to config
18 | self.api_key = api_key or config.llama_vision_11b_api_key
19 | self.base_url = base_url or config.llama_vision_11b_base_url
20 | self.model_name = model_name or config.llama_vision_11b_model_name
21 |
22 | # Validate that we have required configuration
23 | if not self.api_key:
24 | raise ValueError(
25 | "Llama-Vision-11B API key not found. Please set LLAMA_VISION_11B_API_KEY in your .env file "
26 | "or provide api_key parameter."
27 | )
28 |
29 | self.client = OpenAI(
30 | api_key=self.api_key,
31 | base_url=self.base_url,
32 | )
33 |
34 | # Initialize parent class
35 | super().__init__(model_name=self.model_name, **kwargs)
36 |
37 | def generate(
38 | self, prompt: Union[str, Dict[str, Any]], max_tokens: int = 512
39 | ) -> str:
40 | """
41 | Generate response supporting both text and multimodal input.
42 |
43 | Args:
44 | prompt: Either text string or multimodal dict
45 | max_tokens: Maximum tokens to generate
46 |
47 | Returns:
48 | Generated response string
49 | """
50 |
52 | messages = []
53 |
54 | if isinstance(prompt, dict) and "images" in prompt:
55 | content = []
56 |
57 | content.append({"type": "text", "text": prompt["text"]})
58 |
59 | for image_data in prompt["images"]:
60 | content.append(image_data)
61 |
62 | messages.append({"role": "user", "content": content})
63 | else:
64 | text_content = prompt if isinstance(prompt, str) else prompt.get("text", "")
65 | messages.append({"role": "user", "content": text_content})
66 |
67 | try:
68 | response = self.client.chat.completions.create(
69 | model=self.model_name,
70 | messages=messages,
71 | max_tokens=max_tokens,
72 | )
73 | return response.choices[0].message.content
74 | except Exception as e:
75 | raise RuntimeError(f"Llama-Vision-11B API call failed: {e}")
76 |
77 |
78 | class Llama_Vision_90B(BaseAPIModel):
79 | def __init__(
80 | self,
81 | api_key: Optional[str] = None,
82 | base_url: Optional[str] = None,
83 | model_name: Optional[str] = None,
84 | **kwargs,
85 | ):
86 | config = Config()
87 |
88 | # Use provided parameters or fall back to config
89 | self.api_key = api_key or config.llama_vision_90b_api_key
90 | self.base_url = base_url or config.llama_vision_90b_base_url
91 | self.model_name = model_name or config.llama_vision_90b_model_name
92 |
93 | # Validate that we have required configuration
94 | if not self.api_key:
95 | raise ValueError(
96 | "Llama-Vision-90B API key not found. Please set LLAMA_VISION_90B_API_KEY in your .env file "
97 | "or provide api_key parameter."
98 | )
99 |
100 | self.client = OpenAI(
101 | api_key=self.api_key,
102 | base_url=self.base_url,
103 | )
104 |
105 | # Initialize parent class
106 | super().__init__(model_name=self.model_name, **kwargs)
107 |
108 | def generate(
109 | self, prompt: Union[str, Dict[str, Any]], max_tokens: int = 512
110 | ) -> str:
111 | """
112 | Generate response supporting both text and multimodal input.
113 |
114 | Args:
115 | prompt: Either text string or multimodal dict
116 | max_tokens: Maximum tokens to generate
117 |
118 | Returns:
119 | Generated response string
120 | """
121 |
123 | messages = []
124 |
125 | if isinstance(prompt, dict) and "images" in prompt:
126 | content = []
127 |
128 | content.append({"type": "text", "text": prompt["text"]})
129 |
130 | for image_data in prompt["images"]:
131 | content.append(image_data)
132 |
133 | messages.append({"role": "user", "content": content})
134 | else:
135 | text_content = prompt if isinstance(prompt, str) else prompt.get("text", "")
136 | messages.append({"role": "user", "content": text_content})
137 |
138 | try:
139 | response = self.client.chat.completions.create(
140 | model=self.model_name,
141 | messages=messages,
142 | max_tokens=max_tokens,
143 | )
144 | return response.choices[0].message.content
145 | except Exception as e:
146 | raise RuntimeError(f"Llama-Vision-90B API call failed: {e}")
147 |
--------------------------------------------------------------------------------
/spectrumlab/models/gpt4_v_api.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Any, Optional, Union
2 | from .base_api import BaseAPIModel
3 | from spectrumlab.config import Config
4 | from openai import OpenAI
5 |
6 |
7 | class GPT4_1(BaseAPIModel):
8 | def __init__(
9 | self,
10 | api_key: Optional[str] = None,
11 | base_url: Optional[str] = None,
12 | model_name: Optional[str] = None,
13 | **kwargs,
14 | ):
15 | config = Config()
16 |
17 | # Use provided parameters or fall back to config
18 | self.api_key = api_key or config.gpt4_1_api_key
19 | self.base_url = base_url or config.gpt4_1_base_url
20 | self.model_name = model_name or config.gpt4_1_model_name
21 |
22 | # Validate that we have required configuration
23 | if not self.api_key:
24 | raise ValueError(
25 | "GPT-4.1 API key not found. Please set GPT4_1_API_KEY in your .env file "
26 | "or provide api_key parameter."
27 | )
28 |
29 | self.client = OpenAI(
30 | api_key=self.api_key,
31 | base_url=self.base_url,
32 | )
33 |
34 | # Initialize parent class
35 | super().__init__(model_name=self.model_name, **kwargs)
36 |
37 | def generate(
38 | self, prompt: Union[str, Dict[str, Any]], max_tokens: int = 512
39 | ) -> str:
40 | """
41 | Generate response supporting both text and multimodal input.
42 |
43 | Args:
44 | prompt: Either text string or multimodal dict
45 | max_tokens: Maximum tokens to generate
46 |
47 | Returns:
48 | Generated response string
49 | """
50 | messages = []
51 |
52 | # Handle multimodal vs text-only prompts
53 | if isinstance(prompt, dict) and "images" in prompt:
54 | # Multimodal prompt
55 | content = []
56 |
57 | content.append({"type": "text", "text": prompt["text"]})
58 |
59 | for image_data in prompt["images"]:
60 | content.append(image_data)
61 |
62 | messages.append({"role": "user", "content": content})
63 | else:
64 | # Text-only prompt
65 | text_content = prompt if isinstance(prompt, str) else prompt.get("text", "")
66 | messages.append({"role": "user", "content": text_content})
67 |
68 | try:
69 | response = self.client.chat.completions.create(
70 | model=self.model_name,
71 | messages=messages,
72 | max_tokens=max_tokens,
73 | )
74 | return response.choices[0].message.content
75 | except Exception as e:
76 | raise RuntimeError(f"GPT-4.1 API call failed: {e}")
77 |
78 |
79 | class GPT4_Vision(BaseAPIModel):
80 | def __init__(
81 | self,
82 | api_key: Optional[str] = None,
83 | base_url: Optional[str] = None,
84 | model_name: Optional[str] = None,
85 | **kwargs,
86 | ):
87 | config = Config()
88 |
89 | # Use provided parameters or fall back to config
90 | self.api_key = api_key or config.gpt4_vision_api_key
91 | self.base_url = base_url or config.gpt4_vision_base_url
92 | self.model_name = model_name or config.gpt4_vision_model_name
93 |
94 | # Validate that we have required configuration
95 | if not self.api_key:
96 | raise ValueError(
97 | "GPT-4 Vision API key not found. Please set GPT4_VISION_API_KEY in your .env file "
98 | "or provide api_key parameter."
99 | )
100 |
101 | self.client = OpenAI(
102 | api_key=self.api_key,
103 | base_url=self.base_url,
104 | )
105 |
106 | # Initialize parent class
107 | super().__init__(model_name=self.model_name, **kwargs)
108 |
109 | def generate(
110 | self, prompt: Union[str, Dict[str, Any]], max_tokens: int = 512
111 | ) -> str:
112 | """
113 | Generate response supporting both text and multimodal input.
114 |
115 | Args:
116 | prompt: Either text string or multimodal dict
117 | max_tokens: Maximum tokens to generate
118 |
119 | Returns:
120 | Generated response string
121 | """
122 | messages = []
123 |
124 | # Handle multimodal vs text-only prompts
125 | if isinstance(prompt, dict) and "images" in prompt:
126 | # Multimodal prompt
127 | content = []
128 |
129 | content.append({"type": "text", "text": prompt["text"]})
130 |
131 | for image_data in prompt["images"]:
132 | content.append(image_data)
133 |
134 | messages.append({"role": "user", "content": content})
135 | else:
136 | # Text-only prompt
137 | text_content = prompt if isinstance(prompt, str) else prompt.get("text", "")
138 | messages.append({"role": "user", "content": text_content})
139 |
140 | try:
141 | response = self.client.chat.completions.create(
142 | model=self.model_name,
143 | messages=messages,
144 | max_tokens=max_tokens,
145 | )
146 | return response.choices[0].message.content
147 | except Exception as e:
148 | raise RuntimeError(f"GPT-4 Vision API call failed: {e}")
149 |
--------------------------------------------------------------------------------
/spectrumlab/models/doubao_api.py:
--------------------------------------------------------------------------------
1 | from typing import Optional, Union, Dict, Any
2 | from .base_api import BaseAPIModel
3 | from spectrumlab.config import Config
4 | from openai import OpenAI
5 |
6 |
7 | class Doubao_1_5_Vision_Pro(BaseAPIModel):
8 | def __init__(
9 | self,
10 | api_key: Optional[str] = None,
11 | base_url: Optional[str] = None,
12 | model_name: Optional[str] = None,
13 | **kwargs,
14 | ):
15 | config = Config()
16 |
17 | # Use provided parameters or fall back to config
18 | self.api_key = api_key or config.doubao_1_5_vision_pro_api_key
19 | self.base_url = base_url or config.doubao_1_5_vision_pro_base_url
20 | self.model_name = model_name or config.doubao_1_5_vision_pro_model_name
21 |
22 | # Validate that we have required configuration
23 | if not self.api_key:
24 | raise ValueError(
25 | "Doubao-1.5-Vision-Pro API key not found. Please set DOUBAO_1_5_VISION_PRO_API_KEY in your .env file "
26 | "or provide api_key parameter."
27 | )
28 |
29 | self.client = OpenAI(
30 | api_key=self.api_key,
31 | base_url=self.base_url,
32 | )
33 |
34 | # Initialize parent class
35 | super().__init__(model_name=self.model_name, **kwargs)
36 |
37 | def generate(
38 | self, prompt: Union[str, Dict[str, Any]], max_tokens: int = 512
39 | ) -> str:
40 | """
41 | Generate response supporting both text and multimodal input.
42 |
43 | Args:
44 | prompt: Either text string or multimodal dict
45 | max_tokens: Maximum tokens to generate
46 |
47 | Returns:
48 | Generated response string
49 | """
50 |
52 | messages = []
53 |
54 | if isinstance(prompt, dict) and "images" in prompt:
55 | content = []
56 |
57 | content.append({"type": "text", "text": prompt["text"]})
58 |
59 | for image_data in prompt["images"]:
60 | content.append(image_data)
61 |
62 | messages.append({"role": "user", "content": content})
63 | else:
64 | text_content = prompt if isinstance(prompt, str) else prompt.get("text", "")
65 | messages.append({"role": "user", "content": text_content})
66 |
67 | try:
68 | response = self.client.chat.completions.create(
69 | model=self.model_name,
70 | messages=messages,
71 | max_tokens=max_tokens,
72 | )
73 | return response.choices[0].message.content
74 | except Exception as e:
75 | raise RuntimeError(f"Doubao-1.5-Vision-Pro API call failed: {e}")
76 |
77 |
78 | class Doubao_1_5_Vision_Pro_Thinking(BaseAPIModel):
79 | def __init__(
80 | self,
81 | api_key: Optional[str] = None,
82 | base_url: Optional[str] = None,
83 | model_name: Optional[str] = None,
84 | **kwargs,
85 | ):
86 | config = Config()
87 |
88 | # Use provided parameters or fall back to config
89 | self.api_key = api_key or config.doubao_1_5_vision_pro_thinking_api_key
90 | self.base_url = base_url or config.doubao_1_5_vision_pro_thinking_base_url
91 | self.model_name = model_name or config.doubao_1_5_vision_pro_thinking_model_name
92 |
93 | # Validate that we have required configuration
94 | if not self.api_key:
95 | raise ValueError(
96 | "Doubao-1.5-Vision-Pro-Thinking API key not found. Please set DOUBAO_1_5_VISION_PRO_THINKING_API_KEY in your .env file "
97 | "or provide api_key parameter."
98 | )
99 |
100 | self.client = OpenAI(
101 | api_key=self.api_key,
102 | base_url=self.base_url,
103 | )
104 |
105 | # Initialize parent class
106 | super().__init__(model_name=self.model_name, **kwargs)
107 |
108 | def generate(
109 | self, prompt: Union[str, Dict[str, Any]], max_tokens: int = 512
110 | ) -> str:
111 | """
112 | Generate response supporting both text and multimodal input.
113 |
114 | Args:
115 | prompt: Either text string or multimodal dict
116 | max_tokens: Maximum tokens to generate
117 |
118 | Returns:
119 | Generated response string
120 | """
121 |
123 | messages = []
124 |
125 | if isinstance(prompt, dict) and "images" in prompt:
126 | content = []
127 |
128 | content.append({"type": "text", "text": prompt["text"]})
129 |
130 | for image_data in prompt["images"]:
131 | content.append(image_data)
132 |
133 | messages.append({"role": "user", "content": content})
134 | else:
135 | text_content = prompt if isinstance(prompt, str) else prompt.get("text", "")
136 | messages.append({"role": "user", "content": text_content})
137 |
138 | try:
139 | response = self.client.chat.completions.create(
140 | model=self.model_name,
141 | messages=messages,
142 | max_tokens=max_tokens,
143 | )
144 | return response.choices[0].message.content
145 | except Exception as e:
146 | raise RuntimeError(f"Doubao-1.5-Vision-Pro-Thinking API call failed: {e}")
147 |
--------------------------------------------------------------------------------
/docs/en/tutorial.md:
--------------------------------------------------------------------------------
1 | # Tutorial
2 |
3 | Welcome to SpectrumLab! This tutorial will help you quickly understand spectroscopy analysis, the SpectrumLab platform, and how to use it to evaluate the performance of large language models on spectroscopy tasks.
4 |
5 | ## What is Spectroscopy?
6 |
7 | Spectroscopy is the branch of science that studies the interaction between matter and electromagnetic radiation. By analyzing the absorption, emission, or scattering spectra of a substance, we can obtain detailed information about its structure, composition, and properties.
8 |
9 | ## The Importance of Spectroscopy
10 |
11 | Spectroscopy plays an important role in modern science. By probing the interaction between matter and electromagnetic radiation, it provides a key means of understanding the composition, structure, and properties of matter. In chemistry, spectroscopy is used for molecular structure analysis and studies of reaction mechanisms. In materials science, it characterizes nanomaterials and supports surface analysis. In biology, it is used to study protein folding and to detect metabolites. Spectroscopy is also widely used in clinical medicine, where spectroscopic techniques enable non-invasive diagnosis and early disease detection, making it an indispensable tool in modern scientific research and applications.
12 |
13 | ## Common Spectroscopic Techniques
14 |
15 | - **Infrared spectroscopy (IR)**: Analyzes molecular vibrations and identifies functional groups. Characteristic absorption peaks (such as C=O, O–H, and C–H) fall within well-defined frequency ranges, making IR a core tool for determining functional groups.
16 | - **Nuclear Magnetic Resonance (NMR)**: Provides information about the atomic environments and structural connectivity in a molecule through chemical shifts, signal intensities, and coupling constants; it is routinely used to determine molecular structure, especially for organic compounds.
17 | - **Ultraviolet-Visible Spectroscopy (UV-Vis)**: Studies electronic transitions and conjugated systems. It is well suited to determining electronic structure, conjugation length, and optical properties, but does not directly provide structural connectivity information.
18 | - **Mass Spectrometry (MS)**: Determines molecular weight and allows molecular structure to be inferred from fragmentation patterns, making it an important tool for establishing molecular composition and structural features.
19 | - **Raman spectroscopy (Raman)**: Provides molecular vibration information and, like IR, can identify chemical bond vibrations. It is particularly sensitive to symmetric molecules and non-polar bonds and is often used as a complement to IR.
20 | - **HSQC spectroscopy**: A two-dimensional NMR experiment (^1H–^13C or ^1H–^15N) in which each cross peak represents a directly bonded proton-heteroatom pair. It unambiguously assigns one-bond ^1H–^13C (or ^1H–^15N) correlations and assists in peak assignment and structure elucidation.
21 |
22 | ## What is SpectrumLab?
23 |
24 | ### Overview
25 |
26 | SpectrumLab is a groundbreaking unified platform and comprehensive toolkit designed to accelerate and systematize deep learning research in chemical spectroscopy. It streamlines the entire AI-driven spectroscopy research lifecycle, from data pre-processing to model evaluation, and provides researchers and developers with a modular, scalable, and easy-to-use ecosystem of Python libraries and tools to drive artificial intelligence research and applications in spectroscopy.
27 |
28 | ### Core Functions
29 |
30 | #### Modular and Extensible Architecture
31 |
32 | SpectrumLab adopts a flexible modular design; its core components include:
33 |
34 | - **Benchmark Group**: Organizes the SpectrumBench dataset hierarchically, supports multiple spectral modalities and task types, and lets users flexibly combine subsets to create customized evaluation tasks.
35 | - **Model Integration**: Provides a unified framework and standardized API for seamlessly integrating and evaluating external models, whether commercial closed-source models (such as GPT-4o) or locally deployed open-source models.
36 | - **Evaluator**: The core of the evaluation engine. It supports customizing evaluation metrics and protocols for different task types (such as multiple-choice and open-ended generation questions), ensuring rigorous, task-appropriate evaluation (see the wiring sketch below).
37 |
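38 | In code, the three components connect in just a few lines. Below is a minimal wiring sketch using classes shown in the API reference; any built-in or custom model can stand in for `GPT4o`:
39 |
40 | ```python
41 | from spectrumlab.benchmark.signal_group import SignalGroup
42 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator
43 | from spectrumlab.models import GPT4o
44 |
45 | group = SignalGroup("data")      # Benchmark Group: loads SpectrumBench data
46 | model = GPT4o()                  # Model Integration: unified generate() API
47 | evaluator = ChoiceEvaluator()    # Evaluator: task-specific scoring
48 |
49 | data = group.get_data_by_subcategories("all")
50 | results = evaluator.evaluate(data_items=data, model=model)
51 | print(results["metrics"]["overall"]["accuracy"])
52 | ```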
38 | #### A Comprehensive Toolchain Ecosystem
39 |
40 | SpectrumLab provides a Python library, distributed through PyPI, that integrates core modules for data processing, model development, automatic evaluation, and visualization, greatly simplifying the entire research workflow.
41 |
42 | #### SpectrumAnnotator
43 |
44 | SpectrumLab integrates closely with the innovative SpectrumAnnotator component, which uses the reasoning capabilities of advanced multimodal large models to automatically generate high-quality, diverse benchmark data from a seed dataset and to build evaluation tasks efficiently.
45 |
46 | #### Leaderboards
47 |
48 | To ensure transparency and reproducibility, SpectrumLab maintains a public leaderboard system that systematically tracks and compares the performance of models across all 14 tasks, promoting fair competition and shared progress in the field.
49 |
50 | ## Related Links
51 |
52 | - [API Reference](/en/api) - Detailed interface descriptions and code examples
53 | - [Benchmark](/en/benchmark) - Details of the evaluation metrics and dataset
54 | - [Leaderboard](https://huggingface.co/spaces/SpectrumWorld/SpectrumLeaderboard) - Compare model performance across tasks
55 |
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | # SpectrumLab Documentation
2 |
3 | Welcome to the SpectrumLab documentation! This guide will help you contribute to our documentation system.
4 |
5 | ## About This Documentation
6 |
7 | This documentation is built with [VitePress](https://vitepress.dev/), a static site generator designed for creating fast, beautiful documentation websites. Our documentation supports both English and Chinese languages to serve our global community.
8 |
9 | ## Contributing to the Documentation
10 |
11 | Contributing to our documentation is straightforward! Simply clone the project, add or modify Markdown files, commit your changes, and create a Pull Request.
12 |
13 | ### Prerequisites
14 |
15 | Before you begin, ensure you have the following installed:
16 |
17 | - [Node.js](https://nodejs.org/) (version 18 or higher)
18 | - [npm](https://www.npmjs.com/) (comes with Node.js)
19 |
20 | ### Step 1: Clone the Repository
21 |
22 | ```bash
23 | git clone https://github.com/little1d/SpectrumLab.git
24 | cd SpectrumLab
25 | ```
26 |
27 | ### Step 2: Install Dependencies
28 |
29 | Navigate to the docs directory and install the required dependencies:
30 |
31 | ```bash
32 | cd docs
33 | npm install
34 | ```
35 |
36 | Alternatively, you can add VitePress as a development dependency explicitly:
37 |
38 | ```bash
39 | npm add -D vitepress
40 | npm install
41 | ```
42 |
43 | ### Step 3: Create a New Branch
44 |
45 | Create a new branch for your documentation changes. We recommend using the naming convention `docs/<topic>` (e.g., `docs/api`, `docs/examples`, `docs/benchmarks`).
46 |
47 | ```bash
48 | git checkout -b docs/<topic>
49 | ```
50 |
51 | For detailed branching and contribution guidelines, please refer to our [Contributing Guide](https://github.com/little1d/SpectrumLab/blob/main/CONTRIBUTING.md).
52 |
53 | ### Step 4: Preview Your Changes
54 |
55 | #### Local Development
56 |
57 | To start the development server and preview your changes in real-time:
58 |
59 | ```bash
60 | npm run docs:dev
61 | ```
62 |
63 | This will start a local server (typically at `http://localhost:5173`) where you can preview your documentation changes.
64 |
65 | #### Production Build
66 |
67 | If you need to test the complete compilation and packaging:
68 |
69 | ```bash
70 | # Build the documentation
71 | npm run docs:build
72 |
73 | # Preview the production build
74 | npm run docs:preview
75 | ```
76 |
77 | > **Note:** These commands are configured in `docs/package.json`. You can modify them if needed.
78 |
79 | ### Step 5: Deployment
80 |
81 | We have automated deployment set up using GitHub Actions. The deployment process is triggered automatically when:
82 |
83 | - Changes are pushed to the `main` branch
84 | - Changes are made to files in the `docs/` directory
85 | - Changes are made to the deployment workflow file
86 |
87 | **All you need to do is create a Pull Request!** Once your PR is merged into the main branch, the documentation will be automatically deployed to GitHub Pages.
88 |
89 | ## Documentation Structure
90 |
91 | Our documentation is organized as follows:
92 |
93 | ```
94 | docs/
95 | ├── .vitepress/ # VitePress configuration
96 | ├── public/ # Static assets
97 | ├── assets/ # Documentation assets
98 | ├── en/ # English documentation
99 | │ ├── index.md
100 | │ ├── tutorial.md
101 | │ ├── api.md
102 | │ └── benchmark.md
103 | ├── zh/ # Chinese documentation
104 | │ ├── index.md
105 | │ ├── tutorial.md
106 | │ ├── api.md
107 | │ └── benchmark.md
108 | ├── index.md # Homepage
109 | ├── package.json # Dependencies and scripts
110 | └── README.md # This file
111 | ```
112 |
113 | ## Writing Guidelines
114 |
115 | ### Language Support
116 |
117 | - **English**: Primary language for the documentation
118 | - **Chinese**: Full translation available for Chinese-speaking users
119 |
120 | ### Content Guidelines
121 |
122 | 1. **Be Clear and Concise**: Write in simple, clear language
123 | 2. **Use Code Examples**: Include practical examples wherever possible
124 | 3. **Maintain Consistency**: Follow the existing style and structure
125 | 4. **Cross-Reference**: Link to related sections when appropriate
126 |
127 | ### Markdown Features
128 |
129 | VitePress supports many Markdown features including:
130 |
131 | - **Code Blocks**: With syntax highlighting
132 | - **Custom Containers**: For tips, warnings, and notes
133 | - **Mathematical Expressions**: Using LaTeX syntax
134 | - **Mermaid Diagrams**: For flowcharts and diagrams
135 |
136 | Example:
137 |
138 | ```markdown
139 | ::: tip
140 | This is a helpful tip!
141 | :::
142 |
143 | ::: warning
144 | This is a warning message.
145 | :::
146 |
147 | ::: danger
148 | This is a danger alert.
149 | :::
150 | ```
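151 | Inline math uses LaTeX syntax between `$` delimiters. Note that math rendering has to be enabled in the VitePress configuration, so treat this as a sketch:
152 |
153 | ```markdown
154 | When $a \ne 0$, the equation $ax^2 + bx + c = 0$ has two distinct real solutions whenever $b^2 - 4ac > 0$.
155 | ```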
151 |
152 | ## Getting Help
153 |
154 | If you encounter any issues or have questions about contributing to the documentation:
155 |
156 | 1. Check our [existing issues](https://github.com/little1d/SpectrumLab/issues)
157 | 2. Create a new issue with the `documentation` label
158 | 3. Refer to the [VitePress documentation](https://vitepress.dev/) for technical questions
159 | 4. Review our [Contributing Guide](https://github.com/little1d/SpectrumLab/blob/main/CONTRIBUTING.md) for general contribution guidelines
160 |
161 | ## Resources
162 |
163 | - [VitePress Guide](https://vitepress.dev/guide/getting-started)
164 | - [Markdown Guide](https://www.markdownguide.org/)
165 | - [GitHub Docs: About Pull Requests](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/about-pull-requests)
166 |
167 | Thank you for contributing to SpectrumLab documentation! 🎉
168 |
--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
1 | # SpectrumLab Environment Configuration
2 | # Copy this file to .env and fill in your actual API keys and configurations
3 |
4 | # =============================================================================
5 | # DeepSeek API Configuration
6 | # =============================================================================
7 | DEEPSEEK_API_KEY=your_deepseek_api_key_here
8 | DEEPSEEK_BASE_URL=https://api.deepseek.com
9 | DEEPSEEK_MODEL_NAME=deepseek-chat
10 |
11 | # =============================================================================
12 | # GPT-4o API Configuration
13 | # =============================================================================
14 | GPT4O_API_KEY=your_openai_api_key_here
15 | GPT4O_BASE_URL=https://api.openai.com/v1
16 | GPT4O_MODEL_NAME=gpt-4o
17 |
18 | # =============================================================================
19 | # InternVL API Configuration
20 | # =============================================================================
21 | INTERNVL_API_KEY=your_internvl_api_key_here
22 | INTERNVL_BASE_URL=https://chat.intern-ai.org.cn/api/v1/
23 | INTERNVL_MODEL_NAME=internvl3-latest
24 |
25 | # =============================================================================
26 | # Claude API Configuration
27 | # =============================================================================
28 | CLAUDE_API_KEY=your_anthropic_api_key_here
29 | CLAUDE_BASE_URL=https://api.anthropic.com
30 | CLAUDE_SONNET_3_5=anthropic/claude-3.5-sonnet
31 | CLAUDE_OPUS_4=claude-opus-4-20250514
32 | CLAUDE_HAIKU_3_5=claude-3-5-haiku-20241022
33 | CLAUDE_SONNET_4=anthropic/claude-sonnet-4
34 |
35 | # =============================================================================
36 | # GPT-4.1 and GPT-4-Vision Configuration
37 | # =============================================================================
38 | GPT4_1_API_KEY=your_gpt4_1_api_key_here
39 | GPT4_1_BASE_URL=https://api.openai.com
40 | GPT4_1=gpt-4.1-2025-04-14
41 |
42 | GPT4_VISION_API_KEY=your_gpt4_vision_api_key_here
43 | GPT4_VISION_BASE_URL=https://api.openai.com
44 | GPT4_VISION=gpt-4-vision-preview
45 |
46 | # =============================================================================
47 | # Grok-2-Vision Configuration
48 | # =============================================================================
49 | GROK_2_VISION_API_KEY=your_grok_api_key_here
50 | GROK_2_VISION_BASE_URL=https://api.x.ai
51 | GROK_2_VISION=x-ai/grok-2-vision-1212
52 |
53 | # =============================================================================
54 | # Qwen-VL Configuration
55 | # =============================================================================
56 | QWEN_VL_API_KEY=your_qwen_api_key_here
57 | QWEN_VL_BASE_URL=https://dashscope.aliyuncs.com
58 | QWEN_VL_MAX=qwen-vl-max
59 |
60 | # =============================================================================
61 | # Qwen-2.5-VL-32B Configuration
62 | # =============================================================================
63 | # Note: base_config.py currently reuses QWEN_VL_API_KEY / QWEN_VL_BASE_URL for this model,
64 | # so the two variables below are currently unused.
65 | QWEN_2_5_VL_32B_API_KEY=your_qwen_2_5_vl_32b_api_key_here
66 | QWEN_2_5_VL_32B_BASE_URL=https://dashscope.aliyuncs.com
65 | QWEN_2_5_VL_32B=Qwen/Qwen2.5-VL-32B-Instruct
66 |
67 | # =============================================================================
68 | # Qwen-2.5-VL-72B Configuration
69 | # =============================================================================
70 | # Note: base_config.py currently reuses QWEN_VL_API_KEY / QWEN_VL_BASE_URL for this model,
71 | # so the two variables below are currently unused.
72 | QWEN_2_5_VL_72B_API_KEY=your_qwen_2_5_vl_72b_api_key_here
73 | QWEN_2_5_VL_72B_BASE_URL=https://dashscope.aliyuncs.com
72 | QWEN_2_5_VL_72B=Qwen/Qwen2.5-VL-72B-Instruct
73 |
74 | # =============================================================================
75 | # DeepSeek-VL-2 Configuration
76 | # =============================================================================
77 | DEEPSEEK_VL_2_API_KEY=your_deepseek_vl_2_api_key_here
78 | DEEPSEEK_VL_2_BASE_URL=https://api.deepseek.com
79 | DEEPSEEK_VL_2=deepseek-ai/deepseek-vl2
80 |
81 | # =============================================================================
82 | # Llama-Vision-11B Configuration
83 | # =============================================================================
84 | LLAMA_VISION_11B_API_KEY=your_llama_vision_11b_api_key_here
85 | LLAMA_VISION_11B_BASE_URL=https://api.meta.com
86 | LLAMA_VISION_11B=llama-3.2-11b-vision-instruct
87 |
88 | # =============================================================================
89 | # Llama-Vision-90B Configuration
90 | # =============================================================================
91 | LLAMA_VISION_90B_API_KEY=your_llama_vision_90b_api_key_here
92 | LLAMA_VISION_90B_BASE_URL=https://api.meta.com
93 | LLAMA_VISION_90B=meta-llama/llama-3.2-90b-vision-instruct
94 |
95 | # =============================================================================
96 | # Doubao-1.5-Vision-Pro Configuration
97 | # =============================================================================
98 | DOUBAO_1_5_VISION_PRO_API_KEY=your_doubao_vision_pro_api_key_here
99 | DOUBAO_1_5_VISION_PRO_BASE_URL=https://ark.cn-beijing.volces.com
100 | DOUBAO_1_5_VISION_PRO=doubao-1-5-vision-pro-250328
101 |
102 | # =============================================================================
103 | # Doubao-1.5-Vision-Pro-Thinking Configuration
104 | # =============================================================================
105 | DOUBAO_1_5_VISION_PRO_THINKING_API_KEY=your_doubao_vision_pro_thinking_api_key_here
106 | DOUBAO_1_5_VISION_PRO_THINKING_BASE_URL=https://ark.cn-beijing.volces.com
107 | DOUBAO_1_5_VISION_PRO_THINKING=doubao-1.5-thinking-vision-pro-250428
108 |
109 | # =============================================================================
110 | # Instructions
111 | # =============================================================================
112 | # 1. Copy this file to .env: cp .env.example .env
113 | # 2. Replace all placeholder values (your_*_here) with your actual API keys
114 | # 3. Configure the base URLs according to your API providers
115 | # 4. Make sure to keep your .env file secure and never commit it to version control
116 |
--------------------------------------------------------------------------------
/docs/zh/api.md:
--------------------------------------------------------------------------------
1 | # API Reference
2 |
3 | SpectrumLab provides a concise yet powerful API to help you quickly build deep learning applications for spectroscopy. This document covers how to use the core modules and how to extend them.
4 |
5 | ## Benchmark Module
6 |
7 | The Benchmark module is the data-access core of SpectrumLab. It provides a unified interface for loading and managing spectroscopy benchmark data at the different levels.
8 |
9 | ### Getting a Benchmark Group
10 |
11 | The `get_benchmark_group` function returns one of the four benchmark levels:
12 |
13 | ```python
14 | from spectrumlab.benchmark import get_benchmark_group
15 |
16 | signal_group = get_benchmark_group("signal")          # Signal level
17 | perception_group = get_benchmark_group("perception")  # Perception level
18 | semantic_group = get_benchmark_group("semantic")      # Semantic level
19 | generation_group = get_benchmark_group("generation")  # Generation level
20 | ```
21 |
22 | ### Data Access
23 |
24 | Each Benchmark Group provides flexible data-access methods:
25 |
26 | ```python
27 | # Get all data
28 | data = signal_group.get_data_by_subcategories("all")
29 |
30 | # Get data for specific subcategories
31 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
32 |
33 | # List all subcategories available in this Benchmark Group
34 | subcategories = signal_group.get_available_subcategories()
35 | print(subcategories)
36 | ```
37 |
38 | **Method reference:**
39 |
40 | - `get_data_by_subcategories("all")`: returns the data for all subcategories at this level
41 | - `get_data_by_subcategories([...])`: returns the data for the specified subcategories
42 | - `get_available_subcategories()`: lists the names of all subcategories at the current level
43 |
44 | ## Model Module
45 |
46 | The Model module provides a unified model interface and supports integrating a variety of built-in API models as well as custom models.
47 |
48 | ### Using Built-in Models
49 |
50 | SpectrumLab ships with interfaces to a range of advanced multimodal models:
51 |
52 | ```python
53 | from spectrumlab.models import GPT4o
54 |
55 | gpt4o = GPT4o()
56 |
57 | response = gpt4o.generate("Your Prompts")
58 | ```
59 |
60 | **Supported models include:**
61 |
62 | - `GPT4o`: OpenAI GPT-4o
63 | - `Claude_Sonnet_3_5`, `Claude_Opus_4`, and other Claude variants: Anthropic Claude family
64 | - `DeepSeek_VL2`: DeepSeek-VL
65 | - `Qwen_2_5_VL_32B`, `Qwen_2_5_VL_72B`, and other Qwen-VL variants: Qwen-VL family
66 | - `InternVL`: InternVL family
67 |
68 | ### Custom Models
69 |
70 | Integrate your own model by subclassing `BaseModel`:
71 |
72 | ```python
73 | from spectrumlab.models.base import BaseModel
74 |
75 | class CustomModel(BaseModel):
76 |     def __init__(self):
77 |         super().__init__()
78 |         self.model_name = "CustomModel"
79 |
80 |     def generate(self, prompt, max_out_len=512):
81 |         # Implement your model-calling logic here
82 |         # (an API call, local inference, etc.)
83 |         response = "..."  # replace with your model's output string
84 |         return response
85 | ```
86 |
87 | **Requirements for custom models:**
88 |
89 | - Must implement the `generate` method
90 | - Should accept both text and multimodal input
91 | - Must return the response as a string
91 |
92 | ## Evaluator Module
93 |
94 | The Evaluator module implements the core evaluation logic, providing a standardized evaluation pipeline and flexible customization options.
95 |
96 | ### Basic Usage
97 |
98 | For multiple-choice evaluation tasks, use `ChoiceEvaluator` directly:
99 |
100 | ```python
101 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator
102 |
103 | evaluator = ChoiceEvaluator()
104 |
105 | results = evaluator.evaluate(
106 |     data_items=data,
107 |     model=model,
108 |     max_out_len=512,
109 |     save_path="./eval_results"
110 | )
111 | ```
112 |
113 | **Parameters:**
114 |
115 | - `data_items`: the list of evaluation data items
116 | - `model`: the model instance
117 | - `max_out_len`: maximum output length
118 | - `save_path`: where results are saved
119 |
120 | ### Custom Evaluators
121 |
122 | Subclass `BaseEvaluator` to tailor the evaluation logic to a specific task:
123 |
124 | ```python
125 | import re
126 |
127 | from spectrumlab.evaluator.base import BaseEvaluator
128 |
129 | class CustomEvaluator(BaseEvaluator):
130 |     def _build_prompt(self, item):
131 |         """Build the input prompt."""
132 |         question = item["question"]
133 |         choices = item["choices"]
134 |         return f"Question: {question}\nOptions: {choices}\nPlease choose the correct answer:"
135 |
136 |     def _extract_prediction(self, response, item):
137 |         """Extract the predicted answer from the model response."""
138 |         match = re.search(r"\\box\{([^}]+)\}", response)
139 |         return match.group(1) if match else ""
140 |
141 |     def _calculate_accuracy(self, answer, prediction, item):
142 |         """Decide whether the prediction is correct."""
143 |         return answer.strip().lower() == prediction.strip().lower()
144 | ```
145 |
146 | **Core methods:**
147 |
148 | - `_build_prompt`: builds the model input from a data item
149 | - `_extract_prediction`: extracts the predicted answer from the model output
150 | - `_calculate_accuracy`: decides whether a prediction is correct
150 |
151 | ## Complete Evaluation Example
152 |
153 | The following example shows the full pipeline, from data loading to result analysis:
154 |
155 | ```python
156 | from spectrumlab.benchmark.signal_group import SignalGroup
157 | from spectrumlab.models import GPT4o
158 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator
159 |
160 | # 1. Load the data
161 | signal_group = SignalGroup("data")
162 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
163 |
164 | # 2. Initialize the model and evaluator
165 | model = GPT4o()
166 | evaluator = ChoiceEvaluator()
167 |
168 | # 3. Run the evaluation
169 | results = evaluator.evaluate(
170 |     data_items=data,
171 |     model=model,
172 |     save_path="./evaluation_results"
173 | )
174 |
175 | # 4. Inspect the results
176 | print(f"Evaluation finished! Overall accuracy: {results['metrics']['overall']['accuracy']:.2f}%")
177 |
178 | # Detailed per-subcategory results
179 | for subcategory, metrics in results['metrics']['subcategory_metrics'].items():
180 |     print(f"{subcategory}: {metrics['accuracy']:.2f}% ({metrics['correct']}/{metrics['total']})")
181 | ```
182 |
183 | ## Data Format
184 |
185 | ### Input Data Format
186 |
187 | Each data item follows this schema:
188 |
189 | ```python
190 | {
191 |     "question": "Based on this IR spectrum, which compound is most likely?",
192 |     "choices": ["Benzoic acid", "Benzaldehyde", "Benzyl alcohol", "Phenylacetic acid"],
193 |     "answer": "Benzoic acid",
194 |     "image_path": "./data/signal/ir_001.png",  # optional
195 |     "category": "Chemistry",
196 |     "sub_category": "Spectrum Type Classification"
197 | }
198 | ```
199 |
200 | ### Output Result Format
201 |
202 | The evaluation results contain detailed performance metrics:
203 |
204 | ```python
205 | {
206 | "metrics": {
207 | "overall": {
208 | "accuracy": 85.5,
209 | "correct": 171,
210 | "total": 200
211 | },
212 | "subcategory_metrics": {
213 | "Spectrum Type Classification": {
214 | "accuracy": 90.0,
215 | "correct": 45,
216 | "total": 50
217 | }
218 | }
219 | },
220 | "saved_files": ["result_001.json"],
221 | "total_items": 200
222 | }
223 | ```
224 |
225 | ## Environment Configuration
226 |
227 | API models require the corresponding environment variables to be configured before use (see `.env.example` for the full list):
228 |
229 | ```bash
230 | # OpenAI GPT-4o
231 | export GPT4O_API_KEY="your_openai_api_key"
232 |
233 | # Anthropic Claude
234 | export CLAUDE_API_KEY="your_anthropic_api_key"
235 |
236 | # DeepSeek
237 | export DEEPSEEK_API_KEY="your_deepseek_api_key"
238 |
239 | # Other models...
240 | ```
241 |
242 | ## Quick Start
243 |
244 | 1. **Install the package**: `pip install spectrumlab`
245 | 2. **Configure API keys**: set the corresponding environment variables
246 | 3. **Load the data**: fetch evaluation data with the Benchmark module
247 | 4. **Choose a model**: initialize a built-in or custom model
248 | 5. **Run the evaluation**: execute the evaluation with an Evaluator and save the results
249 |
--------------------------------------------------------------------------------
/leaderboard/manage_leaderboard.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Simplified Leaderboard Management for SpectrumLab
4 | Core functionality for batch import operations
5 | """
6 |
7 | import json
8 | from datetime import datetime, timezone
9 | from pathlib import Path
10 | from typing import Dict, Any, Optional
11 | import statistics
12 |
13 |
14 | class LeaderboardManager:
15 | def __init__(self, leaderboard_path: str = "leaderboard/leaderboard_v_1.0.json"):
16 | self.leaderboard_path = Path(leaderboard_path)
17 | self.data = self._load_leaderboard()
18 |
19 | def _load_leaderboard(self) -> Dict[str, Any]:
20 | """Load leaderboard data from JSON file"""
21 | if self.leaderboard_path.exists():
22 | try:
23 | with open(self.leaderboard_path, "r", encoding="utf-8") as f:
24 | content = f.read().strip()
25 | if not content: # File is empty
26 | return {"leaderboard_info": {"total_models": 0}, "models": []}
27 | return json.loads(content)
28 | except (json.JSONDecodeError, ValueError) as e:
29 | print(
30 | f"Warning: Invalid JSON in {self.leaderboard_path}. Creating new leaderboard. Error: {e}"
31 | )
32 | return {"leaderboard_info": {"total_models": 0}, "models": []}
33 | else:
34 | return {"leaderboard_info": {"total_models": 0}, "models": []}
35 |
36 | def _save_leaderboard(self):
37 | """Save leaderboard data to JSON file"""
38 | # Ensure directory exists
39 | self.leaderboard_path.parent.mkdir(parents=True, exist_ok=True)
40 |
41 | # Update total_models count
42 | self.data["leaderboard_info"]["total_models"] = len(self.data["models"])
43 |
44 | # Sort models by overall accuracy (descending)
45 | self.data["models"].sort(
46 | key=lambda x: x["results"].get("overall_accuracy", 0), reverse=True
47 | )
48 |
49 | with open(self.leaderboard_path, "w", encoding="utf-8") as f:
50 | json.dump(self.data, f, indent=4, ensure_ascii=False)
51 |
52 | def _calculate_category_accuracy(
53 | self, category_results: Dict[str, Any]
54 | ) -> Optional[float]:
55 | """Calculate category accuracy from subcategories"""
56 | subcategories = category_results.get("subcategories", {})
57 | valid_scores = []
58 |
59 |         for subcat_data in subcategories.values():
60 | accuracy = subcat_data.get("accuracy")
61 | if accuracy is not None:
62 | valid_scores.append(accuracy)
63 |
64 | return round(statistics.mean(valid_scores), 2) if valid_scores else None
65 |
66 | def _calculate_overall_accuracy(self, results: Dict[str, Any]) -> Optional[float]:
67 | """Calculate overall accuracy from all categories"""
68 | category_scores = []
69 |
70 | for category in ["Signal", "Perception", "Semantic", "Generation"]:
71 | if category in results:
72 | category_accuracy = results[category].get("accuracy")
73 | if category_accuracy is not None:
74 | category_scores.append(category_accuracy)
75 |
76 | return round(statistics.mean(category_scores), 2) if category_scores else None
77 |
78 | def _recalculate_model_scores(self, model: Dict[str, Any]):
79 | """Recalculate all accuracy scores for a model"""
80 | results = model["results"]
81 |
82 | # Calculate category accuracies
83 | for category in ["Signal", "Perception", "Semantic", "Generation"]:
84 | if category in results:
85 | calculated_accuracy = self._calculate_category_accuracy(
86 | results[category]
87 | )
88 | if calculated_accuracy is not None:
89 | results[category]["accuracy"] = calculated_accuracy
90 |
91 | # Calculate overall accuracy
92 | overall_accuracy = self._calculate_overall_accuracy(results)
93 | if overall_accuracy is not None:
94 | results["overall_accuracy"] = overall_accuracy
95 |
96 | def find_model(self, model_name: str) -> Optional[Dict[str, Any]]:
97 | """Find a model by name"""
98 | for model in self.data["models"]:
99 | if model["name"] == model_name:
100 | return model
101 | return None
102 |
103 | def add_model(
104 | self,
105 | model_info: Dict[str, Any],
106 | subcategory_scores: Dict[str, Dict[str, float]],
107 | ):
108 | """Add a new model to the leaderboard"""
109 | # Check if model already exists
110 | existing_model = self.find_model(model_info["name"])
111 | if existing_model:
112 | print(f"Model '{model_info['name']}' already exists. Skipping...")
113 | return False
114 |
115 | # Create model entry
116 | model_entry = {
117 | "name": model_info["name"],
118 | "name_link": model_info.get("name_link", ""),
119 | "submitter": model_info.get("submitter", ""),
120 | "submitter_link": model_info.get("submitter_link", ""),
121 |             "submission_time": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
122 | "model_type": model_info.get("model_type", "unknown"),
123 | "model_size": model_info.get("model_size", "Unknown"),
124 | "is_multimodal": model_info.get("is_multimodal", False),
125 | "results": {},
126 | "model_info": {
127 | "homepage": model_info.get("homepage", ""),
128 | "paper": model_info.get("paper", ""),
129 | "code": model_info.get("code", ""),
130 | "description": model_info.get("description", ""),
131 | },
132 | }
133 |
134 | # Add subcategory scores
135 | for category, subcats in subcategory_scores.items():
136 | model_entry["results"][category] = {
137 | "accuracy": None, # Will be calculated
138 | "subcategories": {},
139 | }
140 |
141 | for subcat, accuracy in subcats.items():
142 | model_entry["results"][category]["subcategories"][subcat] = {
143 |                     "accuracy": accuracy
144 | }
145 |
146 | # Calculate derived scores
147 | self._recalculate_model_scores(model_entry)
148 |
149 | # Add to leaderboard
150 | self.data["models"].append(model_entry)
151 | self._save_leaderboard()
152 |
153 | print(f"✅ Successfully added model '{model_info['name']}' to leaderboard")
154 | return True
155 |
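156 | 
157 | if __name__ == "__main__":
158 |     # Illustrative usage sketch only: the model entry and subcategory
159 |     # scores below are hypothetical placeholders, not real results.
160 |     manager = LeaderboardManager()
161 |     manager.add_model(
162 |         model_info={"name": "Example-Model", "model_type": "api"},
163 |         subcategory_scores={"Signal": {"Spectrum Type Classification": 90.0}},
164 |     )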
--------------------------------------------------------------------------------
/spectrumlab/benchmark/base.py:
--------------------------------------------------------------------------------
1 | from abc import ABC
2 | from pathlib import Path
3 | from typing import List, Dict, Union
4 | import json
5 | import os
6 |
7 |
8 | class BaseGroup(ABC):
9 | def __init__(self, level: str, path: str = "./data"):
10 | self.level = level
11 | self.data_root = Path(path).resolve()
12 | self.path = self.data_root / self.level
13 | self.datasets = {}
14 | self._load_datasets()
15 |
16 | def _load_datasets(self):
17 | """
18 | Load benchmark datasets for the current level.
19 | """
20 | print(f"Loading datasets for level '{self.level}'...")
21 | print(f"Looking for local datasets in: {self.path}")
22 |
23 | if self.path.exists() and self.path.is_dir():
24 | print("✅ Local datasets found, loading...")
25 | self._load_from_local(self.path)
26 | else:
27 | print("❌ Local datasets not found, falling back to HuggingFace...")
28 | self._load_from_remote(self.path)
29 |
30 | print(
31 | f"📊 Total available sub-categories in '{self.level}' level: {len(self.datasets)}"
32 | )
33 | print(f"📋 Available sub-categories: {list(self.datasets.keys())}")
34 |
35 | def _load_from_local(self, level_path: Path):
36 | self.datasets = {}
37 |
38 | for sub_category_dir in level_path.iterdir():
39 | if not sub_category_dir.is_dir():
40 | continue
41 | sub_category_name = sub_category_dir.name
42 | json_filename = f"{sub_category_name.replace(' ', '_')}_datasets.json"
43 | json_file = sub_category_dir / json_filename
44 |
45 | if json_file.exists():
46 | try:
47 | data = self._load_json(json_file)
48 | if data:
49 | self.datasets[sub_category_name] = data
50 | print(
51 | f" ✔ Loaded {len(data)} items from '{sub_category_name}'"
52 | )
53 | else:
54 | print(f" ⚠ Empty data in '{sub_category_name}'")
55 | except Exception as e:
56 | print(f" ✖ Failed to load '{sub_category_name}': {e}")
57 | else:
58 | print(f" ⚠ No {json_filename} found in '{sub_category_name}'")
59 |
60 | def _load_from_remote(self, local_level_path: Path):
61 |         # TODO: download the datasets from HuggingFace; currently loads nothing
62 | self.datasets = {}
63 |
64 | def _fix_image_path(self, image_path):
65 | if isinstance(image_path, list):
66 | return [self._fix_image_path(p) for p in image_path]
67 | if not image_path or not str(image_path).strip():
68 | return image_path
69 |         # Handle paths that start with ./data/ or data/
70 | s = str(image_path)
71 | if s.startswith("./data/"):
72 | relative_part = s[7:]
73 | corrected_path = self.data_root / relative_part
74 | return str(corrected_path)
75 | if s.startswith("data/"):
76 | corrected_path = self.data_root / s[5:]
77 | return str(corrected_path)
78 |         # Absolute paths are returned unchanged
79 | if os.path.isabs(s):
80 | return s
81 |         # Resolve any other relative path against data_root
82 | corrected_path = self.data_root / s
83 | return str(corrected_path)
84 |
85 | def _load_json(self, file_path: Path) -> List[Dict]:
86 | try:
87 | with open(file_path, "r", encoding="utf-8") as f:
88 | data = json.load(f)
89 | if isinstance(data, list):
90 | for item in data:
91 | if isinstance(item, dict) and "image_path" in item:
92 | if item["image_path"]:
93 | original_path = item["image_path"]
94 | item["image_path"] = self._fix_image_path(original_path)
95 |                     # Fix the answer field (if it is an image path or a list of image paths)
96 | if isinstance(item, dict) and "answer" in item:
97 | answer = item["answer"]
98 |                         # Only fix string answers that look like image paths
99 | if isinstance(answer, str) and answer.lower().endswith(
100 | (".png", ".jpg", ".jpeg", ".bmp", ".gif", ".webp")
101 | ):
102 | item["answer"] = self._fix_image_path(answer)
103 |                         # If answer is a list (rare), fix each element recursively
104 | if isinstance(answer, list):
105 | item["answer"] = [
106 | self._fix_image_path(a) for a in answer
107 | ]
108 | return data
109 | else:
110 | print(f"Warning: Expected list in {file_path}, got {type(data)}")
111 | return []
112 | except json.JSONDecodeError as e:
113 | print(f"Error parsing JSON in {file_path}: {e}")
114 | return []
115 | except Exception as e:
116 | print(f"Error reading file {file_path}: {e}")
117 | return []
118 |
119 | def get_data_by_subcategories(
120 | self, subcategories: Union[str, List[str]] = "all"
121 | ) -> List[Dict]:
122 | if subcategories == "all":
123 | subcategories = self.get_available_subcategories()
124 | print(
125 | f"🔍 Selecting all available sub-categories ({len(subcategories)} total)"
126 | )
127 | elif isinstance(subcategories, str):
128 | subcategories = [subcategories]
129 | print(f"🔍 Selecting sub-category: '{subcategories[0]}'")
130 | else:
131 | print(f"🔍 Selecting {len(subcategories)} sub-categories: {subcategories}")
132 |
133 | available = set(self.get_available_subcategories())
134 | invalid_subcategories = [s for s in subcategories if s not in available]
135 | if invalid_subcategories:
136 | raise ValueError(
137 | f"Invalid subcategory names: {invalid_subcategories}. "
138 | f"Available subcategories: {list(available)}"
139 | )
140 |
141 | all_data = []
142 | total_items = 0
143 | for subcategory in subcategories:
144 | category_data = self.datasets.get(subcategory, [])
145 | all_data.extend(category_data)
146 | total_items += len(category_data)
147 | print(f" 📦 '{subcategory}': {len(category_data)} items")
148 |
149 | print(f"✅ Total selected items: {total_items}")
150 | return all_data
151 |
152 | def get_available_subcategories(self) -> List[str]:
153 | return list(self.datasets.keys())
154 |
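155 | 
156 | if __name__ == "__main__":
157 |     # Illustrative sketch only: concrete groups such as SignalGroup
158 |     # subclass BaseGroup with a fixed level; "./data" is a placeholder path.
159 |     from spectrumlab.benchmark.signal_group import SignalGroup
160 | 
161 |     group = SignalGroup("./data")
162 |     print(group.get_available_subcategories())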
--------------------------------------------------------------------------------
/spectrumlab/evaluator/open_evaluator.py:
--------------------------------------------------------------------------------
1 | import re
2 | from typing import Dict, Any, Optional
3 | from .base import BaseEvaluator
4 | from spectrumlab.utils.image_utils import (
5 | prepare_images_for_prompt,
6 | normalize_image_paths,
7 | )
8 | from spectrumlab.models import GPT4o
9 | from tqdm import tqdm
10 |
11 |
12 | class OpenEvaluator(BaseEvaluator):
13 | def __init__(
14 | self,
15 | prediction_key: str = "model_prediction",
16 | score_model: Optional[Any] = None,
17 | ):
18 | super().__init__(prediction_key)
19 |         # A custom scoring model may be supplied; defaults to GPT4o
20 | self.score_model = score_model or GPT4o()
21 |
22 | def _build_prompt(self, item: Dict) -> Any:
23 | """
24 |         Build the problem-solving prompt for the model under test.
25 | """
26 | question = item.get("question", "")
27 | images = normalize_image_paths(item.get("image_path"))
28 | text_content = f"Question: {question}\nPlease answer the question."
29 | if images:
30 | assert all(
31 | isinstance(p, str) for p in images
32 | ), f"images should be List[str], got {images}"
33 | return {"text": text_content, "images": prepare_images_for_prompt(images)}
34 | else:
35 | return text_content
36 |
37 | def _build_score_prompt(self, item: Dict, model_output: Any) -> Any:
38 | """
39 |         Build the scoring prompt, including the scoring rubric.
40 | """
41 | question = item.get("question", "")
42 | images = normalize_image_paths(item.get("image_path"))
43 | reference_answer = item.get("answer", "")
44 |         # Support image-based reference answers
45 | reference_images = []
46 | if isinstance(reference_answer, str) and reference_answer.lower().endswith(
47 | (".png", ".jpg", ".jpeg", ".bmp", ".gif", ".webp")
48 | ):
49 | reference_images = [reference_answer]
50 | reference_answer_text = "[See reference image]"
51 | else:
52 | reference_answer_text = reference_answer
53 |         # Support image-based model outputs
54 | model_output_images = []
55 | if isinstance(model_output, str) and model_output.lower().endswith(
56 | (".png", ".jpg", ".jpeg", ".bmp", ".gif", ".webp")
57 | ):
58 | model_output_images = [model_output]
59 | model_output_text = "[See model output image]"
60 | else:
61 | model_output_text = model_output
62 |         # Scoring rubric
63 | prompt_lines = [
64 | "You are an expert evaluator. Given the following question, reference answer, and model answer, please rate the model answer on a scale of 0 to 1, and explain your reasoning.",
65 | "Scoring rules:",
66 | "- If the reference answer is an image but the model output does not contain an image, score 0.",
67 | "- If the reference answer is text but the model output does not contain text, score 0.",
68 | "- Otherwise, score based on the similarity and correctness of the model output compared to the reference answer.",
69 | "- If both text and image are present, consider both in your evaluation.",
70 | "Please output your score in the format: \\score{X}, where X is a number between 0 and 1.",
71 | "",
72 | f"Question: {question}",
73 | ]
74 | if images:
75 | prompt_lines.append("[See question image(s)]")
76 | prompt_lines.append("")
77 | prompt_lines.append(f"Reference Answer: {reference_answer_text}")
78 | if reference_images:
79 | prompt_lines.append("[See reference answer image(s)]")
80 | prompt_lines.append("")
81 | prompt_lines.append(f"Model Output: {model_output_text}")
82 | if model_output_images:
83 | prompt_lines.append("[See model output image(s)]")
84 | prompt_lines.append("")
85 | prompt_lines.append("Your response:")
86 | text_content = "\n".join(prompt_lines)
87 |         # Assemble the multimodal input
88 | all_images = []
89 | if images:
90 | assert all(
91 | isinstance(p, str) for p in images
92 | ), f"images should be List[str], got {images}"
93 | all_images += prepare_images_for_prompt(images)
94 | if reference_images:
95 | assert all(
96 | isinstance(p, str) for p in reference_images
97 | ), f"reference_images should be List[str], got {reference_images}"
98 | all_images += prepare_images_for_prompt(reference_images)
99 | if model_output_images:
100 | assert all(
101 | isinstance(p, str) for p in model_output_images
102 | ), f"model_output_images should be List[str], got {model_output_images}"
103 | all_images += prepare_images_for_prompt(model_output_images)
104 | if all_images:
105 | return {"text": text_content, "images": all_images}
106 | else:
107 | return text_content
108 |
109 | def _extract_prediction(self, response: str, item: Dict) -> float:
110 | """
111 |         Extract the \\score{X} rating from the scoring model's response.
112 | """
113 | if not response:
114 | return 0.0
115 | score_pattern = r"\\score\{([0-9.]+)\}"
116 | matches = re.findall(score_pattern, response)
117 | if matches:
118 | try:
119 | score = float(matches[-1])
120 | return max(0.0, min(1.0, score))
121 | except Exception:
122 | return 0.0
123 | return 0.0
124 |
125 | def _calculate_accuracy(self, answer: Any, prediction: float, item: Dict) -> bool:
126 | return prediction >= 0.5
127 |
128 | def evaluate(
129 | self,
130 | data_items,
131 | model,
132 | max_out_len=512,
133 | batch_size=None,
134 | save_path="./eval_results",
135 | score_model=None,
136 | ):
137 | """
138 |         Two-stage evaluation: the model under test first generates answers,
139 |         then the scoring model rates them. A score_model override is supported.
140 | """
141 | score_model = score_model or self.score_model
142 | results = []
143 | print("🚀 Running model inference...")
144 | model_outputs = []
145 |         # 1. Generate answers with the model under test (with progress bar)
146 | for item in tqdm(data_items, desc="Generating responses", unit="item"):
147 | prompt = self._build_prompt(item)
148 | model_output = model.generate(prompt, max_out_len)
149 | model_outputs.append(model_output)
150 |         # 2. Scoring stage (with progress bar)
151 | print("📝 Running scoring model...")
152 | for item, model_output in tqdm(
153 | zip(data_items, model_outputs),
154 | total=len(data_items),
155 | desc="Scoring responses",
156 | unit="item",
157 | ):
158 | score_prompt = self._build_score_prompt(item, model_output)
159 | score_response = score_model.generate(score_prompt, max_out_len)
160 | score = self._extract_prediction(score_response, item)
161 |             # 3. Record all information for this item
162 | item_result = item.copy()
163 | item_result[self.prediction_key] = score
164 | item_result["model_output"] = model_output
165 | item_result["score_response"] = score_response
166 | item_result["pass"] = self._calculate_accuracy(
167 | item.get("answer", ""), score, item
168 | )
169 | results.append(item_result)
170 |         # 4. Save results and compute metrics
171 | saved_files = self._save_results(results, save_path)
172 | metrics = self._calculate_metrics(results)
173 | self._print_results(metrics)
174 | return {
175 | "metrics": metrics,
176 | "saved_files": saved_files,
177 | "total_items": len(results),
178 | }
179 |
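180 | 
181 | if __name__ == "__main__":
182 |     # Illustrative sketch of the two-stage flow (generate, then score).
183 |     # Assumes API keys are configured; the demo item is a placeholder and
184 |     # GPT4o doubles here as the model under test.
185 |     demo_items = [
186 |         {
187 |             "question": "What does IR spectroscopy measure?",
188 |             "answer": "Absorption of infrared light by molecular vibrations.",
189 |         }
190 |     ]
191 |     evaluator = OpenEvaluator()
192 |     results = evaluator.evaluate(demo_items, model=GPT4o(), save_path="./eval_results")
193 |     print(results["metrics"])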
--------------------------------------------------------------------------------
/docs/en/api.md:
--------------------------------------------------------------------------------
1 | # API Reference
2 |
3 | SpectrumLab provides a concise and powerful API to help you quickly build deep learning applications in spectroscopy. This document covers the usage of the core modules and guidelines for custom extensions.
4 |
5 | ## Benchmark Module
6 |
7 | The Benchmark module is the core of data access in SpectrumLab, providing a unified interface for loading and managing spectroscopic benchmark data at different levels.
8 |
9 | ### Get Benchmark Group
10 |
11 | The `get_benchmark_group` function can be used to obtain benchmark test groups at four different levels:
12 |
13 | ```python
14 | from spectrumlab.benchmark import get_benchmark_group
15 |
16 | signal_group = get_benchmark_group("signal") # Signal layer
17 | perception_group = get_benchmark_group("perception") # Perception layer
18 | semantic_group = get_benchmark_group("semantic") # Semantic layer
19 | generation_group = get_benchmark_group("generation") # Generation layer
20 | ```
21 |
22 | ### Data Access
23 |
24 | Each Benchmark Group provides flexible data access methods:
25 |
26 | ```python
27 | # Get all data
28 | data = signal_group.get_data_by_subcategories("all")
29 | 
30 | # Get data for specific sub-categories
31 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
32 |
33 | # Get all available sub-categories of the Benchmark Group
34 | subcategories = signal_group.get_available_subcategories()
35 | print(subcategories)
36 | ```
37 |
38 | **Method description:**
39 |
40 | - `get_data_by_subcategories("all")`: Returns the data of all sub-categories at this level
41 | - `get_data_by_subcategories([...])`: Returns the data list for the specified sub-categories
42 | - `get_available_subcategories()`: Returns the names of all sub-categories in the current level
43 |
44 | ## Model Module
45 |
46 | The Model module provides a unified model interface, supporting the integration of various pre-trained and custom models.
47 |
48 | ### Use Existing Models
49 |
50 | SpectrumLab has multiple built-in interfaces for advanced multimodal models:
51 |
52 | ```python
53 | from spectrumlab.models import GPT4oAPI
54 |
55 | gpt4o = GPT4oAPI()
56 |
57 | response = gpt4o.generate("Your prompt here")
58 | ```
59 |
60 | **Supported models:**
61 |
62 | - `GPT4oAPI`: OpenAI GPT-4o
63 | - `ClaudeAPI`: Anthropic Claude series
64 | - `DeepSeekAPI`: DeepSeek-VL
65 | - `QwenVLAPI`: Qwen-VL series
66 | - `InternVLAPI`: InternVL series
67 |
68 | ### Custom Model
69 |
70 | By inheriting from the `BaseModel` class, you can easily integrate your own model:
71 |
72 | ```python
73 | from spectrumlab.models.base import BaseModel
74 |
75 | class CustomModel(BaseModel):
76 | def __init__(self):
77 | super().__init__()
78 | self.model_name = "CustomModel"
79 |
80 | def generate(self, prompt, max_out_len=512):
81 |         # Implement the call to your model here
82 |         # (API request, local inference, etc.)
83 |         return "..."  # placeholder: return your model's response string
84 | ```
85 |
86 | **Custom requirements:**
87 |
88 | - The `generate` method must be implemented
89 | - Support text and multimodal input (see the sketch below)
90 | - Return the response in string format
91 |
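92 | For multimodal tasks, the evaluators pass `generate` either a plain string or a dict of the form `{"text": ..., "images": [...]}`. The following minimal sketch handles both cases; `my_backend_infer` is a hypothetical stand-in for your own inference call:
93 | 
94 | ```python
95 | from spectrumlab.models.base import BaseModel
96 | 
97 | class MultimodalCustomModel(BaseModel):
98 |     def __init__(self):
99 |         super().__init__()
100 |         self.model_name = "MultimodalCustomModel"
101 | 
102 |     def generate(self, prompt, max_out_len=512):
103 |         if isinstance(prompt, dict) and "images" in prompt:
104 |             # Multimodal prompt: split the text from the image payloads
105 |             text, images = prompt["text"], prompt["images"]
106 |         else:
107 |             # Text-only prompt
108 |             text, images = prompt, []
109 |         # my_backend_infer is hypothetical: call your model here
110 |         return str(my_backend_infer(text, images, max_tokens=max_out_len))
111 | ```
112 | 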
92 | ## Evaluator Module
93 |
94 | The Evaluator module is responsible for the core logic of model evaluation, providing a standardized evaluation process and flexible customization options.
95 |
96 | ### Basic Usage
97 |
98 | For multiple-choice evaluation tasks, you can use `ChoiceEvaluator` directly:
99 |
100 | ```python
101 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator
102 |
103 | evaluator = ChoiceEvaluator()
104 |
105 | results = evaluator.evaluate(
106 | data_items=data,
107 | model=model,
108 | max_out_len=512,
109 | save_path="./eval_results"
110 | )
111 | ```
112 |
113 | **Parameter description:**
114 |
115 | - `data_items`: List of evaluation data
116 | - `model`: Model instance
117 | - `max_out_len`: Maximum output length
118 | - `save_path`: Result saving path
119 |
120 | ### Customize Evaluator
121 |
122 | By inheriting from the `BaseEvaluator` class, you can customize the evaluation logic to meet the requirements of specific tasks:
123 |
124 | ```python
125 | from spectrumlab.evaluator.base import BaseEvaluator
126 |
127 | class CustomEvaluator(BaseEvaluator):
128 | def _build_prompt(self, item):
129 |         """Build the input prompt"""
130 | question = item["question"]
131 | choices = item["choices"]
132 |         return f"Question: {question}\nOptions: {choices}\nPlease choose the correct answer:"
133 |
134 | def _extract_prediction(self, response, item):
135 | """Extract the prediction results from the model response"""
136 | import re
137 |         match = re.search(r'\\box\{([^}]+)\}', response)
138 | return match.group(1) if match else ""
139 |
140 | def _calculate_accuracy(self, answer, prediction, item):
141 | """Calculate accuracy"""
142 | return answer.strip().lower() == prediction.strip().lower()
143 | ```
144 |
145 | **Core methods** (a usage sketch follows the list):
146 |
147 | - `_build_prompt`: Build model input based on data items
148 | - `_extract_prediction`: Extract predicted answers from the model output
149 | - `_calculate_accuracy`: Judge whether the prediction is correct
150 |
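151 | Once defined, a custom evaluator is used in the same way as the built-in ones. A minimal sketch, assuming `data` and `model` are prepared as in the sections above:
152 | 
153 | ```python
154 | evaluator = CustomEvaluator()
155 | 
156 | results = evaluator.evaluate(
157 |     data_items=data,
158 |     model=model,
159 |     save_path="./custom_eval_results"
160 | )
161 | ```
162 | 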
151 | ## Complete Evaluation Example
152 |
153 | The following is a complete evaluation example, demonstrating the entire workflow from data loading to result analysis:
154 |
155 | ```python
156 | from spectrumlab.benchmark.signal_group import SignalGroup
157 | from spectrumlab.models import GPT4oAPI
158 | from spectrumlab.evaluator.choice_evaluator import ChoiceEvaluator
159 |
160 | # 1. Load data
161 | signal_group = SignalGroup("data")
162 | data = signal_group.get_data_by_subcategories(["Spectrum Type Classification"])
163 |
164 | # 2. Initialize the model and evaluator
165 | model = GPT4oAPI()
166 | evaluator = ChoiceEvaluator()
167 |
168 | # 3. Run evaluation
169 | results = evaluator.evaluate(
170 | data_items=data,
171 | model=model,
172 | save_path="./evaluation_results"
173 | )
174 |
175 | # 4. View the evaluation results
176 | print(f"Evaluation completed! Overall accuracy: {results['metrics']['overall']['accuracy']:.2f}%")
177 |
178 | # View detailed results
179 | for subcategory, metrics in results['metrics']['subcategory_metrics'].items():
180 | print(f"{subcategory}: {metrics['accuracy']:.2f}% ({metrics['correct']}/{metrics['total']})")
181 | ```
182 |
183 | ## Data Format
184 |
185 | ### Input Data Format
186 |
187 | Each data item follows this format:
188 |
189 | ```python
190 | {
191 |     "question": "Based on this infrared spectrum, what is the most likely compound?",
192 |     "choices": ["benzoic acid", "benzaldehyde", "benzyl alcohol", "phenylacetic acid"],
193 | "answer": "benzoic acid",
194 | "image_path": "./data/signal/ir_001.png", # optional
195 | "category": "Chemistry",
196 | "sub_category": "Spectrum Type Classification"
197 | }
198 | ```
199 |
200 | ### Output Result Format
201 |
202 | The evaluation results include detailed performance metrics:
203 |
204 | ```python
205 | {
206 | "metrics": {
207 | "overall": {
208 | "accuracy": 85.5,
209 | "correct": 171,
210 | "total": 200
211 | },
212 | "subcategory_metrics": {
213 | "Spectrum Type Classification": {
214 | "accuracy": 90.0,
215 | "correct": 45,
216 | "total": 50
217 | }
218 | }
219 | },
220 | "saved_files": ["result_001.json"],
221 | "total_items": 200
222 | }
223 | ```
224 |
225 | ## Environment Configuration
226 |
227 | Before using API models, you need to configure the corresponding environment variables:
228 |
229 | ```bash
230 | # OpenAI models
231 | export OPENAI_API_KEY="your_openai_api_key"
232 |
233 | # Anthropic models
234 | export ANTHROPIC_API_KEY="your_anthropic_api_key"
235 |
236 | # DeepSeek model
237 | export DEEPSEEK_API_KEY="your_deepseek_api_key"
238 |
239 | # Other models...
240 | ```
241 |
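242 | Alternatively, the keys can be placed in a `.env` file at the project root (see `.env.example`). A minimal sketch of loading them manually with `python-dotenv` (an assumed extra dependency; the key names are as above):
243 | 
244 | ```python
245 | import os
246 | 
247 | from dotenv import load_dotenv
248 | 
249 | load_dotenv()  # reads the .env file from the current working directory
250 | assert os.environ.get("OPENAI_API_KEY"), "OPENAI_API_KEY is not set"
251 | ```
252 | 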
242 | ## Quick Start
243 |
244 | 1. **Install dependencies**: `pip install spectrumlab`
245 | 2. **Configure API keys**: set the corresponding environment variables
246 | 3. **Load data**: use the Benchmark module to obtain evaluation data
247 | 4. **Select a model**: initialize a pre-trained or custom model
248 | 5. **Run evaluation**: use an Evaluator to run the evaluation and save the results
249 |
--------------------------------------------------------------------------------
/spectrumlab/models/qwen_vl_api.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Any, Optional, Union
2 | from .base_api import BaseAPIModel
3 | from spectrumlab.config import Config
4 | from openai import OpenAI
5 |
6 |
7 | class Qwen_VL_Max(BaseAPIModel):
8 | def __init__(
9 | self,
10 | api_key: Optional[str] = None,
11 | base_url: Optional[str] = None,
12 | model_name: Optional[str] = None,
13 | **kwargs,
14 | ):
15 | config = Config()
16 |
17 | # Use provided parameters or fall back to config
18 | self.api_key = api_key or config.qwen_vl_api_key
19 | self.base_url = base_url or config.qwen_vl_base_url
20 | self.model_name = model_name or config.qwen_vl_model_name
21 |
22 | # Validate that we have required configuration
23 | if not self.api_key:
24 | raise ValueError(
25 | "Qwen-VL-Max API key not found. Please set QWEN_VL_MAX_API_KEY in your .env file "
26 | "or provide api_key parameter."
27 | )
28 |
29 | # Ensure base_url has proper protocol for OpenRouter
30 | if self.base_url and not self.base_url.startswith(("http://", "https://")):
31 | self.base_url = f"https://{self.base_url}"
32 |
33 | self.client = OpenAI(
34 | api_key=self.api_key,
35 | base_url=self.base_url,
36 | )
37 |
38 | # Initialize parent class
39 | super().__init__(model_name=self.model_name, **kwargs)
40 |
41 | def generate(
42 | self,
43 | prompt: Union[str, Dict[str, Any]],
44 | max_tokens: int = 512,
45 | **generation_kwargs,
46 | ) -> str:
47 | """
48 | Generate response supporting both text and multimodal input.
49 |
50 | Args:
51 | prompt: Either text string or multimodal dict
52 | max_tokens: Maximum tokens to generate
53 | **generation_kwargs: Additional generation parameters like temperature, top_p, etc.
54 |
55 | Returns:
56 | Generated response string
57 | """
58 | messages = []
59 |
60 | # Handle multimodal vs text-only prompts
61 | if isinstance(prompt, dict) and "images" in prompt:
62 | # Multimodal prompt
63 | content = []
64 |
65 | content.append({"type": "text", "text": prompt["text"]})
66 |
67 | for image_data in prompt["images"]:
68 | content.append(image_data)
69 |
70 | messages.append({"role": "user", "content": content})
71 | else:
72 | # Text-only prompt
73 | text_content = prompt if isinstance(prompt, str) else prompt.get("text", "")
74 | messages.append({"role": "user", "content": text_content})
75 |
76 | # Prepare API call parameters
77 | api_params = {
78 | "model": self.model_name,
79 | "messages": messages,
80 | "max_tokens": max_tokens,
81 | }
82 |
83 | # Add any additional generation parameters
84 | api_params.update(generation_kwargs)
85 |
86 | try:
87 | response = self.client.chat.completions.create(**api_params)
88 | return response.choices[0].message.content
89 | except Exception as e:
90 | raise RuntimeError(f"Qwen-VL-Max API call failed: {e}")
91 |
92 |
93 | class Qwen_2_5_VL_32B(BaseAPIModel):
94 | def __init__(
95 | self,
96 | api_key: Optional[str] = None,
97 | base_url: Optional[str] = None,
98 | model_name: Optional[str] = None,
99 | **kwargs,
100 | ):
101 | config = Config()
102 |
103 | # Use provided parameters or fall back to config
104 | self.api_key = api_key or config.qwen_2_5_vl_32b_api_key
105 | self.base_url = base_url or config.qwen_2_5_vl_32b_base_url
106 | self.model_name = model_name or config.qwen_2_5_vl_32b_model_name
107 |
108 | # Validate that we have required configuration
109 | if not self.api_key:
110 | raise ValueError(
111 | "Qwen-2.5-VL-32B API key not found. Please set QWEN_2_5_VL_32B_API_KEY in your .env file "
112 | "or provide api_key parameter."
113 | )
114 |
115 | # Ensure base_url has proper protocol for OpenRouter
116 | if self.base_url and not self.base_url.startswith(("http://", "https://")):
117 | self.base_url = f"https://{self.base_url}"
118 |
119 | self.client = OpenAI(
120 | api_key=self.api_key,
121 | base_url=self.base_url,
122 | )
123 |
124 | # Initialize parent class
125 | super().__init__(model_name=self.model_name, **kwargs)
126 |
127 | def generate(
128 | self,
129 | prompt: Union[str, Dict[str, Any]],
130 | max_tokens: int = 512,
131 | **generation_kwargs,
132 | ) -> str:
133 | """
134 | Generate response supporting both text and multimodal input.
135 |
136 | Args:
137 | prompt: Either text string or multimodal dict
138 | max_tokens: Maximum tokens to generate
139 | **generation_kwargs: Additional generation parameters like temperature, top_p, etc.
140 |
141 | Returns:
142 | Generated response string
143 | """
144 | messages = []
145 |
146 | # Handle multimodal vs text-only prompts
147 | if isinstance(prompt, dict) and "images" in prompt:
148 | # Multimodal prompt
149 | content = []
150 |
151 | content.append({"type": "text", "text": prompt["text"]})
152 |
153 | for image_data in prompt["images"]:
154 | content.append(image_data)
155 |
156 | messages.append({"role": "user", "content": content})
157 | else:
158 | # Text-only prompt
159 | text_content = prompt if isinstance(prompt, str) else prompt.get("text", "")
160 | messages.append({"role": "user", "content": text_content})
161 |
162 | # Prepare API call parameters
163 | api_params = {
164 | "model": self.model_name,
165 | "messages": messages,
166 | "max_tokens": max_tokens,
167 | }
168 |
169 | # Add any additional generation parameters
170 | api_params.update(generation_kwargs)
171 |
172 | try:
173 | response = self.client.chat.completions.create(**api_params)
174 | return response.choices[0].message.content
175 | except Exception as e:
176 | raise RuntimeError(f"Qwen-2.5-VL-32B API call failed: {e}")
177 |
178 |
179 | class Qwen_2_5_VL_72B(BaseAPIModel):
180 | def __init__(
181 | self,
182 | api_key: Optional[str] = None,
183 | base_url: Optional[str] = None,
184 | model_name: Optional[str] = None,
185 | **kwargs,
186 | ):
187 | config = Config()
188 |
189 | # Use provided parameters or fall back to config
190 | self.api_key = api_key or config.qwen_2_5_vl_72b_api_key
191 | self.base_url = base_url or config.qwen_2_5_vl_72b_base_url
192 | self.model_name = model_name or config.qwen_2_5_vl_72b_model_name
193 |
194 | # Validate that we have required configuration
195 | if not self.api_key:
196 | raise ValueError(
197 | "Qwen-2.5-VL-72B API key not found. Please set QWEN_2_5_VL_72B_API_KEY in your .env file "
198 | "or provide api_key parameter."
199 | )
200 |
201 | # Ensure base_url has proper protocol for OpenRouter
202 | if self.base_url and not self.base_url.startswith(("http://", "https://")):
203 | self.base_url = f"https://{self.base_url}"
204 |
205 | self.client = OpenAI(
206 | api_key=self.api_key,
207 | base_url=self.base_url,
208 | )
209 |
210 | # Initialize parent class
211 | super().__init__(model_name=self.model_name, **kwargs)
212 |
213 | def generate(
214 | self,
215 | prompt: Union[str, Dict[str, Any]],
216 | max_tokens: int = 512,
217 | **generation_kwargs,
218 | ) -> str:
219 | """
220 | Generate response supporting both text and multimodal input.
221 |
222 | Args:
223 | prompt: Either text string or multimodal dict
224 | max_tokens: Maximum tokens to generate
225 | **generation_kwargs: Additional generation parameters like temperature, top_p, etc.
226 |
227 | Returns:
228 | Generated response string
229 | """
230 | messages = []
231 |
232 | # Handle multimodal vs text-only prompts
233 | if isinstance(prompt, dict) and "images" in prompt:
234 | # Multimodal prompt
235 | content = []
236 |
237 | content.append({"type": "text", "text": prompt["text"]})
238 |
239 | for image_data in prompt["images"]:
240 | content.append(image_data)
241 |
242 | messages.append({"role": "user", "content": content})
243 | else:
244 | # Text-only prompt
245 | text_content = prompt if isinstance(prompt, str) else prompt.get("text", "")
246 | messages.append({"role": "user", "content": text_content})
247 |
248 | # Prepare API call parameters
249 | api_params = {
250 | "model": self.model_name,
251 | "messages": messages,
252 | "max_tokens": max_tokens,
253 | }
254 |
255 | # Add any additional generation parameters
256 | api_params.update(generation_kwargs)
257 |
258 | try:
259 | response = self.client.chat.completions.create(**api_params)
260 | return response.choices[0].message.content
261 | except Exception as e:
262 | raise RuntimeError(f"Qwen-2.5-VL-72B API call failed: {e}")
263 |
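264 | 
265 | if __name__ == "__main__":
266 |     # Illustrative sketch only: requires QWEN_VL_MAX_API_KEY (and, unless
267 |     # defaulted in Config, the base URL and model name) to be set in .env.
268 |     model = Qwen_VL_Max()
269 |     print(model.generate("Name one common use of Raman spectroscopy.", max_tokens=64))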
--------------------------------------------------------------------------------