├── .npmrc ├── postcss.config.mjs ├── public ├── vercel.svg ├── window.svg ├── file.svg ├── globe.svg └── next.svg ├── src ├── lib │ ├── adapters │ │ ├── index.ts │ │ ├── retriever-to-langchain.ts │ │ ├── tool-to-langchain.ts │ │ └── __tests__ │ │ │ ├── tool-to-langchain.test.ts │ │ │ └── retriever-to-langchain.test.ts │ ├── index.ts │ ├── rag │ │ ├── batch-embedding-generator.ts │ │ ├── rag-module.ts │ │ ├── langchain-rag-retriever.ts │ │ ├── document-loader.ts │ │ ├── code-documentation-store.ts │ │ ├── mock-data-generator.ts │ │ ├── __tests__ │ │ │ └── langchain-rag.test.ts │ │ └── bm25-retriever.ts │ ├── hierarchical-graph.ts │ ├── tools │ │ ├── keyword-search-tool.ts │ │ └── base.ts │ ├── metrics.ts │ └── __tests__ │ │ ├── metrics.test.ts │ │ ├── bm25-retriever.test.ts │ │ └── tool-base.test.ts ├── app │ ├── layout.tsx │ ├── page.tsx │ ├── api │ │ ├── rag │ │ │ ├── vector-embedding │ │ │ │ └── route.ts │ │ │ ├── vector-embedding-batch │ │ │ │ └── route.ts │ │ │ ├── vector-search │ │ │ │ └── route.ts │ │ │ ├── execute │ │ │ │ └── route.ts │ │ │ ├── rewrite │ │ │ │ └── route.ts │ │ │ ├── vector-store-init │ │ │ │ └── route.ts │ │ │ ├── generate-candidates │ │ │ │ └── route.ts │ │ │ ├── retrieve │ │ │ │ └── route.ts │ │ │ ├── generate-documents │ │ │ │ └── route.ts │ │ │ └── generate-code-documentation │ │ │ │ └── route.ts │ │ └── chat │ │ │ └── route.ts │ ├── globals.css │ ├── treesitter-playground │ │ └── page.tsx │ └── rag-keyword-playground │ │ └── page.tsx ├── components │ ├── layout │ │ ├── LoadingSpinner.tsx │ │ ├── ScrollToTop.tsx │ │ ├── Sidebar.tsx │ │ ├── PageHeader.tsx │ │ ├── PageTransition.tsx │ │ ├── StatusIndicator.tsx │ │ ├── AppLayout.tsx │ │ ├── Breadcrumbs.tsx │ │ └── QuickNavigation.tsx │ ├── treesitter │ │ ├── LanguageSelector.tsx │ │ ├── QueryResults.tsx │ │ ├── TreeViewer.tsx │ │ └── CodeEditor.tsx │ ├── WorkbenchProvider.tsx │ ├── panels │ │ └── InteractionPanel.tsx │ └── rag-playground │ │ ├── InteractionPanel.tsx │ │ └── 
ResultsPanel.tsx └── __tests__ │ ├── vector-search.test.ts │ └── structured-output.test.ts ├── .prettierrc ├── jest.config.js ├── .prettierignore ├── .husky └── pre-push ├── tsconfig.json ├── .gitignore ├── eslint.config.mjs ├── next.config.ts ├── .github ├── workflows │ └── ci.yml └── CONTRIBUTING.md ├── README.md ├── playwright.config.ts ├── package.json └── tests └── navigation.test.ts /.npmrc: -------------------------------------------------------------------------------- 1 | legacy-peer-deps=true 2 | 3 | -------------------------------------------------------------------------------- /postcss.config.mjs: -------------------------------------------------------------------------------- 1 | const config = { 2 | plugins: ['@tailwindcss/postcss'], 3 | }; 4 | 5 | export default config; 6 | -------------------------------------------------------------------------------- /public/vercel.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/lib/adapters/index.ts: -------------------------------------------------------------------------------- 1 | export { ToolToLangChainAdapter } from './tool-to-langchain'; 2 | export { BM25RetrieverAdapter } from './retriever-to-langchain'; 3 | -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "semi": true, 3 | "trailingComma": "es5", 4 | "singleQuote": true, 5 | "printWidth": 100, 6 | "tabWidth": 2, 7 | "useTabs": false, 8 | "arrowParens": "always", 9 | "endOfLine": "lf", 10 | "bracketSpacing": true, 11 | "bracketSameLine": false, 12 | "proseWrap": "preserve" 13 | } 14 | -------------------------------------------------------------------------------- /public/window.svg: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /public/file.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | preset: 'ts-jest', 3 | testEnvironment: 'node', 4 | roots: ['/src'], 5 | testMatch: ['**/__tests__/**/*.test.ts'], 6 | moduleFileExtensions: ['ts', 'tsx', 'js', 'jsx', 'json', 'node'], 7 | moduleNameMapper: { 8 | '^@/(.*)$': '/src/$1', 9 | }, 10 | collectCoverageFrom: ['src/**/*.ts', '!src/**/*.d.ts', '!src/**/__tests__/**'], 11 | coveragePathIgnorePatterns: ['/node_modules/', '/__tests__/'], 12 | setupFilesAfterEnv: ['/src/__tests__/setup.ts'], 13 | }; 14 | -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | node_modules/ 3 | pnpm-lock.yaml 4 | package-lock.json 5 | yarn.lock 6 | 7 | # Build outputs 8 | .next/ 9 | out/ 10 | build/ 11 | dist/ 12 | 13 | # Cache directories 14 | .turbo/ 15 | .cache/ 16 | .vercel/ 17 | 18 | # Test coverage 19 | coverage/ 20 | .nyc_output/ 21 | 22 | # Generated files 23 | next-env.d.ts 24 | 25 | # Logs 26 | *.log 27 | 28 | # OS files 29 | .DS_Store 30 | 31 | # IDE 32 | .vscode/ 33 | .idea/ 34 | 35 | # Playwright 36 | playwright-report/ 37 | test-results/ 38 | 39 | # Public assets 40 | public/wasm/ 41 | 42 | # Documentation (optional, remove if you want to format these) 43 | *.md 44 | -------------------------------------------------------------------------------- /.husky/pre-push: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | . "$(dirname "$0")/_/husky.sh" 3 | 4 | echo "🔍 Running pre-push checks..." 5 | 6 | # Run lint 7 | echo "📝 Checking lint..." 
8 | npm run lint 9 | if [ $? -ne 0 ]; then 10 | echo "❌ Lint failed. Please fix the issues before pushing." 11 | exit 1 12 | fi 13 | 14 | # Run build 15 | echo "🔨 Building..." 16 | npm run build 17 | if [ $? -ne 0 ]; then 18 | echo "❌ Build failed. Please fix the issues before pushing." 19 | exit 1 20 | fi 21 | 22 | # Run tests 23 | echo "🧪 Running tests..." 24 | npm run test 25 | if [ $? -ne 0 ]; then 26 | echo "❌ Tests failed. Please fix the issues before pushing." 27 | exit 1 28 | fi 29 | 30 | echo "✅ All checks passed! Pushing..." 31 | 32 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2017", 4 | "lib": ["dom", "dom.iterable", "esnext"], 5 | "allowJs": true, 6 | "skipLibCheck": true, 7 | "strict": true, 8 | "noEmit": true, 9 | "esModuleInterop": true, 10 | "module": "esnext", 11 | "moduleResolution": "bundler", 12 | "resolveJsonModule": true, 13 | "isolatedModules": true, 14 | "jsx": "preserve", 15 | "incremental": true, 16 | "plugins": [ 17 | { 18 | "name": "next" 19 | } 20 | ], 21 | "paths": { 22 | "@/*": ["./src/*"] 23 | } 24 | }, 25 | "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], 26 | "exclude": ["node_modules"] 27 | } 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.* 7 | .yarn/* 8 | !.yarn/patches 9 | !.yarn/plugins 10 | !.yarn/releases 11 | !.yarn/versions 12 | 13 | # testing 14 | /coverage 15 | 16 | # next.js 17 | /.next/ 18 | /out/ 19 | 20 | # production 21 | /build 22 | 23 | # misc 24 | .DS_Store 25 | *.pem 26 | 27 | # debug 28 | npm-debug.log* 29 | yarn-debug.log* 30 | yarn-error.log* 31 | .pnpm-debug.log* 32 | 33 | # env files (can opt-in for committing if needed) 34 | .env* 35 | 36 | # vercel 37 | .vercel 38 | 39 | # typescript 40 | *.tsbuildinfo 41 | next-env.d.ts 42 | .idea 43 | playwright-report 44 | test-results 45 | public/wasm -------------------------------------------------------------------------------- /src/lib/index.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Context Engineer Workbench - 核心库导出 3 | */ 4 | 5 | // Tool 相关导出 6 | export { Tool, ToolRegistry } from './tools/base'; 7 | 8 | export type { ToolDefinition, ToolParameterSchema, ToolExecutionResult } from './tools/base'; 9 | 10 | export { KeywordSearchTool } from './tools/keyword-search-tool'; 11 | 12 | // RAG 相关导出 13 | export { BM25Retriever } from './rag/bm25-retriever'; 14 | 15 | export type { DocumentChunk, RetrievalResult, RetrievalResultItem } from './rag/bm25-retriever'; 16 | 17 | export { RAGModule, createRAGModule } from './rag/rag-module'; 18 | 19 | export type { RAGModuleConfig } from './rag/rag-module'; 20 | 21 | // LangChain 适配器导出 22 | export { ToolToLangChainAdapter, BM25RetrieverAdapter } from './adapters'; 23 | -------------------------------------------------------------------------------- /eslint.config.mjs: -------------------------------------------------------------------------------- 1 | import { dirname } from 'path'; 2 | import { fileURLToPath } from 'url'; 3 | import { FlatCompat } from '@eslint/eslintrc'; 4 | 5 | const __filename = fileURLToPath(import.meta.url); 6 | const __dirname = dirname(__filename); 7 | 8 | 
const compat = new FlatCompat({ 9 | baseDirectory: __dirname, 10 | }); 11 | 12 | const eslintConfig = [ 13 | ...compat.extends('next/core-web-vitals', 'next/typescript'), 14 | ...compat.extends('plugin:prettier/recommended'), 15 | { 16 | ignores: [ 17 | 'node_modules/**', 18 | '.next/**', 19 | 'out/**', 20 | 'build/**', 21 | 'next-env.d.ts', 22 | 'playwright-report/**', 23 | 'test-results/**', 24 | 'public/wasm/**', 25 | ], 26 | }, 27 | ]; 28 | 29 | export default eslintConfig; 30 | -------------------------------------------------------------------------------- /src/app/layout.tsx: -------------------------------------------------------------------------------- 1 | import type { Metadata } from 'next'; 2 | import { Geist, Geist_Mono } from 'next/font/google'; 3 | import './globals.css'; 4 | 5 | const geistSans = Geist({ 6 | variable: '--font-geist-sans', 7 | subsets: ['latin'], 8 | }); 9 | 10 | const geistMono = Geist_Mono({ 11 | variable: '--font-geist-mono', 12 | subsets: ['latin'], 13 | }); 14 | 15 | export const metadata: Metadata = { 16 | title: 'Context Engineer Workbench', 17 | description: 'An interactive learning platform for context engineering', 18 | }; 19 | 20 | export default function RootLayout({ children }: { children: React.ReactNode }) { 21 | return ( 22 | 23 | {children} 24 | 25 | ); 26 | } 27 | -------------------------------------------------------------------------------- /public/globe.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/app/page.tsx: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import Workbench from '@/components/Workbench'; 4 | import AppLayout from '@/components/layout/AppLayout'; 5 | import ConfigPanel from '@/components/panels/ConfigPanel'; 6 | import { WorkbenchProvider, useWorkbench } from '@/components/WorkbenchProvider'; 7 | 8 | export 
default function Home() { 9 | return ( 10 | 11 | }> 12 | 13 | 14 | 15 | ); 16 | } 17 | 18 | // Sidebar content for workbench 19 | function WorkbenchSidebar() { 20 | const { config, updateConfig, updateRAGConfig, updateMemoryConfig } = useWorkbench(); 21 | 22 | return ( 23 |
24 | void} 27 | onRAGConfigChange={updateRAGConfig as (updates: unknown) => void} 28 | onMemoryConfigChange={updateMemoryConfig as (updates: unknown) => void} 29 | /> 30 |
31 | ); 32 | } 33 | -------------------------------------------------------------------------------- /src/components/layout/LoadingSpinner.tsx: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import React from 'react'; 4 | 5 | interface LoadingSpinnerProps { 6 | size?: 'sm' | 'md' | 'lg'; 7 | message?: string; 8 | fullScreen?: boolean; 9 | } 10 | 11 | export default function LoadingSpinner({ 12 | size = 'md', 13 | message = 'Loading...', 14 | fullScreen = false, 15 | }: LoadingSpinnerProps) { 16 | const sizeClasses = { 17 | sm: 'w-4 h-4', 18 | md: 'w-8 h-8', 19 | lg: 'w-12 h-12', 20 | }; 21 | 22 | const containerClasses = fullScreen 23 | ? 'fixed inset-0 bg-white bg-opacity-75 flex items-center justify-center z-50' 24 | : 'flex items-center justify-center p-8'; 25 | 26 | return ( 27 |
28 |
29 |
36 | {message &&

{message}

} 37 |
38 |
/**
 * Copy the Tree-sitter `.wasm` files from the
 * `@unit-mesh/treesitter-artifacts` package into `public/wasm`.
 *
 * Both paths are resolved against `process.cwd()`. The target directory is
 * always created, even when there is nothing to copy.
 *
 * @returns The names of the files that were copied, in directory-listing
 *   order. Empty when the source directory is missing or contains no
 *   `.wasm` files.
 */
function copyWasmFiles(): string[] {
  const sourceDir = path.join(process.cwd(), 'node_modules/@unit-mesh/treesitter-artifacts/wasm');
  const targetDir = path.join(process.cwd(), 'public/wasm');

  // `recursive: true` turns this into a no-op when the directory already
  // exists, so no separate existence check is needed.
  fs.mkdirSync(targetDir, { recursive: true });

  if (!fs.existsSync(sourceDir)) {
    // Previously this case was silent, which made a missing dependency look
    // like a successful copy.
    console.warn(`WASM source directory not found, nothing copied: ${sourceDir}`);
    return [];
  }

  const copied: string[] = [];
  for (const entry of fs.readdirSync(sourceDir, { withFileTypes: true })) {
    // copyFileSync cannot copy directories; skip any that happen to be
    // named `*.wasm` instead of aborting the whole run.
    if (entry.isDirectory() || !entry.name.endsWith('.wasm')) {
      continue;
    }

    fs.copyFileSync(path.join(sourceDir, entry.name), path.join(targetDir, entry.name));
    console.log(`Copied ${entry.name} to public/wasm/`);
    copied.push(entry.name);
  }

  return copied;
}
setIsVisible(true); 14 | } else { 15 | setIsVisible(false); 16 | } 17 | }; 18 | 19 | window.addEventListener('scroll', toggleVisibility); 20 | return () => window.removeEventListener('scroll', toggleVisibility); 21 | }, []); 22 | 23 | const scrollToTop = () => { 24 | window.scrollTo({ 25 | top: 0, 26 | behavior: 'smooth', 27 | }); 28 | }; 29 | 30 | if (!isVisible) { 31 | return null; 32 | } 33 | 34 | return ( 35 | 42 | ); 43 | } 44 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [main, master, develop] 6 | pull_request: 7 | branches: [main, master, develop] 8 | 9 | jobs: 10 | lint: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | - uses: pnpm/action-setup@v4 15 | with: 16 | version: 9 17 | - uses: actions/setup-node@v4 18 | with: 19 | node-version: '20' 20 | cache: 'pnpm' 21 | - run: pnpm install --frozen-lockfile 22 | - run: pnpm run lint 23 | 24 | build: 25 | runs-on: ubuntu-latest 26 | steps: 27 | - uses: actions/checkout@v4 28 | - uses: pnpm/action-setup@v4 29 | with: 30 | version: 9 31 | - uses: actions/setup-node@v4 32 | with: 33 | node-version: '20' 34 | cache: 'pnpm' 35 | - run: pnpm install --frozen-lockfile 36 | - run: pnpm run build 37 | 38 | test: 39 | runs-on: ubuntu-latest 40 | steps: 41 | - uses: actions/checkout@v4 42 | - uses: pnpm/action-setup@v4 43 | with: 44 | version: 9 45 | - uses: actions/setup-node@v4 46 | with: 47 | node-version: '20' 48 | cache: 'pnpm' 49 | - run: pnpm install --frozen-lockfile 50 | - run: pnpm run test 51 | 52 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # 贡献指南 2 | 3 | ## 本地开发工作流 4 | 5 | ### 1. 
安装依赖 6 | ```bash 7 | npm install --legacy-peer-deps 8 | ``` 9 | 10 | ### 2. 本地开发 11 | ```bash 12 | npm run dev 13 | ``` 14 | 15 | ### 3. 推送前检查 16 | 17 | 在推送代码前,系统会自动运行以下检查(通过 pre-push hook): 18 | 19 | - **Lint 检查** - 代码风格和质量检查 20 | - **Build 检查** - 确保代码能成功编译 21 | - **测试** - 运行所有单元测试 22 | 23 | 如果任何检查失败,推送将被阻止。请修复问题后重新推送。 24 | 25 | ### 4. 手动运行检查 26 | 27 | ```bash 28 | # 运行 lint 29 | npm run lint 30 | 31 | # 修复 lint 问题 32 | npm run lint -- --fix 33 | 34 | # 构建项目 35 | npm run build 36 | 37 | # 运行测试 38 | npm run test 39 | 40 | # 运行 E2E 测试 41 | npm run test:e2e 42 | ``` 43 | 44 | ## GitHub Actions CI 45 | 46 | 推送到 `main`、`master`、`develop` 分支,或向这些分支发起 PR 时,会自动触发 GitHub Actions 工作流,包括: 47 | 48 | - **Lint** - 代码质量检查 49 | - **Build** - 编译检查 50 | - **Test** - 单元测试 51 | 52 | 所有检查必须通过才能合并 PR。 53 | 54 | ## 常见问题 55 | 56 | ### Pre-push hook 失败 57 | 58 | 如果 pre-push hook 失败,请: 59 | 60 | 1. 查看错误信息 61 | 2. 修复相应的问题 62 | 3. 重新尝试推送 63 | 64 | ### 跳过 pre-push hook 65 | 66 | 如果需要临时跳过 hook(不推荐),可以使用: 67 | 68 | ```bash 69 | git push --no-verify 70 | ``` 71 | 72 | 但这样只会跳过本地的 pre-push 检查;GitHub Actions 仍会运行,未经本地验证的代码可能导致 CI 失败。 73 | 74 | ## 最佳实践 75 | 76 | 1. **经常运行本地检查** - 在推送前运行 `npm run lint` 和 `npm run build` 77 | 2. **编写测试** - 为新功能添加测试 78 | 3. **保持代码整洁** - 使用 `npm run lint -- --fix` 自动修复问题 79 | 4. 
**查看 CI 日志** - 如果 GitHub Actions 失败,查看详细日志 80 | 81 | -------------------------------------------------------------------------------- /public/next.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/app/api/rag/vector-embedding/route.ts: -------------------------------------------------------------------------------- 1 | import { NextRequest, NextResponse } from 'next/server'; 2 | import { generateSingleEmbedding } from '@/lib/rag/batch-embedding-generator'; 3 | 4 | /** 5 | * POST /api/rag/vector-embedding 6 | * Generate vector embedding for text using GLM embedding-3 model 7 | */ 8 | export async function POST(request: NextRequest) { 9 | try { 10 | const { text } = await request.json(); 11 | 12 | if (!text || typeof text !== 'string') { 13 | return NextResponse.json( 14 | { error: 'Text parameter is required and must be a string' }, 15 | { status: 400 } 16 | ); 17 | } 18 | 19 | // Generate embedding using batch generator 20 | const embedding = await generateSingleEmbedding(text); 21 | 22 | return NextResponse.json({ 23 | success: true, 24 | text, 25 | embedding, 26 | model: 'embedding-3', 27 | timestamp: Date.now(), 28 | usage: { 29 | promptTokens: Math.ceil(text.length / 4), 30 | totalTokens: Math.ceil(text.length / 4), 31 | }, 32 | }); 33 | } catch (error) { 34 | console.error('Vector embedding error:', error); 35 | return NextResponse.json( 36 | { 37 | error: error instanceof Error ? 
error.message : 'Unknown error', 38 | }, 39 | { status: 500 } 40 | ); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/app/globals.css: -------------------------------------------------------------------------------- 1 | @import "tailwindcss"; 2 | 3 | :root { 4 | --background: #ffffff; 5 | --foreground: #171717; 6 | } 7 | 8 | @theme inline { 9 | --color-background: var(--background); 10 | --color-foreground: var(--foreground); 11 | --font-sans: var(--font-geist-sans); 12 | --font-mono: var(--font-geist-mono); 13 | } 14 | 15 | @media (prefers-color-scheme: dark) { 16 | :root { 17 | --background: #0a0a0a; 18 | --foreground: #ededed; 19 | } 20 | } 21 | 22 | body { 23 | background: var(--background); 24 | color: var(--foreground); 25 | font-family: Arial, Helvetica, sans-serif; 26 | } 27 | 28 | /* CodeMirror Styles */ 29 | .cm-editor { 30 | height: 100%; 31 | font-family: 'Courier New', monospace; 32 | font-size: 14px; 33 | } 34 | 35 | .cm-content { 36 | padding: 12px 0; 37 | } 38 | 39 | .cm-gutters { 40 | background-color: #f5f5f5; 41 | border-right: 1px solid #e0e0e0; 42 | } 43 | 44 | .cm-activeLineGutter { 45 | background-color: #e8f4f8; 46 | } 47 | 48 | .cm-cursor { 49 | border-left: 2px solid #333; 50 | } 51 | 52 | .cm-selectionBackground { 53 | background-color: rgba(39, 95, 255, 0.3); 54 | } 55 | 56 | .cm-line { 57 | padding: 0 12px; 58 | } 59 | 60 | /* Capture name highlighting */ 61 | .cm-capture-name { 62 | font-weight: bold; 63 | } 64 | -------------------------------------------------------------------------------- /src/app/treesitter-playground/page.tsx: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import React, { useState, useEffect } from 'react'; 4 | import AppLayout from '@/components/layout/AppLayout'; 5 | import PageHeader from '@/components/layout/PageHeader'; 6 | import TreeSitterPlayground from 
'@/components/treesitter/TreeSitterPlayground'; 7 | 8 | export default function TreeSitterPlaygroundPage() { 9 | const [isClient, setIsClient] = useState(false); 10 | 11 | useEffect(() => { 12 | setIsClient(true); 13 | }, []); 14 | 15 | if (!isClient) { 16 | return ( 17 | 18 |
19 |
20 |
21 |

Loading TreeSitter Playground...

22 |
23 |
24 |
25 | ); 26 | } 27 | 28 | return ( 29 | 30 | 36 | 37 | 38 | ); 39 | } 40 | -------------------------------------------------------------------------------- /src/app/api/rag/vector-embedding-batch/route.ts: -------------------------------------------------------------------------------- 1 | import { NextRequest, NextResponse } from 'next/server'; 2 | import { generateBatchEmbeddings } from '@/lib/rag/batch-embedding-generator'; 3 | 4 | /** 5 | * POST /api/rag/vector-embedding-batch 6 | * Generate embeddings for multiple texts using GLM API 7 | */ 8 | export async function POST(request: NextRequest) { 9 | try { 10 | const { texts } = await request.json(); 11 | 12 | if (!Array.isArray(texts) || texts.length === 0) { 13 | return NextResponse.json( 14 | { error: 'texts array is required and must not be empty' }, 15 | { status: 400 } 16 | ); 17 | } 18 | 19 | // Generate embeddings for all texts 20 | const embeddingResults = await generateBatchEmbeddings(texts); 21 | 22 | // Extract just the embeddings 23 | const embeddings = embeddingResults.map((result) => result.embedding); 24 | 25 | return NextResponse.json({ 26 | success: true, 27 | count: embeddings.length, 28 | embeddings, 29 | model: 'embedding-3', 30 | timestamp: Date.now(), 31 | usage: { 32 | promptTokens: texts.reduce((sum, text) => sum + Math.ceil(text.length / 4), 0), 33 | }, 34 | }); 35 | } catch (error) { 36 | console.error('Batch embedding error:', error); 37 | return NextResponse.json( 38 | { 39 | error: error instanceof Error ? 
error.message : 'Unknown error', 40 | }, 41 | { status: 500 } 42 | ); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/lib/adapters/retriever-to-langchain.ts: -------------------------------------------------------------------------------- 1 | import { BaseRetriever } from '@langchain/core/retrievers'; 2 | import { Document } from '@langchain/core/documents'; 3 | import { BM25Retriever } from '../rag/bm25-retriever'; 4 | 5 | /** 6 | * 将我们的 BM25Retriever 适配为 LangChain BaseRetriever 7 | * 保持我们的抽象,让 LangChain.js 适配我们的实现 8 | */ 9 | export class BM25RetrieverAdapter extends BaseRetriever { 10 | lc_namespace = ['context_engineer', 'retrievers']; 11 | 12 | private bm25Retriever: BM25Retriever; 13 | private topK: number; 14 | 15 | constructor(bm25Retriever: BM25Retriever, topK: number = 3) { 16 | super(); 17 | this.bm25Retriever = bm25Retriever; 18 | this.topK = topK; 19 | } 20 | 21 | async _getRelevantDocuments(query: string): Promise { 22 | const result = await this.bm25Retriever.retrieve(query, this.topK); 23 | 24 | return result.chunks.map( 25 | (item) => 26 | new Document({ 27 | pageContent: item.chunk.content, 28 | metadata: { 29 | ...item.chunk.metadata, 30 | score: item.score, 31 | rank: item.rank, 32 | id: item.chunk.id, 33 | }, 34 | }) 35 | ); 36 | } 37 | 38 | /** 39 | * 获取原始的 BM25Retriever 对象 40 | */ 41 | getOriginalRetriever(): BM25Retriever { 42 | return this.bm25Retriever; 43 | } 44 | 45 | /** 46 | * 获取检索结果(包含评分信息) 47 | */ 48 | async retrieveWithScores(query: string) { 49 | return this.bm25Retriever.retrieve(query, this.topK); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/components/layout/Sidebar.tsx: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import React from 'react'; 4 | import { XMarkIcon } from '@heroicons/react/24/outline'; 5 | 6 | interface SidebarProps { 7 | isOpen: boolean; 
8 | onClose: () => void; 9 | content?: React.ReactNode; 10 | } 11 | 12 | export default function Sidebar({ isOpen, onClose, content }: SidebarProps) { 13 | return ( 14 | <> 15 | {/* Mobile backdrop */} 16 | {isOpen && ( 17 |
18 | )} 19 | 20 | {/* Sidebar */} 21 |
29 | {/* Mobile close button */} 30 |
31 | 38 |
39 | 40 | {/* Sidebar content */} 41 |
{content}
42 |
43 | 44 | ); 45 | } 46 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This is a [Next.js](https://nextjs.org) project bootstrapped with [`create-next-app`](https://nextjs.org/docs/app/api-reference/cli/create-next-app). 2 | 3 | ## Getting Started 4 | 5 | First, run the development server: 6 | 7 | ```bash 8 | npm run dev 9 | # or 10 | yarn dev 11 | # or 12 | pnpm dev 13 | # or 14 | bun dev 15 | ``` 16 | 17 | Open [http://localhost:3000](http://localhost:3000) with your browser to see the result. 18 | 19 | You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file. 20 | 21 | This project uses [`next/font`](https://nextjs.org/docs/app/building-your-application/optimizing/fonts) to automatically optimize and load [Geist](https://vercel.com/font), a new font family for Vercel. 22 | 23 | ## Learn More 24 | 25 | To learn more about Next.js, take a look at the following resources: 26 | 27 | - [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API. 28 | - [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial. 29 | 30 | You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js) - your feedback and contributions are welcome! 31 | 32 | ## Deploy on Vercel 33 | 34 | The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js. 35 | 36 | Check out our [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details. 
37 | -------------------------------------------------------------------------------- /src/components/layout/PageHeader.tsx: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import React from 'react'; 4 | import Breadcrumbs from './Breadcrumbs'; 5 | 6 | interface BreadcrumbItem { 7 | name: string; 8 | href?: string; 9 | icon?: React.ReactNode; 10 | } 11 | 12 | interface PageHeaderProps { 13 | title: string; 14 | description: string; 15 | flowDescription?: string; 16 | breadcrumbs?: BreadcrumbItem[]; 17 | actions?: React.ReactNode; 18 | } 19 | 20 | export default function PageHeader({ 21 | title, 22 | description, 23 | flowDescription, 24 | breadcrumbs, 25 | actions, 26 | }: PageHeaderProps) { 27 | return ( 28 |
29 |
30 | {/* Breadcrumbs */} 31 | 32 | 33 | {/* Header content */} 34 |
35 |
36 |

{title}

37 |

{description}

38 | {flowDescription && ( 39 |

40 | Flow: {flowDescription} 41 |

42 | )} 43 |
44 | 45 | {/* Actions */} 46 | {actions &&
{actions}
} 47 |
48 |
49 |
50 | ); 51 | } 52 | -------------------------------------------------------------------------------- /src/components/layout/PageTransition.tsx: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import React, { useEffect, useState } from 'react'; 4 | import { usePathname } from 'next/navigation'; 5 | 6 | interface PageTransitionProps { 7 | children: React.ReactNode; 8 | } 9 | 10 | export default function PageTransition({ children }: PageTransitionProps) { 11 | const pathname = usePathname(); 12 | const [isLoading, setIsLoading] = useState(false); 13 | const [displayChildren, setDisplayChildren] = useState(children); 14 | 15 | useEffect(() => { 16 | setIsLoading(true); 17 | 18 | // Small delay to show transition effect 19 | const timer = setTimeout(() => { 20 | setDisplayChildren(children); 21 | setIsLoading(false); 22 | }, 150); 23 | 24 | return () => clearTimeout(timer); 25 | }, [pathname, children]); 26 | 27 | return ( 28 |
29 | {/* Loading overlay */} 30 | {isLoading && ( 31 |
32 |
33 |
34 |

Loading...

35 |
36 |
37 | )} 38 | 39 | {/* Page content with fade transition */} 40 |
46 | {displayChildren} 47 |
48 |
49 | ); 50 | } 51 | -------------------------------------------------------------------------------- /src/components/treesitter/LanguageSelector.tsx: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import React from 'react'; 4 | 5 | interface LanguageSelectorProps { 6 | value: string; 7 | onChange: (language: string) => void; 8 | } 9 | 10 | const LANGUAGES = [ 11 | { value: 'bash', label: 'Bash' }, 12 | { value: 'c', label: 'C' }, 13 | { value: 'cpp', label: 'C++' }, 14 | { value: 'c_sharp', label: 'C#' }, 15 | { value: 'go', label: 'Go' }, 16 | { value: 'java', label: 'Java' }, 17 | { value: 'javascript', label: 'JavaScript' }, 18 | { value: 'kotlin', label: 'Kotlin' }, 19 | { value: 'lua', label: 'Lua' }, 20 | { value: 'php', label: 'PHP' }, 21 | { value: 'python', label: 'Python' }, 22 | { value: 'rescript', label: 'ReScript' }, 23 | { value: 'rust', label: 'Rust' }, 24 | { value: 'swift', label: 'Swift' }, 25 | { value: 'tlaplus', label: 'TLA+' }, 26 | { value: 'typescript', label: 'TypeScript' }, 27 | { value: 'zig', label: 'Zig' }, 28 | ]; 29 | 30 | export default function LanguageSelector({ value, onChange }: LanguageSelectorProps) { 31 | return ( 32 |
33 | 36 | 48 |
/** Build a JSON error response with the given HTTP status. */
function jsonError(message: string, status: number): Response {
  return new Response(JSON.stringify({ error: message }), {
    status,
    headers: { 'Content-Type': 'application/json' },
  });
}

/**
 * POST /api/chat
 * Stream a chat completion for the given UI messages.
 *
 * Request body: `{ messages: UIMessage[], config?: {...} }`. The optional
 * `config` fields read here are `enableRAG`, `enableMemory`, `enableTools`
 * (each appends a sentence to the system prompt), `model` (defaults to
 * `'deepseek-chat'`) and `temperature` (defaults to `0.7`).
 *
 * Responses:
 * - the UI message stream produced by `streamText` on success;
 * - 400 when `messages` is missing or not an array;
 * - 500 for any other failure (details are logged, not returned).
 */
export async function POST(req: Request) {
  try {
    // Optional chaining keeps a `null` JSON body on the 400 path instead of
    // throwing during destructuring.
    const body = await req.json();
    const messages: unknown = body?.messages;
    const config = body?.config;

    // A malformed request is the caller's fault: answer 400 rather than
    // letting it fall through to the generic 500 handler.
    if (!Array.isArray(messages)) {
      return jsonError('Messages must be an array', 400);
    }

    // Build system prompt based on config
    let systemPrompt = 'You are a helpful AI assistant.';

    if (config?.enableRAG) {
      systemPrompt +=
        ' You have access to relevant documents and can provide information based on them.';
    }

    if (config?.enableMemory) {
      systemPrompt += ' You remember previous conversations and can reference them.';
    }

    if (config?.enableTools) {
      systemPrompt +=
        ' You have access to various tools and can use them to help answer questions.';
    }

    // Convert UIMessages to ModelMessages
    const modelMessages = convertToModelMessages(messages);

    const result = streamText({
      model: deepseek(config?.model || 'deepseek-chat'),
      messages: modelMessages,
      // `??` rather than `||`: a temperature of 0 is a valid request and
      // must not be replaced by the default.
      temperature: config?.temperature ?? 0.7,
      system: systemPrompt,
    });

    return result.toUIMessageStreamResponse();
  } catch (error) {
    console.error('Chat API Error:', error);
    return jsonError('Failed to process chat request', 500);
  }
}
/src/components/layout/StatusIndicator.tsx: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import React, { useState, useEffect } from 'react'; 4 | import { usePathname } from 'next/navigation'; 5 | 6 | interface StatusIndicatorProps { 7 | className?: string; 8 | } 9 | 10 | export default function StatusIndicator({ className = '' }: StatusIndicatorProps) { 11 | const pathname = usePathname(); 12 | const [isLoading, setIsLoading] = useState(false); 13 | const [isOnline, setIsOnline] = useState(true); 14 | 15 | // Monitor route changes 16 | useEffect(() => { 17 | setIsLoading(true); 18 | const timer = setTimeout(() => setIsLoading(false), 300); 19 | return () => clearTimeout(timer); 20 | }, [pathname]); 21 | 22 | // Monitor online status 23 | useEffect(() => { 24 | const handleOnline = () => setIsOnline(true); 25 | const handleOffline = () => setIsOnline(false); 26 | 27 | window.addEventListener('online', handleOnline); 28 | window.addEventListener('offline', handleOffline); 29 | 30 | return () => { 31 | window.removeEventListener('online', handleOnline); 32 | window.removeEventListener('offline', handleOffline); 33 | }; 34 | }, []); 35 | 36 | return ( 37 |
38 | {/* Loading indicator */} 39 | {isLoading && ( 40 |
41 |
42 | Loading... 43 |
44 | )} 45 | 46 | {/* Online status */} 47 |
48 |
49 | {isOnline ? 'Online' : 'Offline'} 50 |
51 |
52 | ); 53 | } 54 | -------------------------------------------------------------------------------- /src/app/api/rag/vector-search/route.ts: -------------------------------------------------------------------------------- 1 | import { NextRequest, NextResponse } from 'next/server'; 2 | import { initializeVectorStore, vectorSearch } from '@/lib/rag/vector-store'; 3 | 4 | /** 5 | * POST /api/rag/vector-search 6 | * Perform vector search using cosine similarity 7 | */ 8 | export async function POST(request: NextRequest) { 9 | try { 10 | const { embedding, topK = 5 } = await request.json(); 11 | 12 | if (!embedding || !Array.isArray(embedding)) { 13 | return NextResponse.json( 14 | { error: 'Embedding parameter is required and must be an array' }, 15 | { status: 400 } 16 | ); 17 | } 18 | 19 | // Ensure vector store is initialized 20 | await initializeVectorStore(); 21 | 22 | // Perform vector search 23 | const results = await vectorSearch(embedding, topK); 24 | 25 | // Transform results to include similarity scores 26 | const searchResults = results.map((doc, index) => ({ 27 | id: doc.id, 28 | title: doc.title, 29 | content: doc.content, 30 | score: calculateSimilarity(embedding, doc.embedding), 31 | rank: index + 1, 32 | })); 33 | 34 | return NextResponse.json({ 35 | success: true, 36 | resultCount: searchResults.length, 37 | results: searchResults, 38 | topK, 39 | timestamp: Date.now(), 40 | }); 41 | } catch (error) { 42 | console.error('Vector search error:', error); 43 | return NextResponse.json( 44 | { 45 | error: error instanceof Error ? 
/**
 * Calculate cosine similarity between two vectors.
 *
 * @param a - first vector (e.g. the query embedding)
 * @param b - second vector (e.g. a stored document embedding)
 * @returns a value in [-1, 1]; 0 when the vectors cannot be compared
 *   (either is not an array, lengths differ, either has zero magnitude,
 *   or a component is NaN/Infinity).
 */
function calculateSimilarity(a: number[], b: number[]): number {
  // A missing embedding must not crash the whole search response.
  if (!Array.isArray(a) || !Array.isArray(b) || a.length !== b.length) {
    return 0;
  }

  let dotProduct = 0;
  let normA = 0;
  let normB = 0;

  for (let i = 0; i < a.length; i++) {
    dotProduct += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }

  const denominator = Math.sqrt(normA) * Math.sqrt(normB);
  if (denominator === 0 || !Number.isFinite(denominator)) {
    return 0;
  }

  const similarity = dotProduct / denominator;
  // NaN would be serialized as `null` in the JSON response; report 0 instead.
  if (!Number.isFinite(similarity)) {
    return 0;
  }

  // Floating-point rounding can yield e.g. 1.0000000000000002 for identical
  // vectors; keep the result inside the mathematical range.
  return Math.min(1, Math.max(-1, similarity));
}
See https://playwright.dev/docs/trace-viewer */ 24 | trace: 'on-first-retry', 25 | }, 26 | 27 | /* Configure projects for major browsers */ 28 | projects: [ 29 | { 30 | name: 'chromium', 31 | use: { ...devices['Desktop Chrome'] }, 32 | }, 33 | 34 | { 35 | name: 'firefox', 36 | use: { ...devices['Desktop Firefox'] }, 37 | }, 38 | 39 | { 40 | name: 'webkit', 41 | use: { ...devices['Desktop Safari'] }, 42 | }, 43 | 44 | /* Test against mobile viewports. */ 45 | // { 46 | // name: 'Mobile Chrome', 47 | // use: { ...devices['Pixel 5'] }, 48 | // }, 49 | // { 50 | // name: 'Mobile Safari', 51 | // use: { ...devices['iPhone 12'] }, 52 | // }, 53 | 54 | /* Test against branded browsers. */ 55 | // { 56 | // name: 'Microsoft Edge', 57 | // use: { ...devices['Desktop Edge'], channel: 'msedge' }, 58 | // }, 59 | // { 60 | // name: 'Google Chrome', 61 | // use: { ...devices['Desktop Chrome'], channel: 'chrome' }, 62 | // }, 63 | ], 64 | 65 | /* Run your local dev server before starting the tests */ 66 | webServer: { 67 | command: 'npm run dev', 68 | url: 'http://localhost:3001', 69 | reuseExistingServer: !process.env.CI, 70 | }, 71 | }); 72 | -------------------------------------------------------------------------------- /src/components/layout/AppLayout.tsx: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import React, { useState } from 'react'; 4 | import { usePathname } from 'next/navigation'; 5 | import MainNavigation from './MainNavigation'; 6 | import Sidebar from './Sidebar'; 7 | import PageTransition from './PageTransition'; 8 | import ScrollToTop from './ScrollToTop'; 9 | 10 | interface AppLayoutProps { 11 | children: React.ReactNode; 12 | showSidebar?: boolean; 13 | sidebarContent?: React.ReactNode; 14 | maxWidth?: 'full' | '7xl' | '6xl' | '5xl'; 15 | } 16 | 17 | export default function AppLayout({ 18 | children, 19 | showSidebar = false, 20 | sidebarContent, 21 | maxWidth = '7xl', 22 | }: AppLayoutProps) { 
23 | const [sidebarOpen, setSidebarOpen] = useState(false); 24 | const pathname = usePathname(); 25 | 26 | // Determine if we're on the main workbench page 27 | const isWorkbenchPage = pathname === '/'; 28 | 29 | const maxWidthClass = { 30 | full: 'max-w-full', 31 | '7xl': 'max-w-7xl', 32 | '6xl': 'max-w-6xl', 33 | '5xl': 'max-w-5xl', 34 | }[maxWidth]; 35 | 36 | return ( 37 |
38 | {/* Main Navigation */} 39 | setSidebarOpen(!sidebarOpen)} 41 | showSidebarToggle={showSidebar} 42 | /> 43 | 44 |
45 | {/* Sidebar for workbench page */} 46 | {showSidebar && ( 47 | setSidebarOpen(false)} 50 | content={sidebarContent} 51 | /> 52 | )} 53 | 54 | {/* Main Content */} 55 |
56 | 57 | {isWorkbenchPage ? ( 58 | // For workbench page, use full height layout 59 |
{children}
60 | ) : ( 61 | // For other pages, use standard layout with padding 62 |
{children}
63 | )} 64 |
65 |
66 |
67 | 68 | {/* Scroll to top button */} 69 | 70 |
import { streamText } from 'ai';
import { createDeepSeek } from '@ai-sdk/deepseek';
import { NextRequest, NextResponse } from 'next/server';

const deepseek = createDeepSeek({
  apiKey: process.env.DEEPSEEK_API_KEY || '',
});

interface SearchResultItem {
  metadata?: { title?: string };
  content: string;
  score?: number;
}

/**
 * POST /api/rag/execute
 *
 * Asks the model to answer `query` using the supplied search results as
 * context and returns the complete answer as JSON.
 *
 * Request body: `{ query: string, searchResults?: SearchResultItem[] }`.
 *
 * @returns `{ success, query, result, usage, timestamp }` on success,
 *   400 when `query` is missing or not a string, 500 on any other failure.
 */
export async function POST(request: NextRequest) {
  try {
    const { query, searchResults } = (await request.json()) as {
      query: string;
      searchResults: SearchResultItem[];
    };

    if (!query || typeof query !== 'string') {
      return NextResponse.json({ error: 'Query parameter is required' }, { status: 400 });
    }

    // The `as` cast above is unchecked: anything that is not an array is
    // treated as "no results" instead of crashing on `.map`.
    const items: SearchResultItem[] = Array.isArray(searchResults) ? searchResults : [];

    // Format search results for context
    const context =
      items
        .map(
          (result: SearchResultItem, idx: number) =>
            `[${idx + 1}] ${result.metadata?.title || 'Document'} (Score: ${result.score ? (result.score * 100).toFixed(1) : '0'}%)\n${result.content}`
        )
        .join('\n\n') || 'No search results available';

    // Use streaming to generate the answer
    const result = streamText({
      model: deepseek('deepseek-chat'),
      messages: [
        {
          role: 'user',
          content: `Based on the following search results, please answer the user's query comprehensively.

User Query: "${query}"

Search Results:
${context}

Please provide a detailed answer that:
1. Directly addresses the user's query
2. Cites the relevant search results
3. Explains the key concepts
4. Is clear and well-structured`,
        },
      ],
      temperature: 0.7,
    });

    // `text` and `usage` are both promises on a streamText result; resolve
    // them before serializing, otherwise `usage` is emitted as `{}`.
    const [fullText, usage] = await Promise.all([result.text, result.usage]);

    return NextResponse.json({
      success: true,
      query,
      result: fullText,
      usage,
      timestamp: Date.now(),
    });
  } catch (error) {
    console.error('RAG execution error:', error);
    return NextResponse.json(
      {
        error: error instanceof Error ? error.message : 'Unknown error',
      },
      { status: 500 }
    );
  }
}
/**
 * Batch embedding generator for vector search
 * Generates embeddings for multiple documents using GLM API
 */

const GLM_API_URL = 'https://open.bigmodel.cn/api/paas/v4/embeddings';

export interface EmbeddingResult {
  text: string;
  embedding: number[];
}

interface GLMEmbeddingResponse {
  data: Array<{
    embedding: number[];
    index: number;
  }>;
  model: string;
  usage: {
    prompt_tokens: number;
    total_tokens: number;
  };
}

/**
 * Generate embeddings for a batch of texts using GLM API.
 *
 * @param texts - texts to embed; an empty array short-circuits to `[]`
 *   without calling the API.
 * @returns one `EmbeddingResult` per input text, in input order. A text the
 *   API returned no vector for gets an empty `embedding` array.
 * @throws Error when `GLM_API_KEY` is not configured, when the API answers
 *   with a non-2xx status, or when the response contains no embedding data.
 */
export async function generateBatchEmbeddings(texts: string[]): Promise<EmbeddingResult[]> {
  // Read the key per call so a key provided after this module was first
  // imported (late env loading, tests) is still picked up.
  const apiKey = process.env.GLM_API_KEY || '';
  if (!apiKey) {
    throw new Error('GLM_API_KEY is not configured');
  }

  if (texts.length === 0) {
    return [];
  }

  try {
    // Call GLM embedding API with batch of texts
    const response = await fetch(GLM_API_URL, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${apiKey}`,
      },
      body: JSON.stringify({
        model: 'embedding-3',
        input: texts,
      }),
    });

    if (!response.ok) {
      const errorData = await response.text();
      console.error('GLM API error:', response.status, errorData);
      throw new Error(`GLM API error: ${response.status}`);
    }

    const data: GLMEmbeddingResponse = await response.json();

    if (!data.data || data.data.length === 0) {
      throw new Error('No embedding data returned from GLM API');
    }

    if (data.data.length !== texts.length) {
      // Kept non-fatal (missing entries become []), but no longer silent.
      console.warn(
        `GLM API returned ${data.data.length} embeddings for ${texts.length} texts`
      );
    }

    // Pair vectors with texts through the `index` field of each item rather
    // than array position, so an out-of-order response cannot attach a
    // vector to the wrong text. Fall back to position if `index` is absent.
    const embeddingByIndex = new Map<number, number[]>();
    data.data.forEach((item, position) => {
      const key = typeof item.index === 'number' ? item.index : position;
      embeddingByIndex.set(key, item.embedding);
    });

    return texts.map((text, index) => ({
      text,
      embedding: embeddingByIndex.get(index) || [],
    }));
  } catch (error) {
    console.error('Batch embedding generation error:', error);
    throw error;
  }
}

/**
 * Generate embedding for a single text.
 *
 * @param text - the text to embed
 * @returns the embedding vector for `text`
 * @throws Error when no result is produced, plus anything
 *   `generateBatchEmbeddings` throws.
 */
export async function generateSingleEmbedding(text: string): Promise<number[]> {
  const [first] = await generateBatchEmbeddings([text]);
  if (!first) {
    throw new Error('Failed to generate embedding');
  }
  return first.embedding;
}
generateBatchEmbeddings } from '@/lib/rag/batch-embedding-generator'; 5 | 6 | let initialized = false; 7 | 8 | /** 9 | * POST /api/rag/vector-store-init 10 | * Initialize the vector store with sample documents and their embeddings 11 | */ 12 | export async function POST() { 13 | try { 14 | if (!initialized) { 15 | // Generate mock documents 16 | const mockDocs = generateMockDocuments(); 17 | 18 | // Extract texts for embedding generation 19 | const texts = mockDocs.map((doc) => doc.content); 20 | 21 | // Generate embeddings using GLM API 22 | const embeddingResults = await generateBatchEmbeddings(texts); 23 | 24 | // Combine documents with their embeddings 25 | const docsWithEmbeddings = mockDocs.map((doc, index) => ({ 26 | id: doc.id, 27 | title: doc.title, 28 | content: doc.content, 29 | embedding: embeddingResults[index]?.embedding || [], 30 | createdAt: Date.now(), 31 | })); 32 | 33 | // Initialize vector store with documents and embeddings 34 | await initializeVectorStore(docsWithEmbeddings); 35 | initialized = true; 36 | } 37 | 38 | return NextResponse.json({ 39 | success: true, 40 | message: 'Vector store initialized successfully with GLM embeddings', 41 | initialized: true, 42 | timestamp: Date.now(), 43 | }); 44 | } catch (error) { 45 | console.error('Vector store initialization error:', error); 46 | return NextResponse.json( 47 | { 48 | error: error instanceof Error ? error.message : 'Unknown error', 49 | }, 50 | { status: 500 } 51 | ); 52 | } 53 | } 54 | 55 | /** 56 | * GET /api/rag/vector-store-init 57 | * Check if vector store is initialized 58 | */ 59 | export async function GET() { 60 | try { 61 | const store = getVectorStore(); 62 | 63 | return NextResponse.json({ 64 | success: true, 65 | initialized: store !== null, 66 | message: store ? 
'Vector store is initialized' : 'Vector store is not initialized', 67 | timestamp: Date.now(), 68 | }); 69 | } catch (error) { 70 | console.error('Vector store status error:', error); 71 | return NextResponse.json( 72 | { 73 | error: error instanceof Error ? error.message : 'Unknown error', 74 | }, 75 | { status: 500 } 76 | ); 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "context-engineer-workbench", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev --turbopack", 7 | "build": "next build --turbopack", 8 | "start": "next start", 9 | "lint": "eslint", 10 | "test": "jest", 11 | "test:watch": "jest --watch", 12 | "test:coverage": "jest --coverage", 13 | "test:e2e": "playwright test", 14 | "test:e2e:ui": "playwright test --ui", 15 | "test:e2e:headed": "playwright test --headed", 16 | "prepare": "husky install" 17 | }, 18 | "dependencies": { 19 | "@ai-sdk/deepseek": "^1.0.23", 20 | "@ai-sdk/openai": "^2.0.53", 21 | "@ai-sdk/react": "^2.0.76", 22 | "@codemirror/autocomplete": "^6.19.0", 23 | "@codemirror/basic-setup": "^0.20.0", 24 | "@codemirror/commands": "^6.9.0", 25 | "@codemirror/lang-cpp": "^6.0.3", 26 | "@codemirror/lang-css": "^6.3.1", 27 | "@codemirror/lang-go": "^6.0.1", 28 | "@codemirror/lang-html": "^6.4.11", 29 | "@codemirror/lang-java": "^6.0.2", 30 | "@codemirror/lang-javascript": "^6.2.4", 31 | "@codemirror/lang-json": "^6.0.2", 32 | "@codemirror/lang-python": "^6.2.1", 33 | "@codemirror/lang-rust": "^6.0.2", 34 | "@codemirror/lang-sql": "^6.10.0", 35 | "@codemirror/lang-xml": "^6.1.0", 36 | "@codemirror/language": "^6.11.3", 37 | "@codemirror/search": "^6.5.11", 38 | "@codemirror/state": "^6.5.2", 39 | "@codemirror/view": "^6.38.6", 40 | "@heroicons/react": "^2.2.0", 41 | "@langchain/community": "^1.0.0", 42 | "@langchain/core": "^1.0.1", 43 | 
"@langchain/textsplitters": "^1.0.0", 44 | "ai": "^5.0.76", 45 | "d3": "^7.9.0", 46 | "gpt-tokenizer": "^3.2.0", 47 | "graphology": "^0.26.0", 48 | "graphology-types": "^0.24.8", 49 | "langchain": "^1.0.1", 50 | "next": "15.5.6", 51 | "openai": "^6.6.0", 52 | "playwright": "^1.56.1", 53 | "react": "19.1.0", 54 | "react-dom": "19.1.0", 55 | "react-markdown": "^10.1.0", 56 | "rxdb": "^16.20.0", 57 | "rxjs": "^7.8.2", 58 | "zod": "^3.25.76", 59 | "zod-to-json-schema": "^3.24.6" 60 | }, 61 | "devDependencies": { 62 | "@eslint/eslintrc": "^3", 63 | "@playwright/test": "^1.56.1", 64 | "@tailwindcss/postcss": "^4", 65 | "@types/d3": "^7.4.3", 66 | "@types/jest": "^30.0.0", 67 | "@types/node": "^20", 68 | "@types/react": "^19", 69 | "@types/react-dom": "^19", 70 | "eslint": "^9", 71 | "eslint-config-next": "15.5.6", 72 | "eslint-config-prettier": "^10.1.8", 73 | "eslint-plugin-prettier": "^5.5.4", 74 | "husky": "^9.1.7", 75 | "jest": "^30.2.0", 76 | "prettier": "^3.6.2", 77 | "tailwindcss": "^4", 78 | "ts-jest": "^29.4.5", 79 | "typescript": "^5" 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/app/api/rag/generate-candidates/route.ts: -------------------------------------------------------------------------------- 1 | import { streamText } from 'ai'; 2 | import { createDeepSeek } from '@ai-sdk/deepseek'; 3 | import { NextRequest, NextResponse } from 'next/server'; 4 | 5 | const deepseek = createDeepSeek({ 6 | apiKey: process.env.DEEPSEEK_API_KEY || '', 7 | }); 8 | 9 | interface CandidateDocument { 10 | id: string; 11 | title: string; 12 | content: string; 13 | } 14 | 15 | /** 16 | * POST /api/rag/generate-candidates 17 | * Generate candidate documents related to the query using AI 18 | * These candidates will be used for vector embedding and similarity search 19 | */ 20 | export async function POST(request: NextRequest) { 21 | try { 22 | const { query } = await request.json(); 23 | 24 | if (!query || typeof query !== 
'string') { 25 | return NextResponse.json({ error: 'Query parameter is required' }, { status: 400 }); 26 | } 27 | 28 | // Use AI to generate candidate documents related to the query 29 | const result = await streamText({ 30 | model: deepseek('deepseek-chat'), 31 | messages: [ 32 | { 33 | role: 'user', 34 | content: `Generate 5 candidate documents related to the query: "${query}" 35 | 36 | Each document should be a realistic paragraph (3-4 sentences) that could be relevant to the query. 37 | Format your response as a JSON array with this structure: 38 | [ 39 | { 40 | "title": "Document Title", 41 | "content": "Document content here..." 42 | }, 43 | ... 44 | ] 45 | 46 | Make sure the documents are diverse and cover different aspects of the query topic.`, 47 | }, 48 | ], 49 | temperature: 0.7, 50 | }); 51 | 52 | // Get the full text response 53 | const fullText = await result.text; 54 | 55 | // Parse the JSON response 56 | const jsonMatch = fullText.match(/\[[\s\S]*\]/); 57 | if (!jsonMatch) { 58 | throw new Error('Failed to parse candidate documents response'); 59 | } 60 | 61 | interface ParsedDocument { 62 | title: string; 63 | content: string; 64 | } 65 | 66 | const documents: ParsedDocument[] = JSON.parse(jsonMatch[0]); 67 | 68 | // Add IDs to documents 69 | const candidateDocuments: CandidateDocument[] = documents.map((doc, idx) => ({ 70 | id: `candidate-${idx}`, 71 | title: doc.title, 72 | content: doc.content, 73 | })); 74 | 75 | return NextResponse.json({ 76 | success: true, 77 | query, 78 | candidates: candidateDocuments, 79 | count: candidateDocuments.length, 80 | usage: result.usage, 81 | }); 82 | } catch (error) { 83 | console.error('Generate candidates error:', error); 84 | return NextResponse.json( 85 | { 86 | error: error instanceof Error ? 
/* eslint-disable @typescript-eslint/no-explicit-any */
import { StructuredTool } from '@langchain/core/tools';
import { Tool, ToolExecutionResult } from '../tools/base';
import { z } from 'zod';

/**
 * Adapts one of our `Tool` implementations to a LangChain tool.
 * Our own abstraction stays the source of truth; LangChain.js is adapted
 * to our implementation, not the other way round.
 */
export class ToolToLangChainAdapter extends StructuredTool {
  private ourTool: Tool;
  name: string;
  description: string;
  schema: z.ZodSchema;

  /**
   * @param ourTool - the tool to wrap; its name, description and parameter
   *   schema are copied onto the LangChain-facing fields.
   */
  constructor(ourTool: Tool) {
    // Computed before super() because it only depends on the argument.
    const schema = jsonSchemaToZod(ourTool.getParameters());

    super();

    this.ourTool = ourTool;
    this.name = ourTool.getName();
    this.description = ourTool.getDescription();
    this.schema = schema;
  }

  /**
   * Run the wrapped tool and return its result as a JSON string.
   *
   * @param input - the tool parameters, either as an object or as a JSON
   *   string encoding that object (direct callers pass both forms).
   * @returns the JSON-serialized `data` of the execution result (or the whole
   *   result when `data` is falsy). Failures, including unparseable JSON
   *   input, are reported as `{"success":false,"error":...}` instead of
   *   being thrown.
   */
  async _call(input: Record<string, any> | string): Promise<string> {
    try {
      // Decode string input inside the try so invalid JSON is reported
      // through the same error envelope as a failing tool.
      const params: Record<string, any> = typeof input === 'string' ? JSON.parse(input) : input;
      const result = await this.ourTool.call(params);
      return JSON.stringify(result.data || result);
    } catch (error) {
      return JSON.stringify({
        success: false,
        error: error instanceof Error ? error.message : String(error),
      });
    }
  }

  /**
   * Get the original (wrapped) Tool object.
   */
  getOriginalTool(): Tool {
    return this.ourTool;
  }

  /**
   * Execute the wrapped tool and return its full result, metadata included.
   */
  async executeWithMetadata(params: Record<string, any>): Promise<ToolExecutionResult> {
    return this.ourTool.call(params);
  }
}

/**
 * Map a JSON Schema primitive `type` to the matching Zod schema.
 * Unknown or missing types accept any value.
 */
function zodForJsonType(type: unknown): z.ZodSchema {
  switch (type) {
    case 'string':
      return z.string();
    case 'number':
      return z.number();
    case 'integer':
      return z.number().int();
    case 'boolean':
      return z.boolean();
    case 'array':
      return z.array(z.any());
    case 'object':
      return z.record(z.any());
    default:
      return z.any();
  }
}

/**
 * Convert a JSON Schema object description into a Zod schema.
 *
 * Only the top-level `properties` and `required` keywords are interpreted;
 * nested item/property schemas are accepted as `any`. A schema without
 * `properties` becomes an open record.
 */
function jsonSchemaToZod(schema: any): z.ZodSchema {
  if (!schema || !schema.properties) {
    return z.record(z.any());
  }

  const properties: Record<string, z.ZodSchema> = {};

  for (const [key, prop] of Object.entries(schema.properties)) {
    const base = zodForJsonType((prop as any).type);

    // Properties not listed in `required` are optional.
    properties[key] = schema.required?.includes(key) ? base : base.optional();
  }

  return z.object(properties);
}
// Readable labels for known route segments. A Map is used instead of an
// object literal so that segments such as "constructor" or "toString" cannot
// resolve to members inherited from Object.prototype.
const BREADCRUMB_LABELS: ReadonlyMap<string, string> = new Map([
  ['rag-keyword-playground', 'Keyword Search'],
  ['rag-vector-playground', 'Vector Search'],
  ['rag-graph-playground', 'Graph Search'],
  ['treesitter-playground', 'TreeSitter'],
]);

/**
 * Auto-generate breadcrumbs based on pathname.
 *
 * Each non-empty path segment becomes one item. Known segments get their
 * mapped label; any other segment is shown with its first letter
 * upper-cased. Every item except the last links to the cumulative path up
 * to and including its segment; the last item has `href: undefined`.
 *
 * @param pathname - URL path such as `/docs/treesitter-playground`
 * @returns breadcrumb items in path order (empty for `/` or `''`)
 */
function generateBreadcrumbs(pathname: string): BreadcrumbItem[] {
  const segments = pathname.split('/').filter(Boolean);
  let currentPath = '';

  return segments.map((segment, index) => {
    currentPath += `/${segment}`;
    const isLast = index === segments.length - 1;

    return {
      name: BREADCRUMB_LABELS.get(segment) || segment.charAt(0).toUpperCase() + segment.slice(1),
      href: isLast ? undefined : currentPath,
    };
  });
}
[{ name: 'Home', href: '/', icon: }, ...breadcrumbItems] 60 | : breadcrumbItems; 61 | 62 | return ( 63 | 84 | ); 85 | } 86 | -------------------------------------------------------------------------------- /src/lib/adapters/__tests__/tool-to-langchain.test.ts: -------------------------------------------------------------------------------- 1 | import { ToolToLangChainAdapter } from '../tool-to-langchain'; 2 | import { KeywordSearchTool } from '../../tools/keyword-search-tool'; 3 | import { RAGModule } from '../../rag/rag-module'; 4 | 5 | describe('ToolToLangChainAdapter', () => { 6 | let adapter: ToolToLangChainAdapter; 7 | let keywordSearchTool: KeywordSearchTool; 8 | let ragModule: RAGModule; 9 | 10 | beforeEach(() => { 11 | ragModule = new RAGModule(); 12 | ragModule.addDocuments([ 13 | { 14 | id: 'doc1', 15 | content: 'TypeScript is a programming language', 16 | metadata: { source: 'test' }, 17 | }, 18 | { 19 | id: 'doc2', 20 | content: 'JavaScript runs in the browser', 21 | metadata: { source: 'test' }, 22 | }, 23 | ]); 24 | 25 | keywordSearchTool = new KeywordSearchTool(); 26 | keywordSearchTool.addDocuments([ 27 | { 28 | id: 'doc1', 29 | content: 'TypeScript is a programming language', 30 | metadata: { source: 'test' }, 31 | }, 32 | { 33 | id: 'doc2', 34 | content: 'JavaScript runs in the browser', 35 | metadata: { source: 'test' }, 36 | }, 37 | ]); 38 | adapter = new ToolToLangChainAdapter(keywordSearchTool); 39 | }); 40 | 41 | it('should adapt our tool to LangChain tool', () => { 42 | expect(adapter.lc_namespace).toBeDefined(); 43 | expect(adapter.schema).toBeDefined(); 44 | }); 45 | 46 | it('should execute tool through adapter', async () => { 47 | const result = await adapter.invoke({ 48 | query: 'TypeScript', 49 | topK: 3, 50 | }); 51 | 52 | expect(result).toBeDefined(); 53 | expect(typeof result).toBe('string'); 54 | 55 | const parsed = JSON.parse(result); 56 | expect(parsed).toBeDefined(); 57 | }); 58 | 59 | it('should handle JSON string input', async 
() => { 60 | const result = await adapter._call( 61 | JSON.stringify({ 62 | query: 'JavaScript', 63 | topK: 2, 64 | }) 65 | ); 66 | 67 | expect(result).toBeDefined(); 68 | const parsed = JSON.parse(result); 69 | expect(parsed).toBeDefined(); 70 | }); 71 | 72 | it('should get original tool', () => { 73 | const originalTool = adapter.getOriginalTool(); 74 | expect(originalTool).toBe(keywordSearchTool); 75 | }); 76 | 77 | it('should execute with metadata', async () => { 78 | const result = await adapter.executeWithMetadata({ 79 | query: 'TypeScript', 80 | topK: 3, 81 | }); 82 | 83 | expect(result.success).toBe(true); 84 | expect(result.executionTime).toBeGreaterThanOrEqual(0); 85 | }); 86 | 87 | it('should handle errors gracefully', async () => { 88 | const result = await adapter._call('invalid json'); 89 | const parsed = JSON.parse(result); 90 | expect(parsed.success).toBe(false); 91 | expect(parsed.error).toBeDefined(); 92 | }); 93 | }); 94 | -------------------------------------------------------------------------------- /src/components/WorkbenchProvider.tsx: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import React, { createContext, useContext, useState } from 'react'; 4 | 5 | export interface WorkbenchConfig { 6 | model: string; 7 | provider: string; 8 | temperature: number; 9 | maxTokens: number; 10 | streamResponses: boolean; 11 | enableRAG: boolean; 12 | enableMemory: boolean; 13 | enableTools: boolean; 14 | enableAdvancedPrompting: boolean; 15 | ragConfig: { 16 | chunkSize: number; 17 | chunkOverlap: number; 18 | topK: number; 19 | similarityThreshold: number; 20 | searchMode: 'semantic' | 'keyword' | 'hybrid'; 21 | }; 22 | memoryConfig: { 23 | enableChatHistory: boolean; 24 | historyLength: number; 25 | enableUserProfile: boolean; 26 | }; 27 | } 28 | 29 | interface WorkbenchContextType { 30 | config: WorkbenchConfig; 31 | updateConfig: (updates: Partial) => void; 32 | updateRAGConfig: (updates: 
Partial) => void; 33 | updateMemoryConfig: (updates: Partial) => void; 34 | } 35 | 36 | const WorkbenchContext = createContext(undefined); 37 | 38 | export function useWorkbench() { 39 | const context = useContext(WorkbenchContext); 40 | if (!context) { 41 | throw new Error('useWorkbench must be used within a WorkbenchProvider'); 42 | } 43 | return context; 44 | } 45 | 46 | interface WorkbenchProviderProps { 47 | children: React.ReactNode; 48 | } 49 | 50 | export function WorkbenchProvider({ children }: WorkbenchProviderProps) { 51 | const [config, setConfig] = useState({ 52 | model: 'deepseek-chat', 53 | provider: 'DeepSeek', 54 | temperature: 0.7, 55 | maxTokens: 2000, 56 | streamResponses: true, 57 | enableRAG: false, 58 | enableMemory: false, 59 | enableTools: false, 60 | enableAdvancedPrompting: false, 61 | ragConfig: { 62 | chunkSize: 500, 63 | chunkOverlap: 50, 64 | topK: 3, 65 | similarityThreshold: 0.7, 66 | searchMode: 'semantic', 67 | }, 68 | memoryConfig: { 69 | enableChatHistory: true, 70 | historyLength: 10, 71 | enableUserProfile: false, 72 | }, 73 | }); 74 | 75 | const updateConfig = (updates: Partial) => { 76 | setConfig((prev) => ({ ...prev, ...updates })); 77 | }; 78 | 79 | const updateRAGConfig = (updates: Partial) => { 80 | setConfig((prev) => ({ 81 | ...prev, 82 | ragConfig: { ...prev.ragConfig, ...updates }, 83 | })); 84 | }; 85 | 86 | const updateMemoryConfig = (updates: Partial) => { 87 | setConfig((prev) => ({ 88 | ...prev, 89 | memoryConfig: { ...prev.memoryConfig, ...updates }, 90 | })); 91 | }; 92 | 93 | const value = { 94 | config, 95 | updateConfig, 96 | updateRAGConfig, 97 | updateMemoryConfig, 98 | }; 99 | 100 | return {children}; 101 | } 102 | -------------------------------------------------------------------------------- /src/lib/adapters/__tests__/retriever-to-langchain.test.ts: -------------------------------------------------------------------------------- 1 | import { BM25RetrieverAdapter } from '../retriever-to-langchain'; 2 
| import { BM25Retriever } from '../../rag/bm25-retriever'; 3 | import { Document } from '@langchain/core/documents'; 4 | 5 | describe('BM25RetrieverAdapter', () => { 6 | let adapter: BM25RetrieverAdapter; 7 | let bm25Retriever: BM25Retriever; 8 | 9 | beforeEach(() => { 10 | bm25Retriever = new BM25Retriever(); 11 | 12 | // 添加测试文档 13 | bm25Retriever.addDocuments([ 14 | { 15 | id: 'doc1', 16 | content: 'TypeScript is a programming language', 17 | metadata: { source: 'test', chunkIndex: 0 }, 18 | }, 19 | { 20 | id: 'doc2', 21 | content: 'JavaScript runs in the browser', 22 | metadata: { source: 'test', chunkIndex: 1 }, 23 | }, 24 | { 25 | id: 'doc3', 26 | content: 'Python is used for data science', 27 | metadata: { source: 'test', chunkIndex: 2 }, 28 | }, 29 | ]); 30 | 31 | adapter = new BM25RetrieverAdapter(bm25Retriever, 2); 32 | }); 33 | 34 | it('should adapt BM25Retriever to LangChain Retriever', () => { 35 | expect(adapter).toBeDefined(); 36 | expect(adapter.lc_namespace).toEqual(['context_engineer', 'retrievers']); 37 | }); 38 | 39 | it('should retrieve documents as LangChain Document objects', async () => { 40 | const documents = await adapter.invoke('TypeScript'); 41 | 42 | expect(documents).toBeInstanceOf(Array); 43 | expect(documents.length).toBeGreaterThan(0); 44 | expect(documents[0]).toBeInstanceOf(Document); 45 | expect(documents[0].pageContent).toBeDefined(); 46 | expect(documents[0].metadata).toBeDefined(); 47 | }); 48 | 49 | it('should include score and rank in metadata', async () => { 50 | const documents = await adapter.invoke('JavaScript'); 51 | 52 | expect(documents.length).toBeGreaterThan(0); 53 | const firstDoc = documents[0]; 54 | expect(firstDoc.metadata.score).toBeDefined(); 55 | expect(firstDoc.metadata.rank).toBeDefined(); 56 | expect(typeof firstDoc.metadata.score).toBe('number'); 57 | expect(typeof firstDoc.metadata.rank).toBe('number'); 58 | }); 59 | 60 | it('should respect topK parameter', async () => { 61 | const documents = await 
/**
 * Options accepted by {@link RAGModule}.
 */
export interface RAGModuleConfig {
  /** Registers the keyword-search tool when true; treated as true when omitted. */
  enableKeywordSearch?: boolean;
  /** Passed through unchanged to the KeywordSearchTool constructor. */
  keywordSearchConfig?: {
    defaultTopK?: number;
    maxTopK?: number;
  };
}

/**
 * RAG module: owns the document list and the retrieval tools built on top of it.
 *
 * Documents added here are mirrored into every registered tool that keeps its
 * own index (currently only the keyword-search tool).
 */
export class RAGModule {
  private toolRegistry: ToolRegistry;
  private documents: DocumentChunk[] = [];
  private keywordSearchTool: KeywordSearchTool | null = null;
  private config: RAGModuleConfig;

  /**
   * @param config Optional settings; keyword search is enabled unless it is
   *   explicitly set to `false`.
   */
  constructor(config: RAGModuleConfig = {}) {
    this.config = {
      ...config,
      // Resolve the default after the spread so that an explicit
      // `enableKeywordSearch: undefined` cannot clobber it and silently
      // disable the tool.
      enableKeywordSearch: config.enableKeywordSearch ?? true,
    };

    this.toolRegistry = new ToolRegistry();
    this.initializeTools();
  }

  /**
   * Creates the enabled tools and registers them in the tool registry.
   */
  private initializeTools(): void {
    if (this.config.enableKeywordSearch) {
      this.keywordSearchTool = new KeywordSearchTool(this.config.keywordSearchConfig);
      this.toolRegistry.register(this.keywordSearchTool);
    }
  }

  /**
   * Appends documents to the module and to the keyword-search index (when enabled).
   */
  addDocuments(documents: DocumentChunk[]): void {
    this.documents.push(...documents);

    // Keep every tool-side index in sync with the module's own list.
    if (this.keywordSearchTool) {
      this.keywordSearchTool.addDocuments(documents);
    }
  }

  /**
   * Removes all documents from the module and from the keyword-search index.
   */
  clearDocuments(): void {
    this.documents = [];

    if (this.keywordSearchTool) {
      this.keywordSearchTool.clearDocuments();
    }
  }

  /**
   * @returns Number of documents currently held by the module.
   */
  getDocumentCount(): number {
    return this.documents.length;
  }

  /**
   * @returns A shallow copy of the document list, so callers cannot mutate
   *   the module's internal array.
   */
  getDocuments(): DocumentChunk[] {
    return [...this.documents];
  }

  /**
   * @returns The registry holding every tool created by this module.
   */
  getToolRegistry(): ToolRegistry {
    return this.toolRegistry;
  }

  /**
   * @returns Definitions of all registered tools, as reported by the registry.
   */
  getAvailableTools(): ToolDefinition[] {
    return this.toolRegistry.getAllDefinitions();
  }

  /**
   * Executes a registered tool by name; delegates entirely to the registry.
   *
   * @param toolName Name the tool was registered under.
   * @param params Arguments forwarded to the tool.
   * @returns Whatever the registry's `executeTool` resolves to.
   */
  async executeTool(toolName: string, params: Record<string, any>): Promise<any> {
    return this.toolRegistry.executeTool(toolName, params);
  }

  /**
   * @returns Names of all registered tools.
   */
  listTools(): string[] {
    return this.toolRegistry.listTools();
  }

  /**
   * @returns The tool registered under `name`, or `undefined` when there is none.
   */
  getTool(name: string): Tool | undefined {
    return this.toolRegistry.getTool(name);
  }

  /**
   * @returns The keyword-search tool, or `null` when keyword search is disabled.
   */
  getKeywordSearchTool(): KeywordSearchTool | null {
    return this.keywordSearchTool;
  }
}

/**
 * Convenience factory for {@link RAGModule}.
 */
export function createRAGModule(config?: RAGModuleConfig): RAGModule {
  return new RAGModule(config);
}
/**
 * LangChain RAG retriever.
 *
 * Combines the LangChain-based document chunking pipeline with the project's
 * BM25 retriever and exposes the result through the LangChain retriever API.
 */
export class LangChainRAGRetriever extends BaseRetriever {
  lc_namespace = ['context_engineer', 'retrievers'];
  private documentRAG: LangChainDocumentRAG;
  private bm25Retriever: BM25Retriever;
  private topK: number;

  /**
   * @param topK Number of chunks requested from BM25 for every query.
   */
  constructor(topK: number = 3) {
    super();
    this.documentRAG = new LangChainDocumentRAG();
    this.bm25Retriever = new BM25Retriever();
    this.topK = topK;
  }

  /**
   * Adds documents and rebuilds the BM25 index.
   *
   * `processAll()` re-chunks every document stored so far, not only the new
   * ones. Appending its output to the existing index would therefore index
   * all previously added documents a second time, so the index is rebuilt
   * from scratch and swapped in once it is complete.
   */
  async addDocuments(documents: LoadedDocument[]): Promise<void> {
    this.documentRAG.addDocuments(documents);
    const allChunks = await this.documentRAG.processAll();

    const rebuiltIndex = new BM25Retriever();
    rebuiltIndex.addDocuments(allChunks);
    this.bm25Retriever = rebuiltIndex;
  }

  /**
   * Adds Markdown documents; the title falls back to the source when omitted.
   */
  async addMarkdownDocuments(
    markdownContents: Array<{ content: string; source: string; title?: string }>
  ): Promise<void> {
    const documents = markdownContents.map((doc) => ({
      content: doc.content,
      metadata: {
        source: doc.source,
        title: doc.title || doc.source,
      },
    }));
    await this.addDocuments(documents);
  }

  /**
   * Adds plain texts that all share one source; titles are `<source>-<index>`.
   */
  async addTextDocuments(texts: string[], source: string): Promise<void> {
    const documents = texts.map((text, index) => ({
      content: text,
      metadata: {
        source,
        title: `${source}-${index}`,
      },
    }));
    await this.addDocuments(documents);
  }

  /**
   * Implementation of the abstract LangChain `BaseRetriever` hook.
   *
   * @returns One LangChain `Document` per BM25 hit; the chunk metadata is
   *   extended with the hit's `score`, `rank` and chunk `id`.
   */
  async _getRelevantDocuments(query: string): Promise<Document[]> {
    const result = await this.bm25Retriever.retrieve(query, this.topK);

    return result.chunks.map(
      (item) =>
        new Document({
          pageContent: item.chunk.content,
          metadata: {
            ...item.chunk.metadata,
            score: item.score,
            rank: item.rank,
            id: item.chunk.id,
          },
        })
    );
  }

  /**
   * @returns Document/chunk statistics reported by the document pipeline.
   */
  getStats() {
    return this.documentRAG.getStats();
  }

  /**
   * Drops all documents and replaces the BM25 index with an empty one.
   */
  clear(): void {
    this.documentRAG.clear();
    this.bm25Retriever = new BM25Retriever();
  }
}

/**
 * Convenience factory: creates a retriever and indexes `documents`.
 */
export async function createLangChainRAGRetriever(
  documents: LoadedDocument[],
  topK: number = 3
): Promise<LangChainRAGRetriever> {
  const retriever = new LangChainRAGRetriever(topK);
  await retriever.addDocuments(documents);
  return retriever;
}

/**
 * Convenience factory: creates a retriever from Markdown documents.
 */
export async function createRAGFromMarkdown(
  markdownDocs: Array<{ content: string; source: string; title?: string }>,
  topK: number = 3
): Promise<LangChainRAGRetriever> {
  const retriever = new LangChainRAGRetriever(topK);
  await retriever.addMarkdownDocuments(markdownDocs);
  return retriever;
}
/**
 * Hierarchical node structure for Icicle visualization
 */
export interface HierarchicalNode {
  name: string;
  id: string;
  type: string;
  color?: string;
  size?: number;
  children?: HierarchicalNode[];
  metadata?: Record<string, unknown>;
}

/**
 * Wraps leaf nodes in a root -> category -> leaf tree.
 *
 * One category node (id `type:<type>`) is created per distinct `type`, in
 * order of first appearance; leaves keep their input order inside a category.
 */
function buildTypeHierarchy(leaves: HierarchicalNode[]): HierarchicalNode {
  const leavesByType = new Map<string, HierarchicalNode[]>();

  for (const leaf of leaves) {
    const bucket = leavesByType.get(leaf.type);
    if (bucket) {
      bucket.push(leaf);
    } else {
      leavesByType.set(leaf.type, [leaf]);
    }
  }

  const categories: HierarchicalNode[] = [];
  leavesByType.forEach((children, type) => {
    categories.push({
      name: type,
      id: `type:${type}`,
      type: 'category',
      children,
    });
  });

  return {
    name: 'Code Structure',
    id: 'root',
    type: 'root',
    children: categories,
  };
}

/**
 * Convert a flat graph to a hierarchical structure.
 *
 * Nodes are grouped by their `type` attribute under category nodes, which in
 * turn hang off a single root. Graph edges are not represented in the result.
 *
 * @param graph Graph whose node attributes supply label, type, color, size
 *   and metadata for each leaf.
 * @returns Root of the hierarchy.
 */
export function graphToHierarchy(graph: CodeGraph): HierarchicalNode {
  const leaves: HierarchicalNode[] = [];

  graph.nodes().forEach((nodeId) => {
    const attrs = graph.getNodeAttributes(nodeId);
    leaves.push({
      name: attrs.label,
      id: nodeId,
      type: attrs.type,
      color: attrs.color,
      size: attrs.size,
      metadata: attrs.metadata,
    });
  });

  return buildTypeHierarchy(leaves);
}

/**
 * Convert GraphData to hierarchical structure for Icicle visualization.
 *
 * Same grouping as {@link graphToHierarchy}, but reads the plain node list
 * and does not copy any metadata onto the leaves.
 */
export function graphDataToHierarchy(graphData: GraphData): HierarchicalNode {
  const leaves: HierarchicalNode[] = [];

  graphData.nodes.forEach((node) => {
    leaves.push({
      name: node.label,
      id: node.id,
      type: node.type,
      color: node.color,
      size: node.size,
    });
  });

  return buildTypeHierarchy(leaves);
}

/**
 * Build a hierarchical graph from source code.
 *
 * @param code Source text handed to `buildCodeGraphFn`.
 * @param language Language identifier handed to `buildCodeGraphFn`.
 * @param buildCodeGraphFn Injected parser that turns code into a graph.
 * @returns The hierarchy derived from the parsed graph.
 */
export async function buildHierarchicalGraph(
  code: string,
  language: string,
  buildCodeGraphFn: (code: string, language: string) => Promise<CodeGraph>
): Promise<HierarchicalNode> {
  const graph = await buildCodeGraphFn(code, language);
  return graphToHierarchy(graph);
}
algorithm. Use for finding documents with specific terms or keywords.', 31 | parameters: { 32 | type: 'object', 33 | properties: { 34 | query: { 35 | type: 'string', 36 | description: 'The search query containing keywords to find', 37 | }, 38 | topK: { 39 | type: 'number', 40 | description: 'Number of top results to return (default: 3, max: 20)', 41 | default: 3, 42 | }, 43 | }, 44 | required: ['query'], 45 | }, 46 | }; 47 | 48 | super(definition); 49 | this.retriever = new BM25Retriever(); 50 | this.config = { 51 | defaultTopK: 3, 52 | maxTopK: 20, 53 | ...config, 54 | }; 55 | } 56 | 57 | /** 58 | * 添加文档到索引 59 | */ 60 | addDocuments(documents: DocumentChunk[]): void { 61 | this.retriever.addDocuments(documents); 62 | } 63 | 64 | /** 65 | * 清空文档索引 66 | */ 67 | clearDocuments(): void { 68 | this.retriever.clearDocuments(); 69 | } 70 | 71 | /** 72 | * 获取文档数量 73 | */ 74 | getDocumentCount(): number { 75 | return this.retriever.getDocumentCount(); 76 | } 77 | 78 | /** 79 | * 执行关键词搜索 80 | */ 81 | async execute(params: Record): Promise { 82 | const startTime = Date.now(); 83 | const query = params.query as string; 84 | let topK = params.topK as number | undefined; 85 | 86 | // 使用默认值或限制 topK 87 | if (!topK) { 88 | topK = this.config.defaultTopK || 3; 89 | } else if (topK > (this.config.maxTopK || 20)) { 90 | topK = this.config.maxTopK || 20; 91 | } 92 | 93 | try { 94 | const result: RetrievalResult = await this.retriever.retrieve(query, topK); 95 | 96 | return { 97 | success: true, 98 | data: { 99 | query: result.query, 100 | results: result.chunks.map((item) => ({ 101 | id: item.chunk.id, 102 | content: item.chunk.content, 103 | metadata: item.chunk.metadata, 104 | score: item.score, 105 | rank: item.rank, 106 | })), 107 | totalResults: result.chunks.length, 108 | retrieverName: result.retrieverName, 109 | executionTime: result.totalTime, 110 | }, 111 | executionTime: Date.now() - startTime, 112 | metadata: { 113 | query, 114 | topK, 115 | documentCount: 
/** Result count used when the request carries no usable `topK`. */
const DEFAULT_TOP_K = 3;

// Shared initialisation promise for the module-level retriever. Storing the
// promise (rather than the instance) lets concurrent requests wait for the
// same indexing run instead of searching a half-built index.
let ragRetrieverInit: Promise<LangChainRAGRetriever> | null = null;

/**
 * Lazily creates the shared retriever and indexes the example documents.
 *
 * Indexing is asynchronous, so the returned promise resolves only after the
 * documents have been added. If indexing fails, the cached promise is
 * dropped so that the next request retries instead of reusing the failure.
 */
function initializeRAG(): Promise<LangChainRAGRetriever> {
  if (!ragRetrieverInit) {
    ragRetrieverInit = (async () => {
      const retriever = new LangChainRAGRetriever(DEFAULT_TOP_K);

      // Example documents
      await retriever.addDocuments([
        {
          content:
            'TypeScript is a typed superset of JavaScript that compiles to plain JavaScript. It adds optional static typing to the language.',
          metadata: { source: 'typescript-docs', title: 'TypeScript Introduction' },
        },
        {
          content:
            'JavaScript is a versatile programming language used for web development, server-side programming with Node.js, and more.',
          metadata: { source: 'javascript-docs', title: 'JavaScript Overview' },
        },
        {
          content:
            'React is a JavaScript library for building user interfaces with reusable components. It uses a virtual DOM for efficient rendering.',
          metadata: { source: 'react-docs', title: 'React Framework' },
        },
        {
          content:
            'LangChain is a framework for developing applications powered by language models. It provides tools for chains, agents, and memory.',
          metadata: { source: 'langchain-docs', title: 'LangChain Framework' },
        },
        {
          content:
            'RAG (Retrieval-Augmented Generation) combines document retrieval with language model generation for more accurate responses.',
          metadata: { source: 'rag-docs', title: 'RAG Concept' },
        },
      ]);

      return retriever;
    })().catch((error: unknown) => {
      ragRetrieverInit = null;
      throw error;
    });
  }

  return ragRetrieverInit;
}

/**
 * POST /api/rag/retrieve
 * Runs a RAG retrieval for the `query` in the JSON body.
 *
 * Responds 400 when `query` is missing or not a string, and 500 with the
 * error message when anything else throws. The retriever itself is built
 * with a top-K of 3, so `topK` can only narrow the result list further.
 */
export async function POST(request: NextRequest) {
  try {
    const body = await request.json();
    const { query, topK = DEFAULT_TOP_K } = body;

    if (!query || typeof query !== 'string') {
      return NextResponse.json(
        { error: 'Query parameter is required and must be a string' },
        { status: 400 }
      );
    }

    // `slice` treats a negative end as an offset from the tail and NaN as 0,
    // so anything that is not a non-negative number falls back to the default.
    const requestedTopK = Number(topK);
    const limit =
      Number.isFinite(requestedTopK) && requestedTopK >= 0
        ? Math.floor(requestedTopK)
        : DEFAULT_TOP_K;

    const retriever = await initializeRAG();

    // Run the retrieval
    const results = await retriever.invoke(query);

    // Cap the number of results returned
    const limitedResults = results.slice(0, limit);

    return NextResponse.json({
      success: true,
      query,
      resultCount: limitedResults.length,
      results: limitedResults.map((doc, index) => ({
        rank: index + 1,
        content: doc.pageContent,
        metadata: doc.metadata,
        score: doc.metadata.score,
      })),
      stats: retriever.getStats(),
    });
  } catch (error) {
    console.error('RAG retrieval error:', error);
    return NextResponse.json(
      {
        error: error instanceof Error ? error.message : 'Unknown error',
      },
      { status: 500 }
    );
  }
}

/**
 * GET /api/rag/retrieve
 * Returns the retriever statistics once the example documents are indexed.
 */
export async function GET() {
  try {
    const retriever = await initializeRAG();
    const stats = retriever.getStats();

    return NextResponse.json({
      success: true,
      stats,
      message: 'RAG retriever is ready',
    });
  } catch (error) {
    console.error('RAG stats error:', error);
    return NextResponse.json(
      {
        error: error instanceof Error ? error.message : 'Unknown error',
      },
      { status: 500 }
    );
  }
}
25 |
26 |

Query Results

27 |

28 | Found {results.length} match{results.length !== 1 ? 'es' : ''} 29 |

30 |
31 | 32 |
33 | {results.length === 0 ? ( 34 |

No matches found

35 | ) : ( 36 |
37 | {results.map((match, matchIdx) => ( 38 |
39 |
40 | Pattern #{match.pattern + 1} - Match #{matchIdx + 1} 41 |
42 |
43 | {match.captures.map((capture, captureIdx) => { 44 | const color = getCaptureColor(capture.name); 45 | return ( 46 |
{ 51 | if (onCaptureClick) { 52 | onCaptureClick({ 53 | startRow: capture.startPosition.row, 54 | startColumn: capture.startPosition.column, 55 | endRow: capture.endPosition.row, 56 | endColumn: capture.endPosition.column, 57 | }); 58 | } 59 | }} 60 | > 61 |
62 | @{capture.name} 63 |
64 |
65 | {capture.text} 66 |
67 |
68 | 69 | Type: {capture.type} 70 | 71 | 72 | Position: [{capture.startPosition.row}, {capture.startPosition.column}] 73 | - [{capture.endPosition.row}, {capture.endPosition.column}] 74 | 75 |
76 |
77 | ); 78 | })} 79 |
80 |
81 | ))} 82 |
83 | )} 84 |
85 |
86 | ); 87 | } 88 | -------------------------------------------------------------------------------- /src/__tests__/vector-search.test.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Vector Search Tests 3 | * Tests for vector store, embedding generation, and similarity calculations 4 | */ 5 | 6 | import { generateMockDocuments } from '@/lib/rag/mock-data-generator'; 7 | 8 | describe('Vector Search', () => { 9 | describe('Mock Data Generator', () => { 10 | it('should generate mock documents', () => { 11 | const docs = generateMockDocuments(); 12 | expect(docs).toBeDefined(); 13 | expect(docs.length).toBeGreaterThan(0); 14 | }); 15 | 16 | it('should generate documents with required fields', () => { 17 | const docs = generateMockDocuments(); 18 | docs.forEach((doc) => { 19 | expect(doc.id).toBeDefined(); 20 | expect(doc.title).toBeDefined(); 21 | expect(doc.content).toBeDefined(); 22 | expect(typeof doc.id).toBe('string'); 23 | expect(typeof doc.title).toBe('string'); 24 | expect(typeof doc.content).toBe('string'); 25 | }); 26 | }); 27 | 28 | it('should have unique document IDs', () => { 29 | const docs = generateMockDocuments(); 30 | const ids = docs.map((doc) => doc.id); 31 | const uniqueIds = new Set(ids); 32 | expect(uniqueIds.size).toBe(ids.length); 33 | }); 34 | 35 | it('should have meaningful content', () => { 36 | const docs = generateMockDocuments(); 37 | docs.forEach((doc) => { 38 | expect(doc.content.length).toBeGreaterThan(50); 39 | }); 40 | }); 41 | }); 42 | 43 | describe('Cosine Similarity', () => { 44 | it('should calculate cosine similarity correctly', () => { 45 | // Test vectors 46 | const v1 = [1, 0, 0]; 47 | const v2 = [1, 0, 0]; 48 | const v3 = [0, 1, 0]; 49 | 50 | // Same vectors should have similarity of 1 51 | const similarity1 = cosineSimilarity(v1, v2); 52 | expect(similarity1).toBeCloseTo(1, 5); 53 | 54 | // Orthogonal vectors should have similarity of 0 55 | const similarity2 = 
/**
 * Helper function: Calculate cosine similarity between two vectors.
 *
 * @param a First vector.
 * @param b Second vector; must have the same length as `a`.
 * @returns A value in [-1, 1]; 0 when either vector has zero magnitude
 *   (including two empty vectors).
 * @throws Error when the vectors differ in length.
 */
function cosineSimilarity(a: number[], b: number[]): number {
  if (a.length !== b.length) {
    throw new Error('Vectors must have the same dimension');
  }

  let dotProduct = 0;
  let normA = 0;
  let normB = 0;

  for (let i = 0; i < a.length; i++) {
    dotProduct += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }

  const denominator = Math.sqrt(normA) * Math.sqrt(normB);
  if (denominator === 0) {
    // The angle is undefined for a zero-length vector; report "no similarity".
    return 0;
  }

  // Rounding in the square roots can push the quotient a hair past +/-1
  // for (anti-)parallel vectors; clamp so the result stays a valid cosine.
  return Math.max(-1, Math.min(1, dotProduct / denominator));
}
/**
 * A document as handed to the processing pipeline, before chunking.
 */
export interface LoadedDocument {
  content: string;
  metadata: {
    source: string;
    title?: string;
    url?: string;
    [key: string]: any;
  };
}

/**
 * Document processor: splits documents into chunks.
 */
export class DocumentProcessor {
  private splitter: RecursiveCharacterTextSplitter;

  /**
   * @param chunkSize Chunk size passed to the text splitter.
   * @param chunkOverlap Chunk overlap passed to the text splitter.
   */
  constructor(chunkSize: number = 1000, chunkOverlap: number = 200) {
    this.splitter = new RecursiveCharacterTextSplitter({
      chunkSize,
      chunkOverlap,
      separators: ['\n\n', '\n', ' ', ''],
    });
  }

  /**
   * Splits every document into chunks.
   *
   * Chunk ids default to `<source>-chunk-<index within the document>`.
   * Several documents can share one `source` (`fromTexts` and `fromJSON`
   * produce exactly that), which would make those ids collide; a colliding
   * id receives a numeric suffix so every id in the returned list is unique.
   *
   * @returns All chunks in document order. `metadata.chunkIndex` counts
   *   across all documents; `metadata.pageNumber` is the index within the
   *   chunk's own document.
   */
  async processDocuments(documents: LoadedDocument[]): Promise<DocumentChunk[]> {
    const chunks: DocumentChunk[] = [];
    const usedIds = new Set<string>();
    let globalChunkIndex = 0;

    for (const doc of documents) {
      const texts = await this.splitter.splitText(doc.content);

      texts.forEach((text, index) => {
        const baseId = `${doc.metadata.source}-chunk-${index}`;
        let id = baseId;
        // The first occurrence keeps the plain id; later ones get -1, -2, ...
        for (let suffix = 1; usedIds.has(id); suffix++) {
          id = `${baseId}-${suffix}`;
        }
        usedIds.add(id);

        chunks.push({
          id,
          content: text,
          metadata: {
            source: doc.metadata.source,
            chunkIndex: globalChunkIndex,
            pageNumber: index,
            title: doc.metadata.title,
            url: doc.metadata.url,
          },
        });
        globalChunkIndex++;
      });
    }

    return chunks;
  }

  /**
   * Wraps Markdown content as a document; the title falls back to the source.
   */
  static fromMarkdown(content: string, source: string, title?: string): LoadedDocument {
    return {
      content,
      metadata: {
        source,
        title: title || source,
      },
    };
  }

  /**
   * Builds documents from parsed JSON.
   *
   * An array yields one document per element (strings are used as-is, other
   * values are serialised); any other value yields a single serialised document.
   */
  static fromJSON(data: any, source: string): LoadedDocument[] {
    if (Array.isArray(data)) {
      return data.map((item, index) => ({
        content: typeof item === 'string' ? item : JSON.stringify(item),
        metadata: {
          source,
          title: `${source}-${index}`,
        },
      }));
    }

    return [
      {
        content: JSON.stringify(data),
        metadata: {
          source,
          title: source,
        },
      },
    ];
  }

  /**
   * Builds one document per text; all share `source`, titles are `<source>-<index>`.
   */
  static fromTexts(texts: string[], source: string): LoadedDocument[] {
    return texts.map((text, index) => ({
      content: text,
      metadata: {
        source,
        title: `${source}-${index}`,
      },
    }));
  }
}

/**
 * LangChain document RAG helper.
 * Collects documents and keeps the chunks produced by the last processing run.
 */
export class LangChainDocumentRAG {
  private processor: DocumentProcessor;
  private documents: LoadedDocument[] = [];
  private chunks: DocumentChunk[] = [];

  /**
   * @param chunkSize Forwarded to {@link DocumentProcessor}.
   * @param chunkOverlap Forwarded to {@link DocumentProcessor}.
   */
  constructor(chunkSize?: number, chunkOverlap?: number) {
    this.processor = new DocumentProcessor(chunkSize, chunkOverlap);
  }

  /**
   * Queues documents; they are not chunked until `processAll()` runs.
   */
  addDocuments(documents: LoadedDocument[]): void {
    this.documents.push(...documents);
  }

  /**
   * Re-chunks every queued document and replaces the stored chunk list.
   *
   * @returns The complete chunk list, covering all documents added so far.
   */
  async processAll(): Promise<DocumentChunk[]> {
    this.chunks = await this.processor.processDocuments(this.documents);
    return this.chunks;
  }

  /**
   * @returns Chunks produced by the most recent `processAll()` call.
   */
  getChunks(): DocumentChunk[] {
    return this.chunks;
  }

  /**
   * Drops all documents and chunks.
   */
  clear(): void {
    this.documents = [];
    this.chunks = [];
  }

  /**
   * @returns Document count, chunk count and the summed chunk content length.
   */
  getStats() {
    return {
      documentCount: this.documents.length,
      chunkCount: this.chunks.length,
      totalContent: this.chunks.reduce((sum, chunk) => sum + chunk.content.length, 0),
    };
  }
}
/**
 * Code Documentation Store - In-memory cache for LLM-generated code documentation
 * Caches LLM results to improve performance and reduce API calls
 */

export interface CodeDocumentation {
  id: string; // Unique identifier (hash of code + language)
  nodeId: string; // Graph node ID
  nodeLabel: string; // Node name (class/function name)
  nodeType: string; // Type: 'class', 'function', 'method', etc.
  code: string; // Source code snippet
  language: string; // Programming language
  documentation: string; // LLM-generated documentation
  summary: string; // Brief summary
  parameters?: Array<{
    name: string;
    type: string;
    description: string;
  }>;
  returnType?: string;
  returnDescription?: string;
  examples?: string[];
  relatedNodes?: string[]; // IDs of related nodes
  createdAt: number;
  updatedAt: number;
  llmModel?: string; // Which LLM model was used
  tokensUsed?: number; // Token count for cost tracking
}

// In-memory cache for storing documentation, keyed by `CodeDocumentation.id`.
const documentationCache = new Map<string, CodeDocumentation>();

/**
 * Save documentation to cache, replacing any entry with the same `id`.
 *
 * @throws Re-throws (after logging) any error raised while storing, e.g.
 *   when `doc` is not an object.
 */
export async function saveDocumentation(doc: CodeDocumentation): Promise<void> {
  try {
    documentationCache.set(doc.id, doc);
  } catch (error) {
    console.error('Failed to save documentation:', error);
    throw error;
  }
}

/**
 * Get documentation by node ID.
 *
 * @returns The first cached entry (in insertion order) whose `nodeId`
 *   matches, or `null` when there is none.
 */
export async function getDocumentationByNodeId(nodeId: string): Promise<CodeDocumentation | null> {
  for (const doc of documentationCache.values()) {
    if (doc.nodeId === nodeId) {
      return doc;
    }
  }
  return null;
}

/**
 * Get documentation by ID.
 *
 * @returns The cached entry, or `null` when the id is unknown.
 */
export async function getDocumentationById(id: string): Promise<CodeDocumentation | null> {
  return documentationCache.get(id) ?? null;
}

/**
 * Get all documentation for a language.
 *
 * @returns Matching entries in insertion order; empty when none match.
 */
export async function getDocumentationByLanguage(language: string): Promise<CodeDocumentation[]> {
  return Array.from(documentationCache.values()).filter((doc) => doc.language === language);
}

/**
 * Delete documentation by ID; unknown ids are ignored.
 */
export async function deleteDocumentation(id: string): Promise<void> {
  documentationCache.delete(id);
}

/**
 * Clear all documentation.
 */
export async function clearAllDocumentation(): Promise<void> {
  documentationCache.clear();
}

/**
 * Get cache statistics.
 *
 * @returns Entry count, the distinct languages in first-seen order, and the
 *   sum of `tokensUsed` (entries without a count contribute 0).
 */
export async function getCacheStats(): Promise<{
  totalDocuments: number;
  languages: string[];
  totalTokensUsed: number;
}> {
  const languages = new Set<string>();
  let totalTokensUsed = 0;

  for (const doc of documentationCache.values()) {
    languages.add(doc.language);
    totalTokensUsed += doc.tokensUsed || 0;
  }

  return {
    totalDocuments: documentationCache.size,
    languages: Array.from(languages),
    totalTokensUsed,
  };
}
/**
 * Score how well one document matches a query using a simplified,
 * single-document BM25 variant.
 *
 * Each query term contributes
 * `idf * (tf * (k1 + 1)) / (tf + k1 * (1 - b + b * |D| / avgdl))`
 * with `k1 = 1.5` and `b = 0.75`. Two deliberate simplifications compared
 * with textbook BM25:
 * - no corpus statistics are available here, so the IDF factor is
 *   approximated as `log(1 + tf)`;
 * - a document token counts as a hit when it *contains* the query term
 *   (substring match), so `type` also matches `typescript`.
 *
 * Empty tokens are discarded before scoring. Splitting on whitespace yields
 * `''` for empty or padded input, and `''` is a substring of every token, so
 * keeping them would make blank or padded queries match every word.
 *
 * @param query - Free-text query; lower-cased and split on whitespace.
 * @param document - Document text; lower-cased and split on whitespace.
 * @param avgDocLength - Average document length in tokens used for length
 *   normalisation. Defaults to 100.
 * @returns The summed term scores divided by 10 and capped at 1; `0` when no
 *   query term matches.
 */
function calculateBM25Score(query: string, document: string, avgDocLength: number = 100): number {
  const k1 = 1.5; // term frequency saturation parameter
  const b = 0.75; // length normalization parameter

  const tokenize = (text: string): string[] =>
    text
      .toLowerCase()
      .split(/\s+/)
      .filter((token) => token.length > 0);

  const queryTerms = tokenize(query);
  const docTerms = tokenize(document);

  // The length-normalisation part of the denominator does not depend on the term.
  const lengthPenalty = k1 * (1 - b + b * (docTerms.length / avgDocLength));

  let score = 0;
  for (const term of queryTerms) {
    const termFreq = docTerms.filter((docTerm) => docTerm.includes(term)).length;
    if (termFreq === 0) continue;

    // Simplified IDF: log(1 + tf), since only this one document is known.
    const idf = Math.log(1 + termFreq);
    score += idf * ((termFreq * (k1 + 1)) / (termFreq + lengthPenalty));
  }

  return Math.min(1, score / 10); // Normalize to 0-1
}
generate mock documents 52 | const result = await streamText({ 53 | model: deepseek('deepseek-chat'), 54 | messages: [ 55 | { 56 | role: 'user', 57 | content: `Generate 5 realistic mock documents related to the query: "${query}" 58 | 59 | Each document should be a short paragraph (2-3 sentences) that could be relevant to the query. 60 | Format your response as a JSON array with this structure: 61 | [ 62 | { 63 | "title": "Document Title", 64 | "content": "Document content here..." 65 | }, 66 | ... 67 | ] 68 | 69 | Only return the JSON array, no other text.`, 70 | }, 71 | ], 72 | temperature: 0.7, 73 | }); 74 | 75 | const fullText = await result.text; 76 | 77 | // Parse the JSON response 78 | const jsonMatch = fullText.match(/\[[\s\S]*\]/); 79 | if (!jsonMatch) { 80 | throw new Error('Failed to parse documents response'); 81 | } 82 | 83 | interface Document { 84 | title: string; 85 | content: string; 86 | } 87 | 88 | const documents = JSON.parse(jsonMatch[0]) as Document[]; 89 | 90 | // Calculate BM25 scores for each document 91 | const avgDocLength = 92 | documents.reduce((sum: number, doc: Document) => sum + doc.content.split(/\s+/).length, 0) / 93 | documents.length; 94 | 95 | const scoredDocuments = documents.map((doc: Document, idx: number) => ({ 96 | id: `doc-${idx}`, 97 | title: doc.title, 98 | content: doc.content, 99 | score: calculateBM25Score(query, doc.content, avgDocLength), 100 | metadata: { 101 | source: `generated-${idx}`, 102 | title: doc.title, 103 | }, 104 | })); 105 | 106 | // Sort by score descending 107 | interface ScoredDocument extends Document { 108 | id: string; 109 | score: number; 110 | } 111 | 112 | scoredDocuments.sort((a: ScoredDocument, b: ScoredDocument) => b.score - a.score); 113 | 114 | return NextResponse.json({ 115 | success: true, 116 | query, 117 | documents: scoredDocuments, 118 | bm25Info: { 119 | formula: 'IDF(qi) * (f(qi, D) * (k1 + 1)) / (f(qi, D) + k1 * (1 - b + b * |D| / avgdl))', 120 | k1: 1.5, 121 | b: 0.75, 122 | 
avgDocLength: Math.round(avgDocLength), 123 | }, 124 | usage: result.usage, 125 | }); 126 | } catch (error) { 127 | console.error('Generate documents error:', error); 128 | return NextResponse.json( 129 | { 130 | error: error instanceof Error ? error.message : 'Unknown error', 131 | }, 132 | { status: 500 } 133 | ); 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /src/components/treesitter/TreeViewer.tsx: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import React, { useState, useCallback } from 'react'; 4 | import type { TreeNode } from '@/lib/treesitter-utils'; 5 | 6 | interface TreeViewerProps { 7 | tree: TreeNode | null; 8 | isLoading: boolean; 9 | onNodeClick?: (node: TreeNode) => void; 10 | highlightedNodeId?: number; 11 | } 12 | 13 | function TreeNodeComponent({ 14 | node, 15 | depth = 0, 16 | onNodeClick, 17 | isHighlighted = false, 18 | highlightedNodeId, 19 | }: { 20 | node: TreeNode; 21 | depth?: number; 22 | onNodeClick?: (node: TreeNode) => void; 23 | isHighlighted?: boolean; 24 | highlightedNodeId?: number; 25 | }) { 26 | const [expanded, setExpanded] = useState(depth < 2); 27 | const hasChildren = node.children && node.children.length > 0; 28 | 29 | // Determine node display name and styling 30 | let displayName = node.type; 31 | let nodeClass = 'text-blue-600'; 32 | 33 | if (node.isMissing) { 34 | displayName = `MISSING ${node.isNamed ? node.type : `"${node.type}"`}`; 35 | nodeClass = 'text-red-600 font-semibold'; 36 | } else if (node.isError) { 37 | displayName = 'ERROR'; 38 | nodeClass = 'text-red-600 font-semibold'; 39 | } else if (node.isNamed) { 40 | nodeClass = 'text-blue-600 font-semibold'; 41 | } else if (node.isAnonymous) { 42 | displayName = `"${node.type}"`; 43 | nodeClass = 'text-slate-500'; 44 | } 45 | 46 | const fieldNameDisplay = node.fieldName ? 
`${node.fieldName}: ` : ''; 47 | const positionInfo = `[${node.startPosition.row}, ${node.startPosition.column}] - [${node.endPosition.row}, ${node.endPosition.column}]`; 48 | 49 | const handleClick = useCallback(() => { 50 | setExpanded(!expanded); 51 | if (onNodeClick) { 52 | onNodeClick(node); 53 | } 54 | }, [expanded, node, onNodeClick]); 55 | 56 | return ( 57 |
58 |
64 | {hasChildren && ( 65 | {expanded ? '▼' : '▶'} 66 | )} 67 | {!hasChildren && } 68 | 69 | {fieldNameDisplay} 70 | {displayName} 71 | {positionInfo} 72 |
73 | 74 | {expanded && hasChildren && ( 75 |
76 | {node.children!.map((child, idx) => ( 77 | 84 | ))} 85 |
86 | )} 87 |
88 | ); 89 | } 90 | 91 | export default function TreeViewer({ 92 | tree, 93 | isLoading, 94 | onNodeClick, 95 | highlightedNodeId, 96 | }: TreeViewerProps) { 97 | return ( 98 |
99 |
100 |

Syntax Tree

101 |

102 | Parsed syntax tree structure with field names and positions 103 |

104 |
105 | 106 |
107 | {isLoading ? ( 108 |
109 |
110 |
111 |

Parsing...

112 |
113 |
114 | ) : tree ? ( 115 | 120 | ) : ( 121 |

No tree to display

122 | )} 123 |
124 |
125 | ); 126 | } 127 | -------------------------------------------------------------------------------- /tests/navigation.test.ts: -------------------------------------------------------------------------------- 1 | import { test, expect } from '@playwright/test'; 2 | 3 | test.describe('Navigation System', () => { 4 | test('should display main navigation on all pages', async ({ page }) => { 5 | // Test main page 6 | await page.goto('/'); 7 | await expect(page.locator('header')).toBeVisible(); 8 | await expect(page.getByText('Context Engineer Workbench')).toBeVisible(); 9 | 10 | // Test navigation items are present 11 | await expect(page.getByText('Workbench')).toBeVisible(); 12 | await expect(page.getByText('Keyword Search')).toBeVisible(); 13 | await expect(page.getByText('Vector Search')).toBeVisible(); 14 | await expect(page.getByText('Graph Search')).toBeVisible(); 15 | await expect(page.getByText('TreeSitter')).toBeVisible(); 16 | }); 17 | 18 | test('should navigate between pages correctly', async ({ page }) => { 19 | await page.goto('/'); 20 | 21 | // Navigate to Keyword Search 22 | await page.getByText('Keyword Search').click(); 23 | await expect(page).toHaveURL('/rag-keyword-playground'); 24 | await expect(page.getByText('RAG Keyword Search Playground')).toBeVisible(); 25 | 26 | // Navigate to Vector Search 27 | await page.getByText('Vector Search').click(); 28 | await expect(page).toHaveURL('/rag-vector-playground'); 29 | await expect(page.getByText('RAG Vector Search Playground')).toBeVisible(); 30 | 31 | // Navigate back to home 32 | await page.getByText('Workbench').click(); 33 | await expect(page).toHaveURL('/'); 34 | }); 35 | 36 | test('should show breadcrumbs on playground pages', async ({ page }) => { 37 | await page.goto('/rag-keyword-playground'); 38 | 39 | // Check breadcrumbs are present 40 | await expect(page.getByText('Home')).toBeVisible(); 41 | await expect(page.getByText('Keyword Search')).toBeVisible(); 42 | 43 | // Test breadcrumb 
navigation 44 | await page.getByText('Home').click(); 45 | await expect(page).toHaveURL('/'); 46 | }); 47 | 48 | test('should display page headers with flow descriptions', async ({ page }) => { 49 | await page.goto('/rag-keyword-playground'); 50 | 51 | await expect(page.getByText('RAG Keyword Search Playground')).toBeVisible(); 52 | await expect( 53 | page.getByText('Learn how keyword-based retrieval works in RAG systems') 54 | ).toBeVisible(); 55 | await expect( 56 | page.getByText('Flow: Query → Rewrite → Keyword Search → BM25 Scoring → Results') 57 | ).toBeVisible(); 58 | }); 59 | 60 | test('should show quick navigation search', async ({ page }) => { 61 | await page.goto('/'); 62 | 63 | // Look for search button or trigger 64 | const searchButton = page.getByText('Search'); 65 | if (await searchButton.isVisible()) { 66 | await searchButton.click(); 67 | // Quick navigation modal should appear 68 | await expect(page.getByPlaceholder('Search pages...')).toBeVisible(); 69 | } 70 | }); 71 | 72 | test('should have responsive navigation on mobile', async ({ page }) => { 73 | // Set mobile viewport 74 | await page.setViewportSize({ width: 375, height: 667 }); 75 | await page.goto('/'); 76 | 77 | // Navigation should still be functional on mobile 78 | await expect(page.locator('header')).toBeVisible(); 79 | await expect(page.getByText('Context Engineer Workbench')).toBeVisible(); 80 | }); 81 | 82 | test('should show active state for current page', async ({ page }) => { 83 | await page.goto('/rag-keyword-playground'); 84 | 85 | // The Keyword Search navigation item should have active styling 86 | const keywordSearchLink = page.getByText('Keyword Search'); 87 | await expect(keywordSearchLink).toBeVisible(); 88 | 89 | // Check if it has active state classes (this might need adjustment based on actual implementation) 90 | const linkElement = await keywordSearchLink.locator('..').first(); 91 | const classes = await linkElement.getAttribute('class'); 92 | 
expect(classes).toContain('bg-indigo-100'); // Active state class 93 | }); 94 | }); 95 | 96 | test.describe('Layout System', () => { 97 | test('should use correct layout for workbench page', async ({ page }) => { 98 | await page.goto('/'); 99 | 100 | // Workbench should have sidebar 101 | await expect(page.getByText('Configuration Center')).toBeVisible(); 102 | await expect(page.getByText('Context Assembly View')).toBeVisible(); 103 | await expect(page.getByText('Chat & Interaction')).toBeVisible(); 104 | }); 105 | 106 | test('should use correct layout for playground pages', async ({ page }) => { 107 | await page.goto('/rag-keyword-playground'); 108 | 109 | // Playground pages should have standard layout with page header 110 | await expect(page.getByText('RAG Keyword Search Playground')).toBeVisible(); 111 | await expect(page.getByText('Pipeline & Papers')).toBeVisible(); 112 | }); 113 | }); 114 | -------------------------------------------------------------------------------- /src/lib/metrics.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * LLM API Performance Metrics 3 | * 4 | * Tracks and calculates key performance indicators for LLM API calls: 5 | * - FirstToken Latency: Time to receive first token (TTFT) 6 | * - Total Time: End-to-end latency from request to complete response 7 | * - Throughput: Tokens per second 8 | * - Token counts: Input, output, and total tokens 9 | */ 10 | 11 | export interface APIMetrics { 12 | // Timing metrics (in milliseconds) 13 | firstTokenLatency: number; // Time to first token (TTFT) 14 | totalLatency: number; // Total end-to-end time 15 | 16 | // Token metrics 17 | inputTokens: number; 18 | outputTokens: number; 19 | totalTokens: number; 20 | 21 | // Calculated metrics 22 | tokensPerSecond: number; // Output tokens / (totalLatency / 1000) 23 | averageLatencyPerToken: number; // totalLatency / outputTokens 24 | 25 | // Metadata 26 | timestamp: number; // When the request started 27 
/**
 * Close a metrics collection and derive the reported figures.
 *
 * The end of the request is taken as `Date.now()` at the moment of the call.
 *
 * - `totalLatency` is the time since `collector.startTime`.
 * - `firstTokenLatency` is measured to `collector.firstTokenTime` when one was
 *   recorded; otherwise it falls back to `totalLatency`.
 * - `tokensPerSecond` is output tokens per elapsed second. It is `0` when no
 *   output tokens were produced or when no time elapsed, so a call that
 *   finishes in the same millisecond it started never reports `Infinity`.
 * - `averageLatencyPerToken` is `totalLatency / outputTokens`, or `0` without
 *   output tokens.
 *
 * @param collector - The collector created when the request started.
 * @returns A metrics snapshot; the collector itself is not modified.
 */
export function finalizeMetrics(collector: MetricsCollector): APIMetrics {
  const { startTime, firstTokenTime, inputTokens, outputTokens, model, provider } = collector;

  const totalLatency = Date.now() - startTime;
  // Same truthiness test recordFirstToken uses to decide whether a first token was seen.
  const firstTokenLatency = firstTokenTime ? firstTokenTime - startTime : totalLatency;

  const producedOutput = outputTokens > 0;
  // Guard the division: a zero (or negative, after a clock adjustment) elapsed
  // time would otherwise yield Infinity or a negative throughput.
  const tokensPerSecond =
    producedOutput && totalLatency > 0 ? outputTokens / (totalLatency / 1000) : 0;
  const averageLatencyPerToken = producedOutput ? totalLatency / outputTokens : 0;

  return {
    firstTokenLatency,
    totalLatency,
    inputTokens,
    outputTokens,
    totalTokens: inputTokens + outputTokens,
    tokensPerSecond,
    averageLatencyPerToken,
    timestamp: startTime,
    model,
    provider,
  };
}
/**
 * Render a millisecond duration as a short display string.
 *
 * - below 10 000 ms: rounded whole milliseconds, e.g. `842ms`
 * - from 10 000 ms up to (not including) 60 s: seconds with two decimals,
 *   e.g. `12.34s`
 * - 60 s and above: minutes with two decimals, e.g. `1.50m`
 *
 * @param ms - Duration in milliseconds.
 * @returns The formatted duration with its unit suffix.
 */
export function formatDuration(ms: number): string {
  const asSeconds = ms / 1000;
  const unit = ms < 10000 ? 'ms' : asSeconds < 60 ? 's' : 'm';

  switch (unit) {
    case 'ms':
      return `${Math.round(ms)}ms`;
    case 's':
      return `${asSeconds.toFixed(2)}s`;
    default:
      return `${(asSeconds / 60).toFixed(2)}m`;
  }
}
totalLatency / outputTokens : 0; 144 | 145 | return { 146 | firstTokenLatency, 147 | totalLatency, 148 | inputTokens, 149 | outputTokens, 150 | totalTokens, 151 | tokensPerSecond, 152 | averageLatencyPerToken, 153 | timestamp: startTime, 154 | model, 155 | provider, 156 | }; 157 | } 158 | -------------------------------------------------------------------------------- /src/lib/rag/mock-data-generator.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Mock data generator for vector search playground 3 | * Generates realistic sample documents for demonstration 4 | */ 5 | 6 | export interface MockDocument { 7 | id: string; 8 | title: string; 9 | content: string; 10 | } 11 | 12 | /** 13 | * Generate mock documents for vector search 14 | */ 15 | export function generateMockDocuments(): MockDocument[] { 16 | return [ 17 | { 18 | id: 'doc-1', 19 | title: 'TypeScript Introduction', 20 | content: 21 | 'TypeScript is a typed superset of JavaScript that compiles to plain JavaScript. It adds optional static typing to the language. TypeScript enables developers to catch errors at compile time rather than runtime, improving code quality and maintainability. The language supports interfaces, generics, and advanced type features that make large-scale application development more manageable.', 22 | }, 23 | { 24 | id: 'doc-2', 25 | title: 'JavaScript Overview', 26 | content: 27 | 'JavaScript is a versatile programming language used for web development, server-side programming with Node.js, and more. It is the primary language for interactive web pages and has evolved significantly since its inception. JavaScript supports functional programming, object-oriented programming, and event-driven programming paradigms. 
Modern JavaScript includes features like async/await, destructuring, and modules.', 28 | }, 29 | { 30 | id: 'doc-3', 31 | title: 'React Framework', 32 | content: 33 | 'React is a JavaScript library for building user interfaces with reusable components. It uses a virtual DOM for efficient rendering and updates. React follows a component-based architecture where each component manages its own state and lifecycle. The library emphasizes declarative programming, making it easier to reason about UI changes. React has become the most popular frontend framework in the JavaScript ecosystem.', 34 | }, 35 | { 36 | id: 'doc-4', 37 | title: 'LangChain Framework', 38 | content: 39 | 'LangChain is a framework for developing applications powered by language models. It provides tools for chains, agents, and memory management. LangChain simplifies the process of building complex applications that leverage large language models. The framework includes components for prompt management, output parsing, and integration with various LLM providers. It enables developers to create sophisticated AI applications with minimal boilerplate code.', 40 | }, 41 | { 42 | id: 'doc-5', 43 | title: 'RAG Concept', 44 | content: 45 | 'RAG (Retrieval-Augmented Generation) combines document retrieval with language model generation for more accurate responses. This approach retrieves relevant documents from a knowledge base and uses them as context for generating answers. RAG improves the accuracy and relevance of LLM responses by grounding them in factual information. It reduces hallucinations and enables the model to provide citations for its answers. RAG is particularly useful for question-answering systems and knowledge-intensive applications.', 46 | }, 47 | { 48 | id: 'doc-6', 49 | title: 'Vector Databases', 50 | content: 51 | 'Vector databases are specialized databases designed to store and search high-dimensional vectors efficiently. 
They use techniques like approximate nearest neighbor search to find similar vectors quickly. Vector databases are essential for semantic search, recommendation systems, and machine learning applications. Popular vector databases include Pinecone, Weaviate, and Milvus. They enable efficient similarity search across millions or billions of vectors.', 52 | }, 53 | { 54 | id: 'doc-7', 55 | title: 'Embeddings and Semantic Search', 56 | content: 57 | 'Embeddings are numerical representations of text that capture semantic meaning. They are generated by neural networks and allow for semantic similarity comparisons. Semantic search uses embeddings to find documents with similar meaning rather than exact keyword matches. This approach is more powerful than traditional keyword-based search for understanding user intent. Embeddings enable applications like recommendation systems, duplicate detection, and semantic clustering.', 58 | }, 59 | { 60 | id: 'doc-8', 61 | title: 'Machine Learning Basics', 62 | content: 63 | 'Machine learning is a subset of artificial intelligence that enables systems to learn from data without being explicitly programmed. It involves training models on datasets to recognize patterns and make predictions. Common machine learning tasks include classification, regression, clustering, and dimensionality reduction. 
/**
 * Abstract base class for tools.
 *
 * A tool is described by a name, a human-readable description and a JSON-Schema
 * style parameter definition. Concrete tools extend this class and implement
 * {@link Tool.execute}; callers normally go through {@link Tool.call}, which
 * validates the arguments, times the run and converts thrown errors into a
 * failed {@link ToolExecutionResult}.
 */
export abstract class Tool {
  protected name: string;
  protected description: string;
  protected parameters: ToolParameterSchema;

  /**
   * @param definition - Name, description and parameter schema of the tool.
   */
  constructor(definition: ToolDefinition) {
    this.name = definition.name;
    this.description = definition.description;
    this.parameters = definition.parameters;
  }

  /**
   * Get the tool definition (name, description and parameter schema).
   */
  getDefinition(): ToolDefinition {
    return {
      name: this.name,
      description: this.description,
      parameters: this.parameters,
    };
  }

  /**
   * Get the tool name.
   */
  getName(): string {
    return this.name;
  }

  /**
   * Get the tool description.
   */
  getDescription(): string {
    return this.description;
  }

  /**
   * Get the tool's parameter schema.
   */
  getParameters(): ToolParameterSchema {
    return this.parameters;
  }

  /**
   * Validate call arguments against the parameter schema.
   *
   * Checks performed:
   * - every name in `required` is present as a key;
   * - for keys declared in `properties`, values declared as `number`, `string`
   *   or `array` have that runtime type (other declared types are not checked);
   * - values of parameters with an `enum` are one of the listed options.
   *
   * Keys that are not declared in the schema are ignored.
   *
   * @param params - Arguments supplied by the caller. A missing or non-object
   *   value is treated as "no arguments", so it is reported through `errors`
   *   rather than throwing.
   * @returns `valid` is `true` when `errors` is empty.
   */
  validateParameters(params: Record<string, any>): { valid: boolean; errors: string[] } {
    const errors: string[] = [];

    // The `in` operator throws on undefined/null/primitives, so normalise first.
    const supplied: Record<string, any> =
      params !== null && typeof params === 'object' ? params : {};

    // Required parameters must be present.
    for (const required of this.parameters.required) {
      if (!(required in supplied)) {
        errors.push(`Missing required parameter: ${required}`);
      }
    }

    // Declared parameters must have the declared type.
    for (const [key, value] of Object.entries(supplied)) {
      if (key in this.parameters.properties) {
        const schema = this.parameters.properties[key];
        const actualType = typeof value;

        if (schema.type === 'number' && actualType !== 'number') {
          errors.push(`Parameter ${key} should be a number, got ${actualType}`);
        } else if (schema.type === 'string' && actualType !== 'string') {
          errors.push(`Parameter ${key} should be a string, got ${actualType}`);
        } else if (schema.type === 'array' && !Array.isArray(value)) {
          errors.push(`Parameter ${key} should be an array, got ${actualType}`);
        }

        // Enumerated parameters must use one of the allowed values.
        if (schema.enum && !schema.enum.includes(value)) {
          errors.push(`Parameter ${key} must be one of: ${schema.enum.join(', ')}`);
        }
      }
    }

    return {
      valid: errors.length === 0,
      errors,
    };
  }

  /**
   * Run the tool. Implemented by each concrete tool.
   *
   * @param params - Arguments for the run; when invoked through
   *   {@link Tool.call} they have already passed {@link Tool.validateParameters}.
   */
  abstract execute(params: Record<string, any>): Promise<ToolExecutionResult>;

  /**
   * Public entry point: validate the arguments, run the tool and time it.
   *
   * This method does not reject for validation problems or for errors thrown
   * by `execute`; both are reported as `{ success: false, error }`.
   *
   * @param params - Arguments for the run. `undefined`/`null` is treated as an
   *   empty argument object.
   * @returns The result from `execute` with `executionTime` overwritten by the
   *   elapsed time measured here (including validation), or a failure result.
   */
  async call(params: Record<string, any>): Promise<ToolExecutionResult> {
    const startTime = Date.now();
    const args: Record<string, any> = params ?? {};

    // Validate the arguments before running anything.
    const validation = this.validateParameters(args);
    if (!validation.valid) {
      return {
        success: false,
        error: `Parameter validation failed: ${validation.errors.join('; ')}`,
        executionTime: Date.now() - startTime,
      };
    }

    try {
      const result = await this.execute(args);
      result.executionTime = Date.now() - startTime;
      return result;
    } catch (error) {
      return {
        success: false,
        error: error instanceof Error ? error.message : String(error),
        executionTime: Date.now() - startTime,
      };
    }
  }
}
LangChainRAGRetriever; 6 | 7 | beforeEach(() => { 8 | retriever = new LangChainRAGRetriever(2); 9 | }); 10 | 11 | test('should add documents and retrieve them', async () => { 12 | const documents = [ 13 | { 14 | content: 'TypeScript is a programming language that builds on JavaScript.', 15 | metadata: { source: 'docs', title: 'TypeScript Intro' }, 16 | }, 17 | { 18 | content: 'JavaScript is a versatile language used for web development.', 19 | metadata: { source: 'docs', title: 'JavaScript Intro' }, 20 | }, 21 | ]; 22 | 23 | await retriever.addDocuments(documents); 24 | const stats = retriever.getStats(); 25 | 26 | expect(stats.documentCount).toBe(2); 27 | expect(stats.chunkCount).toBeGreaterThan(0); 28 | }); 29 | 30 | test('should retrieve relevant documents', async () => { 31 | const documents = [ 32 | { 33 | content: 'TypeScript adds static typing to JavaScript.', 34 | metadata: { source: 'docs', title: 'TypeScript' }, 35 | }, 36 | { 37 | content: 'React is a JavaScript library for building UIs.', 38 | metadata: { source: 'docs', title: 'React' }, 39 | }, 40 | { 41 | content: 'Python is used for data science and machine learning.', 42 | metadata: { source: 'docs', title: 'Python' }, 43 | }, 44 | ]; 45 | 46 | await retriever.addDocuments(documents); 47 | const results = await retriever.invoke('TypeScript'); 48 | 49 | expect(results.length).toBeGreaterThan(0); 50 | expect(results[0].pageContent).toContain('TypeScript'); 51 | }); 52 | 53 | test('should include metadata in results', async () => { 54 | const documents = [ 55 | { 56 | content: 'LangChain is a framework for developing applications with LLMs.', 57 | metadata: { source: 'docs', title: 'LangChain', url: 'https://langchain.com' }, 58 | }, 59 | ]; 60 | 61 | await retriever.addDocuments(documents); 62 | const results = await retriever.invoke('LangChain'); 63 | 64 | expect(results.length).toBeGreaterThan(0); 65 | expect(results[0].metadata).toHaveProperty('source'); 66 | 
expect(results[0].metadata).toHaveProperty('score'); 67 | expect(results[0].metadata).toHaveProperty('rank'); 68 | }); 69 | 70 | test('should handle markdown documents', async () => { 71 | const markdownDocs = [ 72 | { 73 | content: '# TypeScript Guide\n\nTypeScript is a typed superset of JavaScript.', 74 | source: 'typescript-guide', 75 | title: 'TypeScript Guide', 76 | }, 77 | { 78 | content: '# JavaScript Basics\n\nJavaScript is the language of the web.', 79 | source: 'js-basics', 80 | title: 'JavaScript Basics', 81 | }, 82 | ]; 83 | 84 | const ragRetriever = await createRAGFromMarkdown(markdownDocs, 2); 85 | const results = await ragRetriever.invoke('TypeScript'); 86 | 87 | expect(results.length).toBeGreaterThan(0); 88 | expect(results[0].pageContent).toContain('TypeScript'); 89 | }); 90 | 91 | test('should respect topK parameter', async () => { 92 | const documents = [ 93 | { 94 | content: 'Document 1 about TypeScript', 95 | metadata: { source: 'docs', title: 'Doc1' }, 96 | }, 97 | { 98 | content: 'Document 2 about TypeScript', 99 | metadata: { source: 'docs', title: 'Doc2' }, 100 | }, 101 | { 102 | content: 'Document 3 about TypeScript', 103 | metadata: { source: 'docs', title: 'Doc3' }, 104 | }, 105 | ]; 106 | 107 | const topKRetriever = new LangChainRAGRetriever(1); 108 | await topKRetriever.addDocuments(documents); 109 | const results = await topKRetriever.invoke('TypeScript'); 110 | 111 | expect(results.length).toBeLessThanOrEqual(1); 112 | }); 113 | 114 | test('should clear documents', async () => { 115 | const documents = [ 116 | { 117 | content: 'Test document', 118 | metadata: { source: 'docs', title: 'Test' }, 119 | }, 120 | ]; 121 | 122 | await retriever.addDocuments(documents); 123 | let stats = retriever.getStats(); 124 | expect(stats.chunkCount).toBeGreaterThan(0); 125 | 126 | retriever.clear(); 127 | stats = retriever.getStats(); 128 | expect(stats.documentCount).toBe(0); 129 | expect(stats.chunkCount).toBe(0); 130 | }); 131 | 132 | test('should 
/**
 * Minimal self-contained BM25 index over an in-memory list of documents.
 *
 * Documents are lower-cased and split on whitespace; no stemming or
 * punctuation stripping is applied, so `typescript.` and `typescript` are
 * different terms. The index is built once in the constructor.
 */
class BM25 {
  private documents: string[] = [];
  /** Per-document term frequencies, parallel to `documents`. */
  private docFreq: Map<string, number>[] = [];
  private avgDocLength: number = 0;
  /** Per-document token counts, parallel to `documents`. */
  private docLengths: number[] = [];
  /** Term-frequency saturation parameter. */
  private k1: number = 1.5;
  /** Length-normalisation parameter. */
  private b: number = 0.75;
  /** Inverse document frequency for every term seen in the corpus. */
  private idf: Map<string, number> = new Map();

  /**
   * @param documents - Raw document texts to index.
   */
  constructor(documents: string[]) {
    this.documents = documents;
    this.buildIndex();
  }

  /**
   * Compute per-document term frequencies and lengths, the average document
   * length, and the IDF of every term in the corpus.
   */
  private buildIndex(): void {
    const N = this.documents.length;
    let totalLength = 0;

    // Term frequencies and document lengths.
    for (const doc of this.documents) {
      const tokens = this.tokenize(doc);
      this.docLengths.push(tokens.length);
      totalLength += tokens.length;

      const freq = new Map<string, number>();
      for (const token of tokens) {
        freq.set(token, (freq.get(token) ?? 0) + 1);
      }
      this.docFreq.push(freq);
    }

    // An empty corpus would otherwise give 0 / 0 = NaN.
    this.avgDocLength = N > 0 ? totalLength / N : 0;

    // Number of documents containing each term.
    const docFreqCount = new Map<string, number>();
    for (const freq of this.docFreq) {
      for (const token of freq.keys()) {
        docFreqCount.set(token, (docFreqCount.get(token) ?? 0) + 1);
      }
    }

    // IDF with the "+1 inside the log" form, which is always positive.
    for (const [token, count] of docFreqCount) {
      this.idf.set(token, Math.log((N - count + 0.5) / (count + 0.5) + 1));
    }
  }

  /**
   * Lower-case the text and split it on whitespace, dropping empty tokens.
   */
  private tokenize(text: string): string[] {
    return text
      .toLowerCase()
      .split(/\s+/)
      .filter((token) => token.length > 0);
  }

  /**
   * Score every indexed document against the query.
   *
   * @param query - Free-text query; tokenised the same way as the documents.
   *   A term repeated in the query contributes once per occurrence.
   * @returns One score per document, in indexing order. Documents that share
   *   no term with the query score exactly `0`.
   */
  search(query: string): number[] {
    const tokens = this.tokenize(query);
    const scores: number[] = new Array(this.documents.length).fill(0);

    for (const token of tokens) {
      const idf = this.idf.get(token);
      // Terms absent from the corpus contribute nothing. Skipping them also
      // avoids evaluating 0 / 0 when every indexed document is empty.
      if (idf === undefined) continue;

      for (let i = 0; i < this.documents.length; i++) {
        const freq = this.docFreq[i].get(token) ?? 0;
        if (freq === 0) continue;

        const numerator = freq * (this.k1 + 1);
        const denominator =
          freq + this.k1 * (1 - this.b + this.b * (this.docLengths[i] / this.avgDocLength));

        scores[i] += idf * (numerator / denominator);
      }
    }

    return scores;
  }
}
// Indexed chunks, in insertion order. Positions line up with the score array
// returned by BM25.search().
private documents: DocumentChunk[] = [];
// Scoring index over `documentTexts`; null until documents are added and
// again after clearDocuments().
private bm25: BM25 | null = null;
// `content` of every chunk in `documents`, in the same order.
private documentTexts: string[] = [];

/**
 * Append documents to the corpus and rebuild the BM25 index from scratch.
 *
 * @param docs - Chunks to add; they are appended after the existing ones.
 */
addDocuments(docs: DocumentChunk[]): void {
  // Append one element at a time instead of `push(...docs)`: spreading a very
  // large array into call arguments can exceed the engine's argument limit
  // and throw a RangeError.
  for (const doc of docs) {
    this.documents.push(doc);
  }
  this.rebuildIndex();
}

/**
 * Remove every document and drop the index.
 */
clearDocuments(): void {
  this.documents = [];
  this.documentTexts = [];
  this.bm25 = null;
}

/**
 * Rebuild the BM25 index from the current document list.
 */
private rebuildIndex(): void {
  // The index scores raw text, so extract each chunk's content first.
  this.documentTexts = this.documents.map((doc) => doc.content);

  this.bm25 = new BM25(this.documentTexts);
}

/**
 * Run a keyword search over the indexed documents.
 *
 * Only documents whose score is strictly greater than 0 are returned, sorted
 * by descending score and labelled with a 1-based rank.
 *
 * @param query - Search text; a blank query yields no results.
 * @param topK - Maximum number of results (default 3). Zero, negative or NaN
 *   values yield no results.
 * @returns The ranked chunks together with the query, the retriever name
 *   ('bm25') and the elapsed time in milliseconds. Errors thrown while
 *   scoring are logged and reported as an empty result rather than rethrown.
 */
async retrieve(query: string, topK: number = 3): Promise<RetrievalResult> {
  const startTime = Date.now();

  // Single place that assembles the result envelope, so every exit path
  // reports the same fields and measures elapsed time the same way.
  const buildResult = (chunks: RetrievalResultItem[]): RetrievalResult => ({
    chunks,
    query,
    retrieverName: 'bm25',
    totalTime: Date.now() - startTime,
  });

  // Array.prototype.slice treats a negative end as an offset from the back,
  // which would return "all but the last |topK|" hits. Clamp so a
  // non-positive request means "no results".
  const limit = Math.max(0, topK);

  if (!this.bm25 || this.documents.length === 0 || !query.trim() || !(limit > 0)) {
    return buildResult([]);
  }

  try {
    const scores = this.bm25.search(query);

    const ranked = scores
      .map((score, index) => ({ index, score }))
      .filter((candidate) => candidate.score > 0)
      .sort((a, b) => b.score - a.score)
      .slice(0, limit)
      .map((candidate, position) => ({
        chunk: this.documents[candidate.index],
        score: candidate.score,
        rank: position + 1,
      }));

    return buildResult(ranked);
  } catch (error) {
    // Best-effort retrieval: log the failure and fall back to an empty result.
    console.error('BM25 retrieval error:', error);
    return buildResult([]);
  }
}

/**
 * Number of documents currently indexed.
 */
getDocumentCount(): number {
  return this.documents.length;
}

/** 216 | * 获取所有文档 217 | */ 218 | getDocuments(): DocumentChunk[] { 219 | return [...this.documents]; 220 | } 221 | } 222 | -------------------------------------------------------------------------------- /src/app/api/rag/generate-code-documentation/route.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * POST /api/rag/generate-code-documentation 3 | * Generate documentation for code nodes using LLM 4 | * Results are cached in RxDB to improve performance 5 | */ 6 | 7 | import { generateText } from 'ai'; 8 | import { createDeepSeek } from '@ai-sdk/deepseek'; 9 | import { NextRequest, NextResponse } from 'next/server'; 10 | import { saveDocumentation, getDocumentationById } from '@/lib/rag/code-documentation-store'; 11 | import crypto from 'crypto'; 12 | 13 | const deepseek = createDeepSeek({ 14 | apiKey: process.env.DEEPSEEK_API_KEY || '', 15 | }); 16 | 17 | interface DocumentationRequest { 18 | nodeId: string; 19 | nodeLabel: string; 20 | nodeType: string; 21 | code: string; 22 | language: string; 23 | relatedNodes?: string[]; 24 | } 25 | 26 | /** 27 | * Generate a unique ID for documentation based on code content 28 | */ 29 | function generateDocumentationId(code: string, language: string): string { 30 | const hash = crypto.createHash('sha256').update(`${code}:${language}`).digest('hex'); 31 | return `doc_${hash.substring(0, 16)}`; 32 | } 33 | 34 | /** 35 | * Generate documentation prompt based on node type 36 | */ 37 | function buildDocumentationPrompt( 38 | nodeLabel: string, 39 | nodeType: string, 40 | code: string, 41 | language: string 42 | ): string { 43 | const typeDescriptions: Record = { 44 | class: 'a class definition', 45 | function: 'a function definition', 46 | method: 'a method definition', 47 | interface: 'an interface definition', 48 | type: 'a type definition', 49 | enum: 'an enum definition', 50 | }; 51 | 52 | const typeDesc = typeDescriptions[nodeType] || 'a code element'; 53 | 54 | return `Analyze 
the following ${typeDesc} in ${language} and generate comprehensive documentation. 55 | 56 | Code: 57 | \`\`\`${language} 58 | ${code} 59 | \`\`\` 60 | 61 | Please provide a JSON response with the following structure: 62 | { 63 | "summary": "A brief one-line summary of what this ${nodeType} does", 64 | "documentation": "Detailed documentation explaining the purpose, behavior, and usage", 65 | "parameters": [ 66 | { 67 | "name": "parameter name", 68 | "type": "parameter type", 69 | "description": "parameter description" 70 | } 71 | ], 72 | "returnType": "return type if applicable", 73 | "returnDescription": "description of what is returned", 74 | "examples": [ 75 | "Example usage code snippet 1", 76 | "Example usage code snippet 2" 77 | ], 78 | "notes": "Any important notes or caveats" 79 | } 80 | 81 | Focus on: 82 | 1. Clear, concise explanations 83 | 2. Practical examples 84 | 3. Important edge cases or limitations 85 | 4. Best practices for using this ${nodeType}`; 86 | } 87 | 88 | export async function POST(request: NextRequest) { 89 | try { 90 | const body = (await request.json()) as DocumentationRequest; 91 | const { nodeId, nodeLabel, nodeType, code, language, relatedNodes = [] } = body; 92 | 93 | if (!nodeId || !nodeLabel || !nodeType || !code || !language) { 94 | return NextResponse.json( 95 | { error: 'Missing required fields: nodeId, nodeLabel, nodeType, code, language' }, 96 | { status: 400 } 97 | ); 98 | } 99 | 100 | // Generate documentation ID 101 | const docId = generateDocumentationId(code, language); 102 | 103 | // Check if documentation already exists in cache 104 | const cached = await getDocumentationById(docId); 105 | if (cached) { 106 | return NextResponse.json({ 107 | success: true, 108 | cached: true, 109 | documentation: cached, 110 | timestamp: Date.now(), 111 | }); 112 | } 113 | 114 | // Generate documentation using LLM 115 | const prompt = buildDocumentationPrompt(nodeLabel, nodeType, code, language); 116 | 117 | const { text, usage } = 
await generateText({ 118 | model: deepseek('deepseek-chat'), 119 | messages: [ 120 | { 121 | role: 'user', 122 | content: prompt, 123 | }, 124 | ], 125 | temperature: 0.3, 126 | }); 127 | 128 | // Parse the response 129 | let parsedDoc; 130 | try { 131 | // Extract JSON from the response 132 | const jsonMatch = text.match(/\{[\s\S]*\}/); 133 | if (!jsonMatch) { 134 | throw new Error('No JSON found in response'); 135 | } 136 | parsedDoc = JSON.parse(jsonMatch[0]); 137 | } catch (parseError) { 138 | console.error('Failed to parse LLM response:', parseError); 139 | parsedDoc = { 140 | summary: nodeLabel, 141 | documentation: text, 142 | examples: [], 143 | }; 144 | } 145 | 146 | // Create documentation object 147 | const documentation = { 148 | id: docId, 149 | nodeId, 150 | nodeLabel, 151 | nodeType, 152 | code, 153 | language, 154 | documentation: parsedDoc.documentation || text, 155 | summary: parsedDoc.summary || nodeLabel, 156 | parameters: parsedDoc.parameters || [], 157 | returnType: parsedDoc.returnType, 158 | returnDescription: parsedDoc.returnDescription, 159 | examples: parsedDoc.examples || [], 160 | relatedNodes, 161 | createdAt: Date.now(), 162 | updatedAt: Date.now(), 163 | llmModel: 'deepseek-chat', 164 | tokensUsed: usage?.totalTokens || 0, 165 | }; 166 | 167 | // Save to RxDB cache 168 | await saveDocumentation(documentation); 169 | 170 | return NextResponse.json({ 171 | success: true, 172 | cached: false, 173 | documentation, 174 | usage, 175 | timestamp: Date.now(), 176 | }); 177 | } catch (error) { 178 | console.error('Documentation generation error:', error); 179 | return NextResponse.json( 180 | { 181 | error: error instanceof Error ? 
error.message : 'Unknown error', 182 | }, 183 | { status: 500 } 184 | ); 185 | } 186 | } 187 | -------------------------------------------------------------------------------- /src/lib/__tests__/metrics.test.ts: -------------------------------------------------------------------------------- 1 | import { 2 | createMetricsCollector, 3 | recordFirstToken, 4 | finalizeMetrics, 5 | formatMetrics, 6 | calculateMetricsFromResponse, 7 | } from '../metrics'; 8 | 9 | describe('Metrics Module', () => { 10 | describe('createMetricsCollector', () => { 11 | it('should create a metrics collector with initial values', () => { 12 | const collector = createMetricsCollector('gpt-4', 'OpenAI'); 13 | 14 | expect(collector.model).toBe('gpt-4'); 15 | expect(collector.provider).toBe('OpenAI'); 16 | expect(collector.inputTokens).toBe(0); 17 | expect(collector.outputTokens).toBe(0); 18 | expect(collector.startTime).toBeLessThanOrEqual(Date.now()); 19 | expect(collector.firstTokenTime).toBeUndefined(); 20 | }); 21 | }); 22 | 23 | describe('recordFirstToken', () => { 24 | it('should record the first token time', () => { 25 | const collector = createMetricsCollector('gpt-4', 'OpenAI'); 26 | const beforeRecord = Date.now(); 27 | 28 | recordFirstToken(collector); 29 | 30 | const afterRecord = Date.now(); 31 | expect(collector.firstTokenTime).toBeDefined(); 32 | expect(collector.firstTokenTime!).toBeGreaterThanOrEqual(beforeRecord); 33 | expect(collector.firstTokenTime!).toBeLessThanOrEqual(afterRecord); 34 | }); 35 | 36 | it('should not overwrite first token time if already set', () => { 37 | const collector = createMetricsCollector('gpt-4', 'OpenAI'); 38 | 39 | recordFirstToken(collector); 40 | const firstTime = collector.firstTokenTime; 41 | 42 | // Wait a bit and record again 43 | recordFirstToken(collector); 44 | 45 | expect(collector.firstTokenTime).toBe(firstTime); 46 | }); 47 | }); 48 | 49 | describe('finalizeMetrics', () => { 50 | it('should calculate metrics correctly', () => { 51 | 
const collector = createMetricsCollector('gpt-4', 'OpenAI'); 52 | collector.inputTokens = 100; 53 | collector.outputTokens = 50; 54 | 55 | // Simulate some time passing 56 | const startTime = collector.startTime; 57 | collector.startTime = startTime - 1000; // 1 second ago 58 | 59 | const metrics = finalizeMetrics(collector); 60 | 61 | expect(metrics.inputTokens).toBe(100); 62 | expect(metrics.outputTokens).toBe(50); 63 | expect(metrics.totalTokens).toBe(150); 64 | expect(metrics.model).toBe('gpt-4'); 65 | expect(metrics.provider).toBe('OpenAI'); 66 | expect(metrics.totalLatency).toBeGreaterThanOrEqual(1000); 67 | expect(metrics.tokensPerSecond).toBeGreaterThan(0); 68 | expect(metrics.averageLatencyPerToken).toBeGreaterThan(0); 69 | }); 70 | 71 | it('should handle zero output tokens', () => { 72 | const collector = createMetricsCollector('gpt-4', 'OpenAI'); 73 | collector.inputTokens = 100; 74 | collector.outputTokens = 0; 75 | 76 | const metrics = finalizeMetrics(collector); 77 | 78 | expect(metrics.tokensPerSecond).toBe(0); 79 | expect(metrics.averageLatencyPerToken).toBe(0); 80 | }); 81 | 82 | it('should calculate first token latency correctly', () => { 83 | const collector = createMetricsCollector('gpt-4', 'OpenAI'); 84 | const startTime = Date.now() - 1000; // 1 second ago 85 | 86 | // Simulate first token arriving 200ms after start 87 | collector.startTime = startTime; 88 | collector.firstTokenTime = startTime + 200; 89 | 90 | const metrics = finalizeMetrics(collector); 91 | 92 | expect(metrics.firstTokenLatency).toBeGreaterThanOrEqual(200); 93 | expect(metrics.firstTokenLatency).toBeLessThanOrEqual(metrics.totalLatency); 94 | }); 95 | }); 96 | 97 | describe('formatMetrics', () => { 98 | it('should format metrics for display', () => { 99 | const metrics = { 100 | firstTokenLatency: 150, 101 | totalLatency: 1000, 102 | inputTokens: 100, 103 | outputTokens: 50, 104 | totalTokens: 150, 105 | tokensPerSecond: 50, 106 | averageLatencyPerToken: 20, 107 | timestamp: 
Date.now(), 108 | model: 'gpt-4', 109 | provider: 'OpenAI', 110 | }; 111 | 112 | const formatted = formatMetrics(metrics); 113 | 114 | expect(formatted['First Token Latency']).toBe('150ms'); 115 | expect(formatted['Total Latency']).toBe('1000ms'); 116 | expect(formatted['Input Tokens']).toBe('100'); 117 | expect(formatted['Output Tokens']).toBe('50'); 118 | expect(formatted['Total Tokens']).toBe('150'); 119 | expect(formatted['Throughput']).toBe('50.00 tokens/s'); 120 | expect(formatted['Avg Latency/Token']).toBe('20ms'); 121 | }); 122 | }); 123 | 124 | describe('calculateMetricsFromResponse', () => { 125 | it('should calculate metrics from response data', () => { 126 | const startTime = Date.now() - 1000; 127 | const endTime = Date.now(); 128 | const firstTokenTime = startTime + 200; 129 | 130 | const metrics = calculateMetricsFromResponse( 131 | startTime, 132 | endTime, 133 | firstTokenTime, 134 | 100, 135 | 50, 136 | 'gpt-4', 137 | 'OpenAI' 138 | ); 139 | 140 | expect(metrics.inputTokens).toBe(100); 141 | expect(metrics.outputTokens).toBe(50); 142 | expect(metrics.totalTokens).toBe(150); 143 | expect(metrics.firstTokenLatency).toBeGreaterThanOrEqual(200); 144 | expect(metrics.totalLatency).toBeGreaterThanOrEqual(1000); 145 | expect(metrics.model).toBe('gpt-4'); 146 | expect(metrics.provider).toBe('OpenAI'); 147 | }); 148 | 149 | it('should handle undefined first token time', () => { 150 | const startTime = Date.now() - 1000; 151 | const endTime = Date.now(); 152 | 153 | const metrics = calculateMetricsFromResponse( 154 | startTime, 155 | endTime, 156 | undefined, 157 | 100, 158 | 50, 159 | 'gpt-4', 160 | 'OpenAI' 161 | ); 162 | 163 | expect(metrics.firstTokenLatency).toBe(metrics.totalLatency); 164 | }); 165 | }); 166 | }); 167 | -------------------------------------------------------------------------------- /src/components/panels/InteractionPanel.tsx: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | 
import React, { useRef, useEffect } from 'react'; 4 | import type { UIMessage } from 'ai'; 5 | 6 | interface InteractionPanelProps { 7 | messages: UIMessage[]; 8 | input: string; 9 | isLoading: boolean; 10 | error: Error | undefined; 11 | onInputChange: (e: React.ChangeEvent) => void; 12 | onSubmit: ( 13 | e: React.KeyboardEvent | React.FormEvent 14 | ) => void; 15 | } 16 | 17 | export default function InteractionPanel({ 18 | messages, 19 | input, 20 | isLoading, 21 | error, 22 | onInputChange, 23 | onSubmit, 24 | }: InteractionPanelProps) { 25 | const messagesEndRef = useRef(null); 26 | 27 | const scrollToBottom = () => { 28 | messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' }); 29 | }; 30 | 31 | useEffect(() => { 32 | scrollToBottom(); 33 | }, [messages]); 34 | 35 | return ( 36 |
37 |
38 |

Chat & Interaction

39 |
40 | 41 | {/* Messages Area */} 42 |
43 | {messages.length === 0 ? ( 44 |
45 | 51 | 57 | 58 |

59 | No messages yet. Start by sending a message below. 60 |

61 |
62 | ) : ( 63 | <> 64 | {messages.map((message) => { 65 | // Extract text content from UIMessage parts 66 | const textContent = 67 | message.parts 68 | ?.filter((part) => part.type === 'text') 69 | .map((part) => part.text) 70 | .join('') || ''; 71 | 72 | return ( 73 |
77 |
84 |
{textContent}
85 |
90 | {new Date().toLocaleTimeString()} 91 |
92 |
93 |
94 | ); 95 | })} 96 | {isLoading && ( 97 |
98 |
99 |
100 |
101 |
105 |
109 |
110 |
111 |
112 | )} 113 |
114 | 115 | )} 116 |
117 | 118 | {/* Error Display */} 119 | {error && ( 120 |
121 |

Error: {error.message}

122 |
123 | )} 124 | 125 | {/* Input Area */} 126 |
127 |
128 |
129 |