├── .dockerignore ├── .github └── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── feature-or-enhancement-.md │ └── question.md ├── .gitignore ├── .husky └── commit-msg ├── .npmrc ├── .prettierrc.js ├── .windsurfrules ├── ARCHITECTURE.md ├── Dockerfile ├── README.md ├── README.zh-CN.md ├── app ├── api │ ├── check-update │ │ └── route.js │ ├── llm │ │ └── ollama │ │ │ └── models │ │ │ └── route.js │ ├── projects │ │ ├── [projectId] │ │ │ ├── chunks │ │ │ │ └── [chunkId] │ │ │ │ │ ├── questions │ │ │ │ │ └── route.js │ │ │ │ │ └── route.js │ │ │ ├── config │ │ │ │ └── route.js │ │ │ ├── datasets │ │ │ │ ├── optimize │ │ │ │ │ └── route.js │ │ │ │ └── route.js │ │ │ ├── files │ │ │ │ └── route.js │ │ │ ├── generate-questions │ │ │ │ └── route.js │ │ │ ├── llamaFactory │ │ │ │ ├── checkConfig │ │ │ │ │ └── route.js │ │ │ │ └── generate │ │ │ │ │ └── route.js │ │ │ ├── models │ │ │ │ ├── [modelId] │ │ │ │ │ └── route.js │ │ │ │ └── route.js │ │ │ ├── pdf │ │ │ │ └── route.js │ │ │ ├── playground │ │ │ │ └── chat │ │ │ │ │ ├── route.js │ │ │ │ │ └── stream │ │ │ │ │ └── route.js │ │ │ ├── preview │ │ │ │ └── [fileName] │ │ │ │ │ └── route.js │ │ │ ├── questions │ │ │ │ ├── [questionId] │ │ │ │ │ └── route.js │ │ │ │ ├── batch-delete │ │ │ │ │ └── route.js │ │ │ │ └── route.js │ │ │ ├── route.js │ │ │ ├── split │ │ │ │ └── route.js │ │ │ ├── tags │ │ │ │ └── route.js │ │ │ ├── tasks │ │ │ │ └── route.js │ │ │ └── text-split │ │ │ │ └── route.js │ │ └── route.js │ └── update │ │ └── route.js ├── dataset-square │ └── page.js ├── globals.css ├── layout.js ├── page.js └── projects │ └── [projectId] │ ├── datasets │ ├── [datasetId] │ │ └── page.js │ └── page.js │ ├── layout.js │ ├── page.js │ ├── playground │ └── page.js │ ├── questions │ ├── components │ │ └── QuestionEditDialog.js │ ├── hooks │ │ └── useQuestionEdit.js │ └── page.js │ ├── settings │ ├── components │ │ └── PromptSettings.js │ └── page.js │ └── text-split │ └── page.js ├── commitlint.config.mjs ├── components ├── 
ExportDatasetDialog.js ├── I18nProvider.js ├── LanguageSwitcher.js ├── ModelSelect.js ├── Navbar.js ├── ThemeRegistry.js ├── UpdateChecker.js ├── dataset-square │ ├── DatasetSearchBar.js │ ├── DatasetSiteCard.js │ └── DatasetSiteList.js ├── home │ ├── CreateProjectDialog.js │ ├── HeroSection.js │ ├── ParticleBackground.js │ ├── ProjectList.js │ └── StatsCard.js ├── playground │ ├── ChatArea.js │ ├── ChatMessage.js │ ├── MessageInput.js │ ├── ModelSelector.js │ └── PlaygroundHeader.js ├── questions │ ├── QuestionListView.js │ └── QuestionTreeView.js ├── settings │ ├── BasicSettings.js │ ├── ModelSettings.js │ └── TaskSettings.js └── text-split │ ├── ChunkCard.js │ ├── ChunkDeleteDialog.js │ ├── ChunkList.js │ ├── ChunkListHeader.js │ ├── ChunkViewDialog.js │ ├── DomainAnalysis.js │ ├── FileUploader.js │ ├── MarkdownViewDialog.js │ └── components │ ├── DeleteConfirmDialog.js │ ├── DirectoryView.js │ ├── DomainTreeView.js │ ├── FileList.js │ ├── PdfProcessingDialog.js │ ├── TabPanel.js │ └── UploadArea.js ├── constant ├── model.js ├── setting.js └── sites.json ├── electron-builder.yml ├── electron ├── entitlements.mac.plist ├── loading.html ├── main.js └── preload.js ├── hooks ├── useModelPlayground.js ├── useSnackbar.js └── useTaskSettings.js ├── jsconfig.json ├── lib ├── db │ ├── base.js │ ├── datasets.js │ ├── index.js │ ├── projects.js │ ├── questions.js │ ├── tags.js │ └── texts.js ├── i18n.js ├── llm │ ├── common │ │ └── util.js │ ├── core │ │ ├── index.js │ │ └── providers │ │ │ ├── base.js │ │ │ ├── ollama.js │ │ │ ├── openai.js │ │ │ ├── openrouter.js │ │ │ └── zhipu.js │ └── prompts │ │ ├── addLabel.js │ │ ├── addLabelEn.js │ │ ├── answer.js │ │ ├── answerEn.js │ │ ├── label.js │ │ ├── labelEn.js │ │ ├── newAnswer.js │ │ ├── newAnswerEn.js │ │ ├── optimalTitle.js │ │ ├── optimalTitleEn.js │ │ ├── optimizeCot.js │ │ ├── optimizeCotEn.js │ │ ├── pdfToMarkdown.js │ │ ├── pdfToMarkdownEn.js │ │ ├── question.js │ │ └── questionEn.js ├── models.js ├── 
pdf-processing │ ├── core │ │ └── index.js │ └── strategy │ │ ├── default.js │ │ ├── index.js │ │ ├── mineru.js │ │ └── vision.js ├── split-mardown │ ├── core │ │ ├── parser.js │ │ ├── splitter.js │ │ ├── summary.js │ │ └── toc.js │ ├── index.js │ ├── output │ │ ├── fileWriter.js │ │ └── formatter.js │ └── utils │ │ └── common.js ├── text-splitter.js └── util │ ├── async.js │ ├── logger.js │ └── request.js ├── locales ├── en │ └── translation.json └── zh-CN │ └── translation.json ├── next.config.js ├── package-lock.json ├── package.json ├── public └── imgs │ ├── 1.png │ ├── 10.png │ ├── 2.png │ ├── 3.png │ ├── 4.png │ ├── 5.png │ ├── 6.png │ ├── 7.png │ ├── 8.png │ ├── 9.png │ ├── aw.jpg │ ├── aws.png │ ├── bg.png │ ├── bg2.png │ ├── cn-arc.png │ ├── default-dataset.png │ ├── en-arc.png │ ├── garden.jpg │ ├── github.png │ ├── google.png │ ├── huggingface.png │ ├── kaggle.png │ ├── linux.png │ ├── lluga.png │ ├── logo.icns │ ├── logo.ico │ ├── logo.png │ ├── logo.svg │ ├── mac.png │ ├── modelscope.png │ ├── opendatalab.png │ └── windows.png └── styles ├── globals.css ├── home.js └── playground.js /.dockerignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | .next 3 | .git 4 | .github 5 | README.md 6 | README.zh-CN.md 7 | .gitignore 8 | .env.local 9 | .env.development.local 10 | .env.test.local 11 | .env.production.local 12 | /test 13 | /local-db 14 | /video -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[Bug]" 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **问题描述** 11 | 清晰、简洁地描述该问题的具体情况。 12 | 13 | **复现步骤** 14 | 重现该问题的操作步骤: 15 | 1. 进入“……”页面。 16 | 2. 点击“……”。 17 | 3. 向下滚动到“……”。 18 | 4. 
这时会看到错误提示。 19 | 20 | **预期结果** 21 | 清晰、简洁地描述你原本期望出现的情况。 22 | 23 | **截图** 24 | 如果有必要,请附上截图,以便更好地说明你的问题。 25 | 26 | **桌面设备(请完善以下信息)** 27 | - 操作系统:[例如:Windows、macOS] 28 | - 浏览器:[例如:谷歌浏览器(Chrome),苹果浏览器(Safari)] 29 | - Easy Dataset 版本:[例如:1.2.2] 30 | 31 | **其他相关信息** 32 | 在此处添加关于该问题的其他任何相关背景信息。 33 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-or-enhancement-.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 'Feature or enhancement ' 3 | about: Suggest an idea for this project 4 | title: "[Feature]" 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **你的功能请求是否与某个问题相关?请描述。** 11 | 清晰、简洁地描述一下存在的问题是什么。例如:当我[具体情况]时,我总是感到很沮丧。 12 | 13 | **描述你期望的解决方案** 14 | 清晰、简洁地描述你希望实现的情况。 15 | 16 | **描述你考虑过的替代方案** 17 | 清晰、简洁地描述你所考虑过的任何其他解决方案或功能。 18 | 19 | **其他相关信息** 20 | 在此处添加与该功能请求相关的其他任何背景信息或截图。 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Question 3 | about: Ask questions you want to know 4 | title: "[Question]" 5 | labels: question 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | build 3 | .vscode 4 | website-local.json 5 | ai-local.json 6 | .next 7 | .DS_Store 8 | tsconfig.tsbuildinfo 9 | mock-login-callback.ts 10 | .env.local 11 | /src/test/crawler 12 | /src/test/mock 13 | /local-db 14 | /test 15 | /dist -------------------------------------------------------------------------------- /.husky/commit-msg: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | npx commitlint --edit "$1" 4 | 
-------------------------------------------------------------------------------- /.npmrc: -------------------------------------------------------------------------------- 1 | registry=https://registry.npmjs.org -------------------------------------------------------------------------------- /.prettierrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | semi: true, 3 | trailingComma: 'none', 4 | singleQuote: true, 5 | tabWidth: 2, 6 | useTabs: false, 7 | bracketSpacing: true, 8 | arrowParens: 'avoid', 9 | proseWrap: 'preserve', 10 | jsxBracketSameLine: true, 11 | printWidth: 120, 12 | endOfLine: 'auto' 13 | }; 14 | -------------------------------------------------------------------------------- /.windsurfrules: -------------------------------------------------------------------------------- 1 | # Easy DataSet 项目架构设计 2 | 3 | ## 项目概述 4 | 5 | Easy DataSet 是一个用于创建大模型微调数据集的应用程序。用户可以上传文本文件,系统会自动分割文本并生成问题,最终生成用于微调的数据集。 6 | 7 | ## 技术栈 8 | 9 | - **前端框架**: Next.js 14 (App Router) 10 | - **UI 框架**: Material-UI (MUI) 11 | - **数据存储**: fs 文件系统模拟数据库 12 | - **开发语言**: JavaScript 13 | - **依赖管理**: pnpm 14 | 15 | ## 目录结构 16 | 17 | ``` 18 | easy-dataset/ 19 | ├── app/ # Next.js 应用目录 20 | │ ├── api/ # API 路由 21 | │ │ └── projects/ # 项目相关 API 22 | │ ├── projects/ # 项目相关页面 23 | │ │ ├── [projectId]/ # 项目详情页面 24 | │ └── page.js # 主页 25 | ├── components/ # React 组件 26 | │ ├── home/ # 主页相关组件 27 | │ │ ├── HeroSection.js 28 | │ │ ├── ProjectList.js 29 | │ │ └── StatsCard.js 30 | │ ├── Navbar.js # 导航栏组件 31 | │ └── CreateProjectDialog.js 32 | ├── lib/ # 工具库 33 | │ └── db/ # 数据库模块 34 | │ ├── base.js # 基础工具函数 35 | │ ├── projects.js # 项目管理 36 | │ ├── texts.js # 文本处理 37 | │ ├── datasets.js # 数据集管理 38 | │ └── index.js # 模块导出 39 | ├── styles/ # 样式文件 40 | │ └── home.js # 主页样式 41 | └── local-db/ # 本地数据库目录 42 | ``` 43 | 44 | ## 核心模块设计 45 | 46 | ### 1. 
数据库模块 (`lib/db/`) 47 | 48 | #### base.js 49 | - 提供基础的文件操作功能 50 | - 确保数据库目录存在 51 | - 读写 JSON 文件的工具函数 52 | 53 | #### projects.js 54 | - 项目的 CRUD 操作 55 | - 项目配置管理 56 | - 项目目录结构维护 57 | 58 | #### texts.js 59 | - 文献处理功能 60 | - 文本片段存储和检索 61 | - 文件上传处理 62 | 63 | #### datasets.js 64 | - 数据集生成和管理 65 | - 问题列表管理 66 | - 标签树管理 67 | 68 | ### 2. 前端组件 (`components/`) 69 | 70 | #### Navbar.js 71 | - 顶部导航栏 72 | - 项目切换 73 | - 模型选择 74 | - 主题切换 75 | 76 | #### home/ 目录组件 77 | - HeroSection.js: 主页顶部展示区 78 | - ProjectList.js: 项目列表展示 79 | - StatsCard.js: 数据统计展示 80 | - CreateProjectDialog.js: 创建项目的对话框 81 | 82 | ### 3. 页面路由 (`app/`) 83 | 84 | #### 主页 (`page.js`) 85 | - 项目列表展示 86 | - 创建项目入口 87 | - 数据统计展示 88 | 89 | #### 项目详情页 (`projects/[projectId]/`) 90 | - text-split/: 文献处理页面 91 | - questions/: 问题列表页面 92 | - datasets/: 数据集页面 93 | - settings/: 项目设置页面 94 | 95 | #### API 路由 (`api/`) 96 | - projects/: 项目管理 API 97 | - texts/: 文本处理 API 98 | - questions/: 问题生成 API 99 | - datasets/: 数据集管理 API 100 | 101 | ## 数据流设计 102 | 103 | ### 项目创建流程 104 | 1. 用户通过主页或导航栏创建新项目 105 | 2. 填写项目基本信息(名称、描述) 106 | 3. 系统创建项目目录和初始配置文件 107 | 4. 重定向到项目详情页 108 | 109 | ### 文献处理流程 110 | 1. 用户上传 Markdown 文件 111 | 2. 系统保存原始文件到项目目录 112 | 3. 调用文本分割服务,生成片段和目录结构 113 | 4. 展示分割结果和提取的目录 114 | 115 | ### 问题生成流程 116 | 1. 用户选择需要生成问题的文本片段 117 | 2. 系统调用大模型API生成问题 118 | 3. 保存问题到问题列表和标签树 119 | 120 | ### 数据集生成流程 121 | 1. 用户选择需要生成答案的问题 122 | 2. 系统调用大模型API生成答案 123 | 3. 保存数据集结果 124 | 4. 
提供导出功能 125 | -------------------------------------------------------------------------------- /ARCHITECTURE.md: -------------------------------------------------------------------------------- 1 | # Easy DataSet 项目架构设计 2 | 3 | ## 项目概述 4 | 5 | Easy DataSet 是一个用于创建大模型微调数据集的应用程序。用户可以上传文本文件,系统会自动分割文本并生成问题,最终生成用于微调的数据集。 6 | 7 | ## 技术栈 8 | 9 | - **前端框架**: Next.js 14 (App Router) 10 | - **UI 框架**: Material-UI (MUI) 11 | - **数据存储**: fs 文件系统模拟数据库 12 | - **开发语言**: JavaScript 13 | 14 | ## 目录结构 15 | 16 | ``` 17 | easy-dataset/ 18 | ├── app/ # Next.js 应用目录 19 | │ ├── api/ # API 路由 20 | │ │ └── projects/ # 项目相关 API 21 | │ ├── projects/ # 项目相关页面 22 | │ │ ├── [projectId]/ # 项目详情页面 23 | │ └── page.js # 主页 24 | ├── components/ # React 组件 25 | │ ├── home/ # 主页相关组件 26 | │ │ ├── HeroSection.js 27 | │ │ ├── ProjectList.js 28 | │ │ └── StatsCard.js 29 | │ ├── Navbar.js # 导航栏组件 30 | │ └── CreateProjectDialog.js 31 | ├── lib/ # 工具库 32 | │ └── db/ # 数据库模块 33 | │ ├── base.js # 基础工具函数 34 | │ ├── projects.js # 项目管理 35 | │ ├── texts.js # 文本处理 36 | │ ├── datasets.js # 数据集管理 37 | │ └── index.js # 模块导出 38 | ├── styles/ # 样式文件 39 | │ └── home.js # 主页样式 40 | └── local-db/ # 本地数据库目录 41 | ``` 42 | 43 | ## 核心模块设计 44 | 45 | ### 1. 数据库模块 (`lib/db/`) 46 | 47 | #### base.js 48 | - 提供基础的文件操作功能 49 | - 确保数据库目录存在 50 | - 读写 JSON 文件的工具函数 51 | 52 | #### projects.js 53 | - 项目的 CRUD 操作 54 | - 项目配置管理 55 | - 项目目录结构维护 56 | 57 | #### texts.js 58 | - 文献处理功能 59 | - 文本片段存储和检索 60 | - 文件上传处理 61 | 62 | #### datasets.js 63 | - 数据集生成和管理 64 | - 问题列表管理 65 | - 标签树管理 66 | 67 | ### 2. 前端组件 (`components/`) 68 | 69 | #### Navbar.js 70 | - 顶部导航栏 71 | - 项目切换 72 | - 模型选择 73 | - 主题切换 74 | 75 | #### home/ 目录组件 76 | - HeroSection.js: 主页顶部展示区 77 | - ProjectList.js: 项目列表展示 78 | - StatsCard.js: 数据统计展示 79 | - CreateProjectDialog.js: 创建项目的对话框 80 | 81 | ### 3. 
页面路由 (`app/`) 82 | 83 | #### 主页 (`page.js`) 84 | - 项目列表展示 85 | - 创建项目入口 86 | - 数据统计展示 87 | 88 | #### 项目详情页 (`projects/[projectId]/`) 89 | - text-split/: 文献处理页面 90 | - questions/: 问题列表页面 91 | - datasets/: 数据集页面 92 | - settings/: 项目设置页面 93 | 94 | #### API 路由 (`api/`) 95 | - projects/: 项目管理 API 96 | - texts/: 文本处理 API 97 | - questions/: 问题生成 API 98 | - datasets/: 数据集管理 API 99 | 100 | ## 数据流设计 101 | 102 | ### 项目创建流程 103 | 1. 用户通过主页或导航栏创建新项目 104 | 2. 填写项目基本信息(名称、描述) 105 | 3. 系统创建项目目录和初始配置文件 106 | 4. 重定向到项目详情页 107 | 108 | ### 文献处理流程 109 | 1. 用户上传 Markdown 文件 110 | 2. 系统保存原始文件到项目目录 111 | 3. 调用文本分割服务,生成片段和目录结构 112 | 4. 展示分割结果和提取的目录 113 | 114 | ### 问题生成流程 115 | 1. 用户选择需要生成问题的文本片段 116 | 2. 系统调用大模型API生成问题 117 | 3. 保存问题到问题列表和标签树 118 | 119 | ### 数据集生成流程 120 | 1. 用户选择需要生成答案的问题 121 | 2. 系统调用大模型API生成答案 122 | 3. 保存数据集结果 123 | 4. 提供导出功能 124 | 125 | ## 模型配置 126 | 127 | 支持多种大模型提供商配置: 128 | - Ollama 129 | - OpenAI 130 | - 硅基流动 131 | - 深度求索 132 | - 智谱AI 133 | 134 | 每个提供商支持配置: 135 | - API 地址 136 | - API 密钥 137 | - 模型名称 138 | 139 | ## 未来扩展方向 140 | 141 | 1. 支持更多文件格式(PDF、DOC等) 142 | 2. 增加数据集质量评估功能 143 | 3. 添加数据集版本管理 144 | 4. 实现团队协作功能 145 | 5. 
增加更多数据集导出格式 146 | 147 | ## 国际化处理 148 | 149 | ### 技术选型 150 | 151 | - **国际化库**: i18next + react-i18next 152 | - **语言检测**: i18next-browser-languagedetector 153 | - **支持语言**: 英文(en)、简体中文(zh-CN) 154 | 155 | ### 目录结构 156 | 157 | ``` 158 | easy-dataset/ 159 | ├── locales/ # 国际化资源目录 160 | │ ├── en/ # 英文翻译 161 | │ │ └── translation.json 162 | │ └── zh-CN/ # 中文翻译 163 | │ └── translation.json 164 | ├── lib/ 165 | │ └── i18n.js # i18next 配置 166 | ``` 167 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # 使用Node.js 18作为基础镜像 2 | FROM docker.1ms.run/library/node:18 3 | 4 | # 设置工作目录 5 | WORKDIR /app 6 | 7 | RUN apt-get update && apt-get install -y \ 8 | build-essential \ 9 | libcairo2-dev \ 10 | libpango1.0-dev \ 11 | libjpeg-dev \ 12 | libgif-dev \ 13 | librsvg2-dev \ 14 | && rm -rf /var/lib/apt/lists/* 15 | 16 | # 复制package.json和package-lock.json 17 | COPY package.json package-lock.json* ./ 18 | 19 | # 安装依赖 20 | RUN npm install 21 | 22 | # 复制所有文件 23 | COPY . . 
// Version reported when package.json is unreadable or carries no usable `version` string.
const FALLBACK_VERSION = '1.0.0';

/**
 * Read the running application's version from package.json in the process working directory.
 *
 * @returns {string} The `version` field, or '1.0.0' when the file is missing, malformed,
 *   or has no string `version` (a non-string would crash compareVersions downstream).
 */
function getCurrentVersion() {
  try {
    const packageJsonPath = path.join(process.cwd(), 'package.json');
    const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf8'));
    return typeof packageJson.version === 'string' ? packageJson.version : FALLBACK_VERSION;
  } catch (error) {
    console.error('读取版本信息失败:', error);
    return FALLBACK_VERSION;
  }
}

/**
 * Fetch the tag of the latest GitHub release and return it without its leading "v".
 *
 * @returns {Promise<string|null>} The latest version string, or null when the request
 *   fails or the response has no usable tag (the failure is logged, never thrown).
 */
async function getLatestVersion() {
  try {
    const owner = 'ConardLi';
    const repo = 'easy-dataset';
    const response = await fetch(`https://api.github.com/repos/${owner}/${repo}/releases/latest`);

    if (!response.ok) {
      throw new Error(`GitHub API 请求失败: ${response.status}`);
    }

    const data = await response.json();
    // Strip only a leading "v"; a bare replace('v', '') would also eat the first
    // "v" inside a suffix such as "1.0.0-preview".
    return data.tag_name.replace(/^v/, '');
  } catch (error) {
    console.error('获取最新版本失败:', error);
    return null;
  }
}

/**
 * GET handler: report whether a newer release than the running version exists.
 *
 * Responds with `{ hasUpdate, currentVersion, latestVersion, releaseUrl }`, or with
 * `{ hasUpdate: false, ..., error }` when the latest version cannot be determined.
 * Unexpected failures produce a 500 JSON response instead of no response at all.
 */
export async function GET() {
  try {
    const currentVersion = getCurrentVersion();
    const latestVersion = await getLatestVersion();

    if (!latestVersion) {
      return NextResponse.json({
        hasUpdate: false,
        currentVersion,
        latestVersion: null,
        error: '获取最新版本失败'
      });
    }

    const hasUpdate = compareVersions(latestVersion, currentVersion) > 0;

    return NextResponse.json({
      hasUpdate,
      currentVersion,
      latestVersion,
      releaseUrl: hasUpdate ? `https://github.com/ConardLi/easy-dataset/releases/tag/v${latestVersion}` : null
    });
  } catch (error) {
    console.error('检查更新失败:', error);
    // A route handler must always return a Response; the original fell through here.
    return NextResponse.json({ hasUpdate: false, error: error.message || '检查更新失败' }, { status: 500 });
  }
}

/**
 * Compare two dotted version strings numerically, segment by segment.
 *
 * Missing segments count as 0 ("1.0" equals "1.0.0"). Each segment is read by its
 * leading integer, so "4-beta" counts as 4 instead of NaN; a segment with no leading
 * digits counts as 0.
 *
 * @param {string} a - First version.
 * @param {string} b - Second version.
 * @returns {number} 1 if a > b, -1 if a < b, 0 if they are equal.
 */
function compareVersions(a, b) {
  const toParts = version =>
    version.split('.').map(segment => {
      const value = Number.parseInt(segment, 10);
      return Number.isNaN(value) ? 0 : value;
    });

  const partsA = toParts(a);
  const partsB = toParts(b);
  const segmentCount = Math.max(partsA.length, partsB.length);

  for (let i = 0; i < segmentCount; i++) {
    const numA = partsA[i] ?? 0;
    const numB = partsB[i] ?? 0;

    if (numA > numB) return 1;
    if (numA < numB) return -1;
  }

  return 0;
}
/**
 * POST handler: generate questions for one text chunk, label them against the
 * project's tags, and persist the labelled questions.
 *
 * Request body: `{ model, language = '中文', number }`. `number` overrides the
 * automatically derived question count.
 *
 * Responses:
 *  - 400 when the project ID, chunk ID or model is missing
 *  - 404 when the chunk cannot be found
 *  - 500 when the LLM output cannot be parsed into a question list, when the
 *    labelling output is not a list, or on any unexpected error
 *  - 200 `{ chunkId, labelQuestions, total }` on success
 */
export async function POST(request, { params }) {
  try {
    const { projectId, chunkId: c } = params;

    if (!projectId || !c) {
      return NextResponse.json({ error: 'Project ID or text block ID cannot be empty' }, { status: 400 });
    }

    const chunkId = decodeURIComponent(c);

    const { model, language = '中文', number } = await request.json();

    if (!model) {
      return NextResponse.json({ error: 'Model cannot be empty' }, { status: 400 });
    }

    const chunk = await getTextChunk(projectId, chunkId);
    if (!chunk) {
      return NextResponse.json({ error: 'Text block does not exist' }, { status: 404 });
    }

    // Task config supplies the characters-per-question ratio; project config supplies custom prompts.
    const taskConfig = await getTaskConfig(projectId);
    const config = await getProject(projectId);
    const { questionGenerationLength } = taskConfig;
    const { globalPrompt, questionPrompt } = config;

    const llmClient = new LLMClient({
      provider: model.provider,
      endpoint: model.endpoint,
      apiKey: model.apiKey,
      model: model.name,
      temperature: model.temperature,
      maxTokens: model.maxTokens
    });

    // Derive the count from the text length unless the caller fixed it. A short chunk
    // (ratio < 1) or a missing/zero questionGenerationLength would otherwise ask the
    // model for 0, NaN or Infinity questions, so fall back to a single question.
    const estimatedNumber = Math.floor(chunk.content.length / questionGenerationLength);
    const questionNumber = number || (Number.isFinite(estimatedNumber) && estimatedNumber > 0 ? estimatedNumber : 1);

    const promptFunc = language === 'en' ? getQuestionEnPrompt : getQuestionPrompt;
    const prompt = promptFunc({ text: chunk.content, number: questionNumber, language, globalPrompt, questionPrompt });

    const response = await llmClient.getResponse(prompt);

    const questions = extractJsonFromLLMOutput(response);

    console.log(projectId, chunkId, 'Questions:', questions);

    if (!questions || !Array.isArray(questions)) {
      return NextResponse.json({ error: 'Failed to generate questions' }, { status: 500 });
    }

    // Second LLM pass: attach labels from the project's tags to each question.
    const tags = await getTags(projectId);
    const labelPromptFunc = language === 'en' ? getAddLabelEnPrompt : getAddLabelPrompt;
    const labelPrompt = labelPromptFunc(JSON.stringify(tags), JSON.stringify(questions));
    const labelResponse = await llmClient.getResponse(labelPrompt);
    const labelQuestions = extractJsonFromLLMOutput(labelResponse);
    console.log(projectId, chunkId, 'Label Questions:', labelQuestions);

    // Validate before persisting: an unparseable labelling reply must not be written
    // to storage, nor crash on `.length` below.
    if (!Array.isArray(labelQuestions)) {
      return NextResponse.json({ error: 'Failed to label questions' }, { status: 500 });
    }

    await addQuestionsForChunk(projectId, chunkId, labelQuestions);

    return NextResponse.json({
      chunkId,
      labelQuestions,
      total: labelQuestions.length
    });
  } catch (error) {
    logger.error('Error generating questions:', error);
    return NextResponse.json({ error: error.message || 'Error generating questions' }, { status: 500 });
  }
}
/**
 * Tell whether `child` lies strictly inside directory `parent` (both absolute paths).
 */
function isPathInside(parent, child) {
  const relative = path.relative(parent, child);
  return (
    relative !== '' && relative !== '..' && !relative.startsWith(`..${path.sep}`) && !path.isAbsolute(relative)
  );
}

/**
 * Build the on-disk path `<projectRoot>/<projectId>/chunks/<chunkId>.txt`.
 *
 * Both IDs come from the request URL, so `../` segments could otherwise point the
 * handlers at files outside the project.
 *
 * @param {string} projectRoot - Directory that contains all project directories.
 * @param {string} projectId - Project directory name taken from the URL.
 * @param {string} chunkId - Decoded chunk ID taken from the URL.
 * @returns {string|null} The absolute chunk path, or null when the IDs would escape
 *   the project root or the project's chunks directory.
 */
function resolveChunkPath(projectRoot, projectId, chunkId) {
  const rootDir = path.resolve(projectRoot);
  const projectDir = path.resolve(rootDir, projectId);
  const chunksDir = path.join(projectDir, 'chunks');
  const chunkPath = path.resolve(chunksDir, `${chunkId}.txt`);

  if (!isPathInside(rootDir, projectDir) || !isPathInside(chunksDir, chunkPath)) {
    return null;
  }
  return chunkPath;
}

/**
 * GET handler: return the content of one text chunk.
 *
 * Responds 400 for a missing or path-escaping ID, 500 on unexpected errors,
 * otherwise the JSON returned by getChunkContent.
 */
export async function GET(request, { params }) {
  try {
    const { projectId, chunkId: c } = params;

    // Validate the raw parameter before decoding: decodeURIComponent(undefined)
    // yields the string "undefined", which would slip past an emptiness check.
    const chunkId = c ? decodeURIComponent(c) : '';

    if (!projectId) {
      return NextResponse.json({ error: 'Project ID cannot be empty' }, { status: 400 });
    }

    if (!chunkId) {
      return NextResponse.json({ error: 'Text block ID cannot be empty' }, { status: 400 });
    }

    const projectRoot = await getProjectRoot();
    if (!resolveChunkPath(projectRoot, projectId, chunkId)) {
      return NextResponse.json({ error: 'Invalid text block ID' }, { status: 400 });
    }

    const chunk = await getChunkContent(projectId, chunkId);

    return NextResponse.json(chunk);
  } catch (error) {
    console.error('Failed to get text block content:', error);
    return NextResponse.json({ error: error.message || 'Failed to get text block content' }, { status: 500 });
  }
}

/**
 * DELETE handler: remove the chunk's `.txt` file.
 *
 * Responds 400 for a missing or path-escaping ID, 404 when the file does not exist,
 * 500 on unexpected errors, otherwise a success message.
 */
export async function DELETE(request, { params }) {
  try {
    const { projectId, chunkId: c } = params;

    const chunkId = c ? decodeURIComponent(c) : '';

    if (!projectId) {
      return NextResponse.json({ error: 'Project ID cannot be empty' }, { status: 400 });
    }

    if (!chunkId) {
      return NextResponse.json({ error: 'Text block ID cannot be empty' }, { status: 400 });
    }

    const projectRoot = await getProjectRoot();
    const chunkPath = resolveChunkPath(projectRoot, projectId, chunkId);
    if (!chunkPath) {
      return NextResponse.json({ error: 'Invalid text block ID' }, { status: 400 });
    }

    // Unlink directly and map ENOENT to 404; a separate access() check would leave
    // a window in which the file can vanish between the check and the delete.
    try {
      await fs.unlink(chunkPath);
    } catch (error) {
      if (error.code === 'ENOENT') {
        return NextResponse.json({ error: 'Text block does not exist' }, { status: 404 });
      }
      throw error;
    }

    return NextResponse.json({ message: 'Text block deleted successfully' });
  } catch (error) {
    console.error('Failed to delete text block:', error);
    return NextResponse.json({ error: error.message || 'Failed to delete text block' }, { status: 500 });
  }
}

/**
 * PATCH handler: overwrite the content of an existing chunk.
 *
 * Request body: `{ content }`, a non-empty string.
 * Responds 400 for a missing or path-escaping ID or unusable content, 404 when the
 * chunk file does not exist, 500 on unexpected errors, otherwise the updated chunk
 * as returned by getChunkContent.
 */
export async function PATCH(request, { params }) {
  try {
    const { projectId, chunkId: c } = params;
    const chunkId = c ? decodeURIComponent(c) : '';

    if (!projectId) {
      return NextResponse.json({ error: '项目ID不能为空' }, { status: 400 });
    }

    if (!chunkId) {
      return NextResponse.json({ error: '文本块ID不能为空' }, { status: 400 });
    }

    const requestData = await request.json();
    const { content } = requestData;

    // Only a non-empty string can be written as chunk text.
    if (typeof content !== 'string' || content.length === 0) {
      return NextResponse.json({ error: '内容不能为空' }, { status: 400 });
    }

    const projectRoot = await getProjectRoot();
    const chunkPath = resolveChunkPath(projectRoot, projectId, chunkId);
    if (!chunkPath) {
      return NextResponse.json({ error: '文本块ID无效' }, { status: 400 });
    }

    // writeFile would silently create a missing file, so existence is checked first:
    // editing must never create a new chunk.
    try {
      await fs.access(chunkPath);
    } catch (error) {
      return NextResponse.json({ error: '文本块不存在' }, { status: 404 });
    }

    await fs.writeFile(chunkPath, content, 'utf-8');

    const updatedChunk = await getChunkContent(projectId, chunkId);

    return NextResponse.json(updatedChunk);
  } catch (error) {
    console.error('编辑文本块失败:', error);
    return NextResponse.json({ error: error.message || '编辑文本块失败' }, { status: 500 });
  }
}
/**
 * GET handler: return the stored configuration of a project.
 *
 * Responds with the JSON returned by getProject, or 500 `{ error }` on failure.
 */
export async function GET(request, { params }) {
  try {
    const projectId = params.projectId;
    const config = await getProject(projectId);
    return NextResponse.json(config);
  } catch (error) {
    console.error('获取项目配置失败:', error);
    return NextResponse.json({ error: error.message }, { status: 500 });
  }
}

/**
 * PUT handler: merge the `prompts` object of the request body into the project config.
 *
 * Only the keys inside `prompts` are applied; every other field of the body is ignored.
 * A missing or null `prompts` leaves the config unchanged.
 *
 * Responds 400 when `prompts` is present but is not a plain object, 500 `{ error }` on
 * failure, otherwise the JSON returned by updateProject.
 */
export async function PUT(request, { params }) {
  try {
    const projectId = params.projectId;
    const newConfig = await request.json();
    const prompts = newConfig?.prompts;

    // Spreading a string or array would write numeric-index keys ("0", "1", ...)
    // into the stored project config, so reject anything that is not an object.
    if (prompts != null && (typeof prompts !== 'object' || Array.isArray(prompts))) {
      return NextResponse.json({ error: 'prompts must be an object' }, { status: 400 });
    }

    const currentConfig = await getProject(projectId);

    const updatedConfig = {
      ...currentConfig,
      ...prompts
    };

    const config = await updateProject(projectId, updatedConfig);
    return NextResponse.json(config);
  } catch (error) {
    console.error('更新项目配置失败:', error);
    return NextResponse.json({ error: error.message }, { status: 500 });
  }
}
params; 13 | 14 | // 验证项目ID 15 | if (!projectId) { 16 | return NextResponse.json({ error: 'Project ID cannot be empty' }, { status: 400 }); 17 | } 18 | 19 | // 获取请求体 20 | const { datasetId, model, advice, language } = await request.json(); 21 | 22 | if (!datasetId) { 23 | return NextResponse.json({ error: 'Dataset ID cannot be empty' }, { status: 400 }); 24 | } 25 | 26 | if (!model) { 27 | return NextResponse.json({ error: 'Model cannot be empty' }, { status: 400 }); 28 | } 29 | 30 | if (!advice) { 31 | return NextResponse.json({ error: 'Please provide optimization suggestions' }, { status: 400 }); 32 | } 33 | 34 | // 获取数据集内容 35 | const dataset = await getDataset(projectId, datasetId); 36 | if (!dataset) { 37 | return NextResponse.json({ error: 'Dataset does not exist' }, { status: 404 }); 38 | } 39 | 40 | // 创建LLM客户端 41 | const llmClient = new LLMClient({ 42 | provider: model.provider, 43 | endpoint: model.endpoint, 44 | apiKey: model.apiKey, 45 | model: model.name, 46 | temperature: model.temperature, 47 | maxTokens: model.maxTokens 48 | }); 49 | 50 | // 生成优化后的答案和思维链 51 | const prompt = 52 | language === 'en' 53 | ? 
/**
 * Batch-generate questions for the text chunks of a project.
 *
 * Request body: `{ model, chunkIds, language }`. When `chunkIds` is missing or
 * empty, every chunk returned by `getProjectChunks` is processed; otherwise only
 * the listed chunks that `getTextChunk` can find. Chunks are handled one at a
 * time, and a failure on one chunk is recorded in `errors` without aborting the
 * remaining chunks.
 *
 * @param {Request} request - Incoming request carrying the JSON body.
 * @param {{ params: { projectId: string } }} context - Route parameters.
 * @returns {Promise<Response>} JSON with `results`, `errors` and totals; 400 when
 *   the project ID or model is missing, 404 when no chunk is found, 500 on
 *   unexpected errors.
 */
export async function POST(request, { params }) {
  try {
    const { projectId } = params;

    // Validate the project ID
    if (!projectId) {
      return NextResponse.json({ error: 'The project ID cannot be empty' }, { status: 400 });
    }

    // Read the request body
    const { model, chunkIds, language = '中文' } = await request.json();

    if (!model) {
      return NextResponse.json({ error: 'The model cannot be empty' }, { status: 400 });
    }

    // Without explicit chunk IDs, fall back to every chunk of the project
    let chunks = [];
    if (!chunkIds || chunkIds.length === 0) {
      const result = await getProjectChunks(projectId);
      chunks = result.chunks || [];
    } else {
      // Load the requested chunks in parallel
      const loaded = await Promise.all(
        chunkIds.map(async chunkId => {
          const chunk = await getTextChunk(projectId, chunkId);
          if (!chunk) {
            return null;
          }
          return {
            id: chunk.id,
            content: chunk.content,
            length: chunk.content.length
          };
        })
      );
      // Drop chunks that do not exist
      chunks = loaded.filter(Boolean);
    }

    if (chunks.length === 0) {
      return NextResponse.json({ error: 'No valid text blocks found' }, { status: 404 });
    }

    const llmClient = new LLMClient({
      provider: model.provider,
      endpoint: model.endpoint,
      apiKey: model.apiKey,
      model: model.name,
      temperature: model.temperature,
      maxTokens: model.maxTokens
    });

    const results = [];
    const errors = [];

    // Read the project's task configuration
    const taskConfig = await getTaskConfig(projectId);
    const { questionGenerationLength } = taskConfig;

    // Pick the prompt builder that matches the requested language
    const promptFunc = language === 'en' ? getQuestionEnPrompt : getQuestionPrompt;

    for (const chunk of chunks) {
      try {
        // Derive the number of questions from the chunk length. A chunk shorter
        // than questionGenerationLength used to yield 0, and a missing or zero
        // configuration value yielded NaN/Infinity; always ask for at least one.
        const chunkLength = typeof chunk.length === 'number' ? chunk.length : (chunk.content || '').length;
        const ratio = Math.floor(chunkLength / questionGenerationLength);
        const questionNumber = Number.isFinite(ratio) && ratio >= 1 ? ratio : 1;

        // Generate the questions
        const prompt = promptFunc(chunk.content, questionNumber, language);
        const response = await llmClient.getResponse(prompt);

        // Extract the JSON question list from the LLM output
        const questions = extractJsonFromLLMOutput(response);

        if (questions && Array.isArray(questions)) {
          // Persist the questions
          await addQuestionsForChunk(projectId, chunk.id, questions);

          results.push({
            chunkId: chunk.id,
            success: true,
            questions,
            total: questions.length
          });
        } else {
          errors.push({
            chunkId: chunk.id,
            error: 'Failed to parse questions'
          });
        }
      } catch (error) {
        console.error(`Failed to generate questions for text block ${chunk.id}:`, error);
        errors.push({
          chunkId: chunk.id,
          error: error.message || 'Failed to generate questions'
        });
      }
    }

    // Report the outcome for every chunk
    return NextResponse.json({
      results,
      errors,
      totalSuccess: results.length,
      totalErrors: errors.length,
      totalChunks: chunks.length
    });
  } catch (error) {
    console.error('Failed to generate questions:', error);
    return NextResponse.json({ error: error.message || 'Failed to generate questions' }, { status: 500 });
  }
}
/**
 * Write the LLaMA Factory export files for a project.
 *
 * Produces `dataset_info.json`, `alpaca.json` and `sharegpt.json` inside the
 * project directory from the project's datasets. The request body may carry
 * `systemPrompt`, `confirmedOnly` (export only datasets flagged `confirmed`)
 * and `includeCOT` (prefix the answer with the chain of thought when present).
 *
 * @param {Request} request - Incoming request carrying the JSON options.
 * @param {{ params: { projectId: string } }} context - Route parameters.
 * @returns {Promise<Response>} JSON with `success`, `configPath` and the written
 *   `files`; 400 when the project ID is missing, 500 on any error.
 */
export async function POST(request, { params }) {
  try {
    const { projectId } = params;
    const options = await request.json();
    const { systemPrompt, confirmedOnly, includeCOT } = options;

    if (!projectId) {
      return NextResponse.json({ error: 'The project ID cannot be empty' }, { status: 400 });
    }

    // Resolve every output path under the project directory
    const projectPath = path.join(await getProjectRoot(), projectId);
    const configPath = path.join(projectPath, 'dataset_info.json');
    const alpacaPath = path.join(projectPath, 'alpaca.json');
    const sharegptPath = path.join(projectPath, 'sharegpt.json');

    // Load the datasets, optionally keeping only the confirmed ones
    const allDatasets = await getDatasets(projectId);
    const datasets = confirmedOnly ? allDatasets.filter(item => item.confirmed) : allDatasets;

    // dataset_info.json entries, one per export format
    const config = {};
    config[`[Easy Dataset] [${projectId}] Alpaca`] = {
      file_name: 'alpaca.json',
      columns: {
        prompt: 'instruction',
        query: 'input',
        response: 'output',
        system: 'system'
      }
    };
    config[`[Easy Dataset] [${projectId}] ShareGPT`] = {
      file_name: 'sharegpt.json',
      formatting: 'sharegpt',
      columns: {
        messages: 'messages'
      },
      tags: {
        role_tag: 'role',
        content_tag: 'content',
        user_tag: 'user',
        assistant_tag: 'assistant',
        system_tag: 'system'
      }
    };

    // The assistant text is shared by both formats
    const buildOutput = (answer, cot) => (cot && includeCOT ? `${cot}\n${answer}` : answer);

    const alpacaData = datasets.map(item => ({
      instruction: item.question,
      input: '',
      output: buildOutput(item.answer, item.cot),
      system: systemPrompt || ''
    }));

    const sharegptData = datasets.map(item => {
      const systemMessages = systemPrompt ? [{ role: 'system', content: systemPrompt }] : [];
      return {
        messages: [
          ...systemMessages,
          { role: 'user', content: item.question },
          { role: 'assistant', content: buildOutput(item.answer, item.cot) }
        ]
      };
    });

    // Write the three files one after another
    const outputs = [
      [configPath, config],
      [alpacaPath, alpacaData],
      [sharegptPath, sharegptData]
    ];
    for (const [target, payload] of outputs) {
      await fs.promises.writeFile(target, JSON.stringify(payload, null, 2));
    }

    return NextResponse.json({
      success: true,
      configPath,
      files: [
        { path: alpacaPath, format: 'alpaca' },
        { path: sharegptPath, format: 'sharegpt' }
      ]
    });
  } catch (error) {
    console.error('Error generating Llama Factory config:', error);
    return NextResponse.json({ error: error.message }, { status: 500 });
  }
}
/**
 * Return the model configuration stored for a project.
 *
 * Reads `model-config.json` from the project directory. When the project
 * directory is not accessible the response is 404; when the configuration file
 * is not accessible an empty array is returned.
 *
 * @param {Request} request - Incoming request (not read).
 * @param {{ params: { projectId: string } }} context - Route parameters.
 * @returns {Promise<Response>} The parsed configuration, `[]`, or an error body
 *   with status 400, 404 or 500.
 */
export async function GET(request, { params }) {
  try {
    const { projectId } = params;

    // Validate the project ID
    if (!projectId) {
      return NextResponse.json({ error: 'The project ID cannot be empty' }, { status: 400 });
    }

    // fs.access rejects when the path cannot be accessed
    const isAccessible = target =>
      fs.access(target).then(
        () => true,
        () => false
      );

    // Locate the project directory and make sure it exists
    const projectPath = path.join(await getProjectRoot(), projectId);
    if (!(await isAccessible(projectPath))) {
      return NextResponse.json({ error: 'The project does not exist' }, { status: 404 });
    }

    // A project without a configuration file yields the default (empty) configuration
    const modelConfigPath = path.join(projectPath, 'model-config.json');
    if (!(await isAccessible(modelConfigPath))) {
      return NextResponse.json([]);
    }

    // Read and parse the stored configuration
    const rawConfig = await fs.readFile(modelConfigPath, 'utf-8');
    return NextResponse.json(JSON.parse(rawConfig));
  } catch (error) {
    console.error('Error obtaining model configuration:', error);
    return NextResponse.json({ error: 'Failed to obtain model configuration' }, { status: 500 });
  }
}
/**
 * Convert an uploaded PDF of a project and update the project's file list.
 *
 * Query parameters: `fileName` (required), `strategy` (defaults to `'default'`),
 * `currentLanguage` and `modelId` (passed to the processor as `language` and
 * `visionModelId`).
 *
 * After processing, the source PDF is deleted and removed from
 * `uploadedFiles` whether or not the conversion succeeded (same as before).
 * On success the converted file name is added to `uploadedFiles` once.
 *
 * @param {Request} request - Incoming request; `request.nextUrl.searchParams` is read.
 * @param {{ params: { projectId: string } }} context - Route parameters.
 * @returns {Promise<Response>} JSON with `projectId`, `project`, `uploadedFiles`
 *   and `batch_id`; 400 for missing parameters, 404 for an unknown project,
 *   500 when processing fails.
 */
export async function GET(request, { params }) {
  try {
    const { projectId } = params;
    const { searchParams } = request.nextUrl;

    const fileName = searchParams.get('fileName');
    const currentLanguage = searchParams.get('currentLanguage');
    const visionModel = searchParams.get('modelId');
    // Fall back to the default strategy when none is supplied
    const strategy = searchParams.get('strategy') || 'default';

    // Validate the required parameters
    if (!projectId) {
      return NextResponse.json({ error: '项目ID不能为空' }, { status: 400 });
    }
    if (!fileName) {
      return NextResponse.json({ error: '文件名不能为空' }, { status: 400 });
    }

    // Load the project first so an unknown project is reported before any
    // conversion work or file deletion happens
    const project = await getProject(projectId);
    if (!project) {
      return NextResponse.json({ error: '项目不存在' }, { status: 404 });
    }

    // Run the conversion with the selected strategy
    const processor = new PdfProcessor(strategy);
    const result = await processor.process(projectId, fileName, {
      language: currentLanguage,
      visionModelId: visionModel
    });

    // Remove the source PDF; awaited so a failure surfaces in this handler
    // instead of becoming an unhandled rejection
    await deleteFile(projectId, fileName);

    // Drop the deleted PDF from the project's file list
    const updatedFiles = (project.uploadedFiles || []).filter(f => f !== fileName);

    if (result.success) {
      // Register the converted file, guarding against a duplicate entry.
      // NOTE(review): assumes the processor names its output by the same
      // '.pdf' -> '.md' replacement — confirm against PdfProcessor.
      const convertedName = fileName.replace('.pdf', '.md');
      if (!updatedFiles.includes(convertedName)) {
        updatedFiles.push(convertedName);
      }
    }

    // Persist the file list once, for both the success and the failure case
    await updateProject(projectId, {
      ...project,
      uploadedFiles: updatedFiles
    });

    // Report a failed conversion only after the configuration is consistent
    if (!result.success) {
      throw new Error(result.error);
    }

    return NextResponse.json({
      projectId,
      project,
      uploadedFiles: updatedFiles,
      batch_id: result.data
    });
  } catch (error) {
    console.error('PDF处理流程出错:', error);
    return NextResponse.json({ error: error.message || 'PDF处理流程' }, { status: 500 });
  }
}
}, { status: 400 }); 19 | } 20 | 21 | if (!Array.isArray(messages) || messages.length === 0) { 22 | return NextResponse.json({ error: 'The message list cannot be empty' }, { status: 400 }); 23 | } 24 | 25 | // 使用自定义的LLM客户端 26 | const llmClient = new LLMClient({ 27 | provider: model.provider, 28 | endpoint: model.endpoint, 29 | apiKey: model.apiKey, 30 | model: model.name, 31 | temperature: model.temperature, 32 | maxTokens: model.maxTokens, 33 | type: model.type // 添加模型类型,用于区分语言模型和视觉模型 34 | }); 35 | 36 | // 格式化消息历史 37 | const formattedMessages = messages.map(msg => { 38 | // 处理纯文本消息 39 | if (typeof msg.content === 'string') { 40 | return { 41 | role: msg.role, 42 | content: msg.content 43 | }; 44 | } 45 | // 处理包含图片的复合消息(用于视觉模型) 46 | else if (Array.isArray(msg.content)) { 47 | return { 48 | role: msg.role, 49 | content: msg.content 50 | }; 51 | } 52 | // 默认情况 53 | return { 54 | role: msg.role, 55 | content: msg.content 56 | }; 57 | }); 58 | 59 | // 调用LLM API 60 | let response = ''; 61 | try { 62 | response = await llmClient.getResponse(formattedMessages); 63 | } catch (error) { 64 | console.error('Failed to call LLM API:', error); 65 | return NextResponse.json( 66 | { 67 | error: `Failed to call ${model.provider} model: ${error.message}` 68 | }, 69 | { status: 500 } 70 | ); 71 | } 72 | 73 | return NextResponse.json({ response }); 74 | } catch (error) { 75 | console.error('Failed to process chat request:', error); 76 | return NextResponse.json({ error: `Failed to process chat request: ${error.message}` }, { status: 500 }); 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /app/api/projects/[projectId]/playground/chat/stream/route.js: -------------------------------------------------------------------------------- 1 | import { NextResponse } from 'next/server'; 2 | import LLMClient from '@/lib/llm/core/index'; 3 | 4 | /** 5 | * 流式输出的聊天接口 6 | */ 7 | export async function POST(request, { params }) { 8 | const { projectId 
/**
 * Report whether `target` resolves to a location strictly inside `baseDir`.
 *
 * @param {string} baseDir - Directory that must contain the target.
 * @param {string} target - Path to check.
 * @returns {boolean} True only when the target lies below the base directory.
 */
function isPathInside(baseDir, target) {
  const relative = path.relative(path.resolve(baseDir), path.resolve(target));
  return (
    relative !== '' && relative !== '..' && !relative.startsWith(`..${path.sep}`) && !path.isAbsolute(relative)
  );
}

/**
 * Return the UTF-8 text of a file stored under the project's `files` directory.
 *
 * The `fileName` route parameter is URI-decoded before use. Names that would
 * resolve outside the project's `files` directory (for example via `..`
 * segments) are rejected with 400 instead of being read.
 *
 * @param {Request} request - Incoming request (not read).
 * @param {{ params: { projectId: string, fileName: string } }} context - Route parameters.
 * @returns {Promise<Response>} JSON `{ fileName, content }`; 400 for missing or
 *   invalid parameters, 500 when the file cannot be read.
 */
export async function GET(request, { params }) {
  try {
    const { projectId, fileName: encodedName } = params;

    // Validate the parameters
    if (!projectId) {
      return NextResponse.json({ error: 'Project ID cannot be empty' }, { status: 400 });
    }

    // Check before decoding: decodeURIComponent(undefined) yields the string
    // "undefined", which would slip past the emptiness check below
    const fileName = encodedName ? decodeURIComponent(encodedName) : '';
    if (!fileName) {
      return NextResponse.json({ error: 'file Name cannot be empty' }, { status: 400 });
    }

    // Resolve the file inside <project root>/<projectId>/files
    const projectRoot = await getProjectRoot();
    const filesDir = path.join(projectRoot, projectId, 'files');
    const filePath = path.join(filesDir, fileName);

    // Refuse anything that escapes the project root or the files directory
    if (!isPathInside(projectRoot, filesDir) || !isPathInside(filesDir, filePath)) {
      return NextResponse.json({ error: 'Invalid file path' }, { status: 400 });
    }

    // Read asynchronously so a large file does not block the event loop
    const text = await fs.promises.readFile(filePath, 'utf-8');

    return NextResponse.json({ fileName: fileName, content: text });
  } catch (error) {
    console.error('Failed to get text block content:', error);
    return NextResponse.json({ error: error.message || 'Failed to get text block content' }, { status: 500 });
  }
}
/**
 * Delete several questions of a project in one request.
 *
 * Request body: `{ questions }`, a non-empty array whose entries each carry a
 * `questionId` and a `chunkId`.
 *
 * @param {Request} request - Incoming request carrying the JSON body.
 * @param {{ params: { projectId: string } }} context - Route parameters.
 * @returns {Promise<Response>} JSON `{ success, message }`; 400 when the project
 *   ID or the list is missing or an entry is incomplete, 500 on errors.
 */
export async function DELETE(request, { params }) {
  try {
    const { projectId } = params;

    // Validate the project ID
    if (!projectId) {
      return NextResponse.json({ error: 'Project ID is required' }, { status: 400 });
    }

    // The questions to delete come from the request body
    const payload = await request.json();
    const { questions } = payload;

    const hasQuestions = Array.isArray(questions) && questions.length > 0;
    if (!hasQuestions) {
      return NextResponse.json({ error: 'Questions list is required' }, { status: 400 });
    }

    // Every entry must identify both the question and its chunk
    const hasIncompleteEntry = questions.some(question => !question.questionId || !question.chunkId);
    if (hasIncompleteEntry) {
      return NextResponse.json(
        { error: 'Question information is incomplete, must include questionId and chunkId' },
        { status: 400 }
      );
    }

    // Delete the questions in one batch
    await batchDeleteQuestions(projectId, questions);

    const message = `Successfully deleted ${questions.length} questions`;
    return NextResponse.json({ success: true, message });
  } catch (error) {
    console.error('Failed to batch delete questions:', error);
    return NextResponse.json({ error: error.message || 'Failed to batch delete questions' }, { status: 500 });
  }
}
/**
 * Return the details of a single project.
 *
 * @param {Request} request - Incoming request (not read).
 * @param {{ params: { projectId: string } }} context - Route parameters.
 * @returns {Promise<Response>} The project as JSON, 404 when `getProject`
 *   returns a falsy value, 500 on errors.
 */
export async function GET(request, { params }) {
  try {
    const project = await getProject(params.projectId);

    return project ? Response.json(project) : Response.json({ error: '项目不存在' }, { status: 404 });
  } catch (error) {
    console.error('获取项目详情出错:', error);
    return Response.json({ error: error.message }, { status: 500 });
  }
}

/**
 * Update a project with the JSON body of the request.
 *
 * @param {Request} request - Incoming request carrying the project data.
 * @param {{ params: { projectId: string } }} context - Route parameters.
 * @returns {Promise<Response>} The updated project as JSON, 400 when `name` is
 *   missing, 404 when `updateProject` returns a falsy value, 500 on errors.
 */
export async function PUT(request, { params }) {
  try {
    const { projectId } = params;
    const projectData = await request.json();

    // The project name is mandatory
    if (!projectData.name) {
      return Response.json({ error: '项目名称不能为空' }, { status: 400 });
    }

    const updatedProject = await updateProject(projectId, projectData);

    return updatedProject
      ? Response.json(updatedProject)
      : Response.json({ error: '项目不存在' }, { status: 404 });
  } catch (error) {
    console.error('更新项目出错:', error);
    return Response.json({ error: error.message }, { status: 500 });
  }
}

/**
 * Delete a project.
 *
 * @param {Request} request - Incoming request (not read).
 * @param {{ params: { projectId: string } }} context - Route parameters.
 * @returns {Promise<Response>} `{ success: true }`, 404 when `deleteProject`
 *   returns a falsy value, 500 on errors.
 */
export async function DELETE(request, { params }) {
  try {
    const wasDeleted = await deleteProject(params.projectId);

    return wasDeleted ? Response.json({ success: true }) : Response.json({ error: '项目不存在' }, { status: 404 });
  } catch (error) {
    console.error('删除项目出错:', error);
    return Response.json({ error: error.message }, { status: 500 });
  }
}
/**
 * Split an uploaded file into chunks and build the project's domain tree.
 *
 * Request body: `{ fileName, model, language }`. After splitting, the table of
 * contents (`toc`) from the split result is sent to the LLM with the label
 * prompt. When the LLM gives no usable answer the file is deleted, removed
 * from `uploadedFiles`, and a 400 is returned; otherwise the tags are saved.
 *
 * @param {Request} request - Incoming request carrying the JSON body.
 * @param {{ params: { projectId: string } }} context - Route parameters.
 * @returns {Promise<Response>} The split result merged with `tags`, or an error
 *   body with status 400 or 500.
 */
export async function POST(request, { params }) {
  try {
    const { projectId } = params;

    // Validate the project ID
    if (!projectId) {
      return NextResponse.json({ error: '项目ID不能为空' }, { status: 400 });
    }

    // Read the request body
    const body = await request.json();
    const { fileName, model, language } = body;

    if (!model) {
      return NextResponse.json({ error: '请选择模型' }, { status: 400 });
    }

    // Validate the file name
    if (!fileName) {
      return NextResponse.json({ error: '文件名不能为空' }, { status: 400 });
    }

    const project = await getProject(projectId);
    const { globalPrompt, domainTreePrompt } = project;

    // Split the text
    const result = await splitProjectFile(projectId, fileName);

    const llmClient = new LLMClient({
      provider: model.provider,
      endpoint: model.endpoint,
      apiKey: model.apiKey,
      model: model.name,
      temperature: model.temperature,
      maxTokens: model.maxTokens
    });

    // Build the domain tree from the table of contents
    console.log(projectId, fileName, 'Text split completed, starting to build domain tree');
    const buildPrompt = language === 'en' ? getLabelEnPrompt : getLabelPrompt;
    const response = await llmClient.getResponse(buildPrompt({ text: result.toc, globalPrompt, domainTreePrompt }));
    const tags = extractJsonFromLLMOutput(response);

    if (response && tags) {
      console.log(projectId, fileName, 'Domain tree built:', tags);
      await saveTags(projectId, tags);

      return NextResponse.json({ ...result, tags });
    }

    // No usable answer: remove the file that was just processed and drop it
    // from the project's file list
    await deleteFile(projectId, fileName);
    const remainingFiles = (project.uploadedFiles || []).filter(f => f !== fileName);
    await updateProject(projectId, {
      ...project,
      uploadedFiles: remainingFiles
    });
    return NextResponse.json(
      { error: 'AI analysis failed, please check model configuration, delete file and retry!' },
      { status: 400 }
    );
  } catch (error) {
    console.error('Text split error:', error);
    return NextResponse.json({ error: error.message || 'Text split failed' }, { status: 500 });
  }
}

/**
 * Return every text chunk of a project together with its tags.
 *
 * @param {Request} request - Incoming request (not read).
 * @param {{ params: { projectId: string } }} context - Route parameters.
 * @returns {Promise<Response>} JSON with `chunks`, the fields of
 *   `result.fileResult` spread at the top level, and `tags`; 400 when the
 *   project ID is missing, 500 on errors.
 */
export async function GET(request, { params }) {
  try {
    const { projectId } = params;

    // Validate the project ID
    if (!projectId) {
      return NextResponse.json({ error: 'The project ID cannot be empty' }, { status: 400 });
    }

    // Chunk details first, then the tag tree
    const { chunks, fileResult } = await getProjectChunks(projectId);
    const tags = await getTags(projectId);

    // fileResult is a single file result (not an array), merged into the payload
    const payload = { chunks, ...fileResult, tags };
    return NextResponse.json(payload);
  } catch (error) {
    console.error('Failed to get text chunks:', error);
    return NextResponse.json({ error: error.message || 'Failed to get text chunks' }, { status: 500 });
  }
}
/**
 * Return the tag tree of a project.
 *
 * @param {Request} request - Incoming request (not read).
 * @param {{ params: { projectId: string } }} context - Route parameters.
 * @returns {Promise<Response>} JSON `{ tags }`; 400 when the project ID is
 *   missing, 500 on errors.
 */
export async function GET(request, { params }) {
  try {
    const { projectId } = params;

    // Validate the project ID
    if (!projectId) {
      return NextResponse.json({ error: 'Project ID is required' }, { status: 400 });
    }

    // Load the tag tree
    return NextResponse.json({ tags: await getTags(projectId) });
  } catch (error) {
    console.error('Failed to obtain the label tree:', error);
    return NextResponse.json({ error: error.message || 'Failed to obtain the label tree' }, { status: 500 });
  }
}

/**
 * Replace the tag tree of a project.
 *
 * Request body: `{ tags }`, which must be an array.
 *
 * @param {Request} request - Incoming request carrying the JSON body.
 * @param {{ params: { projectId: string } }} context - Route parameters.
 * @returns {Promise<Response>} JSON `{ tags }` holding what `saveTags` returned;
 *   400 for a missing project ID or a non-array `tags`, 500 on errors.
 */
export async function PUT(request, { params }) {
  try {
    const { projectId } = params;

    // Validate the project ID
    if (!projectId) {
      return NextResponse.json({ error: 'Project ID is required' }, { status: 400 });
    }

    // Read and validate the submitted tags
    const body = await request.json();
    const { tags } = body;
    if (!Array.isArray(tags)) {
      return NextResponse.json({ error: 'Tag data format is incorrect' }, { status: 400 });
    }

    // Persist the new tree and return the stored version
    const updatedTags = await saveTags(projectId, tags);
    return NextResponse.json({ tags: updatedTags });
  } catch (error) {
    console.error('Failed to update tags:', error);
    return NextResponse.json({ error: error.message || 'Failed to update tags' }, { status: 500 });
  }
}
/**
 * Return the task configuration of a project.
 *
 * @param {Request} request - Incoming request (not read).
 * @param {{ params: { projectId: string } }} context - Route parameters.
 * @returns {Promise<Response>} Whatever `getTaskConfig` returns, as JSON; 400
 *   when the project ID is missing, 404 when the project directory is not
 *   accessible, 500 on errors.
 */
export async function GET(request, { params }) {
  try {
    const { projectId } = params;

    // Validate the project ID
    if (!projectId) {
      return NextResponse.json({ error: 'Project ID is required' }, { status: 400 });
    }

    // The project exists when its directory can be accessed
    const projectPath = path.join(await getProjectRoot(), projectId);
    const projectExists = await fs.access(projectPath).then(
      () => true,
      () => false
    );
    if (!projectExists) {
      return NextResponse.json({ error: 'Project does not exist' }, { status: 404 });
    }

    return NextResponse.json(await getTaskConfig(projectId));
  } catch (error) {
    console.error('Failed to obtain task configuration:', error);
    return NextResponse.json({ error: 'Failed to obtain task configuration' }, { status: 500 });
  }
}

/**
 * Store the task configuration of a project.
 *
 * The JSON body is written as-is (pretty-printed) to `task-config.json` in the
 * project directory.
 *
 * @param {Request} request - Incoming request carrying the configuration.
 * @param {{ params: { projectId: string } }} context - Route parameters.
 * @returns {Promise<Response>} JSON `{ message }`; 400 for a missing project ID
 *   or a falsy body, 404 when the project directory is not accessible, 500 on
 *   errors.
 */
export async function PUT(request, { params }) {
  try {
    const { projectId } = params;

    // Validate the project ID
    if (!projectId) {
      return NextResponse.json({ error: 'Project ID is required' }, { status: 400 });
    }

    // Read and validate the request body
    const taskConfig = await request.json();
    if (!taskConfig) {
      return NextResponse.json({ error: 'Task configuration cannot be empty' }, { status: 400 });
    }

    // The project exists when its directory can be accessed
    const projectPath = path.join(await getProjectRoot(), projectId);
    const projectExists = await fs.access(projectPath).then(
      () => true,
      () => false
    );
    if (!projectExists) {
      return NextResponse.json({ error: 'Project does not exist' }, { status: 404 });
    }

    // Write the configuration file
    const serialized = JSON.stringify(taskConfig, null, 2);
    await fs.writeFile(path.join(projectPath, 'task-config.json'), serialized, 'utf-8');

    return NextResponse.json({ message: 'Task configuration updated successfully' });
  } catch (error) {
    console.error('Failed to update task configuration:', error);
    return NextResponse.json({ error: 'Failed to update task configuration' }, { status: 500 });
  }
}
datasets.length : 0; 42 | 43 | // 添加最后更新时间 44 | const lastUpdated = new Date().toLocaleDateString('zh-CN'); 45 | 46 | return { 47 | ...project, 48 | questionsCount, 49 | datasetsCount, 50 | lastUpdated 51 | }; 52 | }) 53 | ); 54 | 55 | return Response.json(projectsWithStats); 56 | } catch (error) { 57 | console.error('获取项目列表出错:', error); 58 | return Response.json({ error: error.message }, { status: 500 }); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /app/api/update/route.js: -------------------------------------------------------------------------------- 1 | import { NextResponse } from 'next/server'; 2 | import { exec } from 'child_process'; 3 | import path from 'path'; 4 | import fs from 'fs'; 5 | 6 | // 执行更新脚本 7 | export async function POST() { 8 | try { 9 | // 检查是否在客户端环境中运行 10 | const desktopDir = path.join(process.cwd(), 'desktop'); 11 | const updaterPath = path.join(desktopDir, 'scripts', 'updater.js'); 12 | 13 | if (!fs.existsSync(updaterPath)) { 14 | return NextResponse.json( 15 | { 16 | success: false, 17 | message: '更新功能仅在客户端环境中可用' 18 | }, 19 | { status: 400 } 20 | ); 21 | } 22 | 23 | // 执行更新脚本 24 | return new Promise(resolve => { 25 | const updaterProcess = exec(`node "${updaterPath}"`, { cwd: process.cwd() }); 26 | 27 | let output = ''; 28 | 29 | updaterProcess.stdout.on('data', data => { 30 | output += data.toString(); 31 | console.log(`Update output: ${data}`); 32 | }); 33 | 34 | updaterProcess.stderr.on('data', data => { 35 | output += data.toString(); 36 | console.error(`Update error: ${data}`); 37 | }); 38 | 39 | updaterProcess.on('close', code => { 40 | console.log(`Update process exit, exit code: ${code}`); 41 | 42 | if (code === 0) { 43 | resolve( 44 | NextResponse.json({ 45 | success: true, 46 | message: 'Update successful, application will restart' 47 | }) 48 | ); 49 | } else { 50 | resolve( 51 | NextResponse.json( 52 | { 53 | success: false, 54 | message: `Update failed, exit code: 
${code}, output: ${output}` 55 | }, 56 | { status: 500 } 57 | ) 58 | ); 59 | } 60 | }); 61 | }); 62 | } catch (error) { 63 | console.error('Failed to execute update:', error); 64 | return NextResponse.json( 65 | { 66 | success: false, 67 | message: `Failed to execute update: ${error.message}` 68 | }, 69 | { status: 500 } 70 | ); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /app/globals.css: -------------------------------------------------------------------------------- 1 | * { 2 | box-sizing: border-box; 3 | padding: 0; 4 | margin: 0; 5 | } 6 | 7 | html, 8 | body { 9 | max-width: 100vw; 10 | overflow-x: hidden; 11 | height: 100%; 12 | -webkit-font-smoothing: antialiased; 13 | -moz-osx-font-smoothing: grayscale; 14 | } 15 | 16 | a { 17 | color: inherit; 18 | text-decoration: none; 19 | } 20 | 21 | /* 渐变文本样式 */ 22 | .gradient-text { 23 | background: linear-gradient(90deg, #2a5caa 0%, #8b5cf6 100%); 24 | -webkit-background-clip: text; 25 | -webkit-text-fill-color: transparent; 26 | background-clip: text; 27 | text-fill-color: transparent; 28 | } 29 | 30 | /* 页面容器下间距 */ 31 | main { 32 | min-height: calc(100vh - 64px); 33 | } 34 | 35 | /* 自定义滚动条 */ 36 | ::-webkit-scrollbar { 37 | width: 8px; 38 | height: 8px; 39 | } 40 | 41 | ::-webkit-scrollbar-track { 42 | background: transparent; 43 | } 44 | 45 | ::-webkit-scrollbar-thumb { 46 | background-color: rgba(0, 0, 0, 0.2); 47 | border-radius: 4px; 48 | } 49 | 50 | ::-webkit-scrollbar-thumb:hover { 51 | background-color: rgba(0, 0, 0, 0.3); 52 | } 53 | 54 | /* 暗色模式滚动条 */ 55 | [data-theme='dark'] ::-webkit-scrollbar-thumb { 56 | background-color: rgba(255, 255, 255, 0.2); 57 | } 58 | 59 | [data-theme='dark'] ::-webkit-scrollbar-thumb:hover { 60 | background-color: rgba(255, 255, 255, 0.3); 61 | } 62 | 63 | /* 方便的间距类 */ 64 | .mt-1 { 65 | margin-top: 8px; 66 | } 67 | .mt-2 { 68 | margin-top: 16px; 69 | } 70 | .mt-3 { 71 | margin-top: 24px; 72 | } 73 | .mt-4 { 74 | 
margin-top: 32px; 75 | } 76 | .mb-1 { 77 | margin-bottom: 8px; 78 | } 79 | .mb-2 { 80 | margin-bottom: 16px; 81 | } 82 | .mb-3 { 83 | margin-bottom: 24px; 84 | } 85 | .mb-4 { 86 | margin-bottom: 32px; 87 | } 88 | 89 | /* 响应式样式 */ 90 | @media (max-width: 600px) { 91 | .hide-on-mobile { 92 | display: none !important; 93 | } 94 | } 95 | 96 | /* 输入框和选择框边框简化 */ 97 | .plain-select .MuiOutlinedInput-notchedOutline, 98 | .plain-input .MuiOutlinedInput-notchedOutline { 99 | border-color: transparent !important; 100 | } 101 | 102 | /* 卡片悬停效果 */ 103 | .hover-card { 104 | transition: 105 | transform 0.2s ease, 106 | box-shadow 0.2s ease; 107 | } 108 | 109 | .hover-card:hover { 110 | transform: translateY(-4px); 111 | box-shadow: 0 12px 20px rgba(0, 0, 0, 0.1); 112 | } 113 | 114 | [data-theme='dark'] .hover-card:hover { 115 | box-shadow: 0 12px 20px rgba(0, 0, 0, 0.3); 116 | } 117 | -------------------------------------------------------------------------------- /app/layout.js: -------------------------------------------------------------------------------- 1 | import './globals.css'; 2 | import ThemeRegistry from '@/components/ThemeRegistry'; 3 | import I18nProvider from '@/components/I18nProvider'; 4 | 5 | export const metadata = { 6 | title: 'Easy Dataset', 7 | description: '一个强大的 LLM 数据集生成工具', 8 | icons: { 9 | icon: '/imgs/logo.ico' // 更新为正确的文件名 10 | } 11 | }; 12 | 13 | export default function RootLayout({ children }) { 14 | return ( 15 | 16 | 17 | 18 | {children} 19 | 20 | 21 | 22 | ); 23 | } 24 | -------------------------------------------------------------------------------- /app/page.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { useState, useEffect } from 'react'; 4 | import { Container, Box, Typography, CircularProgress, Stack, useTheme } from '@mui/material'; 5 | import ErrorOutlineIcon from '@mui/icons-material/ErrorOutline'; 6 | import Navbar from '@/components/Navbar'; 7 | import HeroSection 
from '@/components/home/HeroSection'; 8 | import StatsCard from '@/components/home/StatsCard'; 9 | import ProjectList from '@/components/home/ProjectList'; 10 | import CreateProjectDialog from '@/components/home/CreateProjectDialog'; 11 | import { motion } from 'framer-motion'; 12 | import { useTranslation } from 'react-i18next'; 13 | 14 | export default function Home() { 15 | const { t } = useTranslation(); 16 | const [projects, setProjects] = useState([]); 17 | const [loading, setLoading] = useState(true); 18 | const [error, setError] = useState(null); 19 | const [createDialogOpen, setCreateDialogOpen] = useState(false); 20 | 21 | useEffect(() => { 22 | async function fetchProjects() { 23 | try { 24 | setLoading(true); 25 | // 获取用户创建的项目详情 26 | const response = await fetch(`/api/projects`); 27 | 28 | if (!response.ok) { 29 | throw new Error(t('projects.fetchFailed')); 30 | } 31 | 32 | const data = await response.json(); 33 | setProjects(data); 34 | } catch (error) { 35 | console.error(t('projects.fetchError'), error); 36 | setError(error.message); 37 | } finally { 38 | setLoading(false); 39 | } 40 | } 41 | 42 | fetchProjects(); 43 | }, []); 44 | 45 | const theme = useTheme(); 46 | 47 | return ( 48 |
49 | 50 | 51 | setCreateDialogOpen(true)} /> 52 | 53 | 62 | {/* */} 63 | 64 | {loading && ( 65 | 76 | 77 | 78 | {t('projects.loading')} 79 | 80 | 81 | )} 82 | 83 | {error && !loading && ( 84 | 97 | 98 | 99 | 100 | {t('projects.fetchFailed')}: {error} 101 | 102 | 103 | 104 | )} 105 | 106 | {!loading && ( 107 | 112 | setCreateDialogOpen(true)} /> 113 | 114 | )} 115 | 116 | 117 | setCreateDialogOpen(false)} /> 118 |
119 | ); 120 | } 121 | -------------------------------------------------------------------------------- /app/projects/[projectId]/page.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { useEffect } from 'react'; 4 | import { useRouter } from 'next/navigation'; 5 | 6 | export default function ProjectPage({ params }) { 7 | const router = useRouter(); 8 | const { projectId } = params; 9 | 10 | // 默认重定向到文本分割页面 11 | useEffect(() => { 12 | router.push(`/projects/${projectId}/text-split`); 13 | }, [projectId, router]); 14 | 15 | return null; 16 | } 17 | -------------------------------------------------------------------------------- /app/projects/[projectId]/playground/page.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import React from 'react'; 4 | import { Box, Typography, Paper, Alert } from '@mui/material'; 5 | import { useParams } from 'next/navigation'; 6 | import { useTheme } from '@mui/material/styles'; 7 | import ChatArea from '@/components/playground/ChatArea'; 8 | import MessageInput from '@/components/playground/MessageInput'; 9 | import PlaygroundHeader from '@/components/playground/PlaygroundHeader'; 10 | import useModelPlayground from '@/hooks/useModelPlayground'; 11 | import { playgroundStyles } from '@/styles/playground'; 12 | import { useTranslation } from 'react-i18next'; 13 | 14 | export default function ModelPlayground() { 15 | const theme = useTheme(); 16 | const params = useParams(); 17 | const { projectId } = params; 18 | const styles = playgroundStyles(theme); 19 | const { t } = useTranslation(); 20 | 21 | const { 22 | availableModels, 23 | selectedModels, 24 | loading, 25 | userInput, 26 | conversations, 27 | error, 28 | outputMode, 29 | uploadedImage, 30 | handleModelSelection, 31 | handleInputChange, 32 | handleImageUpload, 33 | handleRemoveImage, 34 | handleSendMessage, 35 | handleClearConversations, 36 | 
handleOutputModeChange, 37 | getModelName 38 | } = useModelPlayground(projectId); 39 | 40 | return ( 41 | 42 | 43 | {t('playground.title')} 44 | 45 | 46 | {error && ( 47 | 48 | {error} 49 | 50 | )} 51 | 52 | 53 | 62 | 63 | 69 | 70 | 81 | 82 | 83 | ); 84 | } 85 | -------------------------------------------------------------------------------- /app/projects/[projectId]/questions/hooks/useQuestionEdit.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { useState } from 'react'; 4 | import { useTranslation } from 'react-i18next'; 5 | import request from '@/lib/util/request'; 6 | 7 | export function useQuestionEdit(projectId, onSuccess) { 8 | const { t } = useTranslation(); 9 | const [editDialogOpen, setEditDialogOpen] = useState(false); 10 | const [editMode, setEditMode] = useState('create'); 11 | const [editingQuestion, setEditingQuestion] = useState(null); 12 | 13 | const handleOpenCreateDialog = () => { 14 | setEditMode('create'); 15 | setEditingQuestion(null); 16 | setEditDialogOpen(true); 17 | }; 18 | 19 | const handleOpenEditDialog = question => { 20 | setEditMode('edit'); 21 | setEditingQuestion(question); 22 | setEditDialogOpen(true); 23 | }; 24 | 25 | const handleCloseDialog = () => { 26 | setEditDialogOpen(false); 27 | setEditingQuestion(null); 28 | }; 29 | 30 | const handleSubmitQuestion = async formData => { 31 | try { 32 | const response = await request(`/api/projects/${projectId}/questions`, { 33 | method: editMode === 'create' ? 'POST' : 'PUT', 34 | headers: { 35 | 'Content-Type': 'application/json' 36 | }, 37 | body: JSON.stringify( 38 | editMode === 'create' 39 | ? 
{ 40 | question: formData.question, 41 | chunkId: formData.chunkId, 42 | label: formData.label 43 | } 44 | : { 45 | question: formData.question, 46 | oldQuestion: editingQuestion.question, 47 | chunkId: formData.chunkId, 48 | label: formData.label, 49 | oldChunkId: editingQuestion.chunkId 50 | } 51 | ) 52 | }); 53 | 54 | if (!response.ok) { 55 | const errorData = await response.json(); 56 | throw new Error(errorData.error || t('questions.operationFailed')); 57 | } 58 | 59 | // 获取更新后的问题数据 60 | const updatedQuestion = await response.json(); 61 | 62 | // 直接更新问题列表中的数据,而不是重新获取整个列表 63 | if (onSuccess) { 64 | onSuccess(updatedQuestion); 65 | } 66 | handleCloseDialog(); 67 | } catch (error) { 68 | console.error('操作失败:', error); 69 | } 70 | }; 71 | 72 | return { 73 | editDialogOpen, 74 | editMode, 75 | editingQuestion, 76 | handleOpenCreateDialog, 77 | handleOpenEditDialog, 78 | handleCloseDialog, 79 | handleSubmitQuestion 80 | }; 81 | } 82 | -------------------------------------------------------------------------------- /app/projects/[projectId]/settings/page.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { useState, useEffect } from 'react'; 4 | import { Container, Typography, Box, Tabs, Tab, Paper, Alert, CircularProgress } from '@mui/material'; 5 | import { useSearchParams, useRouter } from 'next/navigation'; 6 | import { useTranslation } from 'react-i18next'; 7 | 8 | // 导入设置组件 9 | import BasicSettings from '@/components/settings/BasicSettings'; 10 | import ModelSettings from '@/components/settings/ModelSettings'; 11 | import TaskSettings from '@/components/settings/TaskSettings'; 12 | import PromptSettings from './components/PromptSettings'; 13 | 14 | // 定义 TAB 枚举 15 | const TABS = { 16 | BASIC: 'basic', 17 | MODEL: 'model', 18 | TASK: 'task', 19 | PROMPTS: 'prompts' 20 | }; 21 | 22 | export default function SettingsPage({ params }) { 23 | const { t } = useTranslation(); 24 | const { projectId } = 
params; 25 | const searchParams = useSearchParams(); 26 | const router = useRouter(); 27 | const [activeTab, setActiveTab] = useState(TABS.BASIC); 28 | const [projectExists, setProjectExists] = useState(true); 29 | const [loading, setLoading] = useState(true); 30 | const [error, setError] = useState(null); 31 | 32 | // Read the active tab from the `tab` URL query parameter (ignored unless it is one of TABS) 33 | useEffect(() => { 34 | const tab = searchParams.get('tab'); 35 | if (tab && Object.values(TABS).includes(tab)) { 36 | setActiveTab(tab); 37 | } 38 | }, [searchParams]); 39 | 40 | // Check whether the project exists (a 404 marks it as missing, any other failure is reported as an error) 41 | useEffect(() => { 42 | async function checkProject() { 43 | try { 44 | setLoading(true); 45 | const response = await fetch(`/api/projects/${projectId}`); 46 | 47 | if (!response.ok) { 48 | if (response.status === 404) { 49 | setProjectExists(false); 50 | } else { 51 | throw new Error(t('projects.fetchFailed')); 52 | } 53 | } else { 54 | setProjectExists(true); 55 | } 56 | } catch (error) { 57 | console.error('获取项目详情出错:', error); 58 | setError(error.message); 59 | } finally { 60 | setLoading(false); 61 | } 62 | } 63 | 64 | checkProject(); 65 | }, [projectId, t]); 66 | 67 | // Handle tab switching 68 | const handleTabChange = (event, newValue) => { 69 | setActiveTab(newValue); 70 | // Reflect the selected tab in the `tab` URL query parameter 71 | router.push(`/projects/${projectId}/settings?tab=${newValue}`); 72 | }; 73 | 74 | if (loading) { 75 | return ( 76 | 77 | 78 | 79 | ); 80 | } 81 | 82 | if (!projectExists) { 83 | return ( 84 | 85 | {t('projects.notExist')} 86 | 87 | ); 88 | } 89 | 90 | if (error) { 91 | return ( 92 | 93 | {error} 94 | 95 | ); 96 | } 97 | 98 | return ( 99 | 100 | 101 | {t('settings.title')} 102 | 103 | 104 | 105 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | {activeTab === TABS.BASIC && } 121 | 122 | {activeTab === TABS.MODEL && } 123 | 124 | {activeTab === TABS.TASK && } 125 | 126 | {activeTab === TABS.PROMPTS && } 127 | 128 | ); 129 | } 130 | -------------------------------------------------------------------------------- /commitlint.config.mjs:
-------------------------------------------------------------------------------- 1 | export default { extends: ['@commitlint/config-conventional'] }; 2 | -------------------------------------------------------------------------------- /components/I18nProvider.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { useEffect } from 'react'; 4 | import i18n from '@/lib/i18n'; 5 | import { I18nextProvider } from 'react-i18next'; 6 | 7 | export default function I18nProvider({ children }) { 8 | useEffect(() => { 9 | // Browser-only hook point; the branch below is currently empty, so no extra initialization runs here 10 | if (typeof window !== 'undefined') { 11 | // Placeholder for client-specific i18n initialization logic 12 | } 13 | }, []); 14 | 15 | return {children}; 16 | } 17 | -------------------------------------------------------------------------------- /components/LanguageSwitcher.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { useTranslation } from 'react-i18next'; 4 | import { IconButton, Tooltip, useTheme, Typography } from '@mui/material'; 5 | 6 | export default function LanguageSwitcher() { 7 | const { i18n } = useTranslation(); 8 | const theme = useTheme(); 9 | 10 | const toggleLanguage = () => { 11 | const newLang = i18n.language === 'zh-CN' ? 'en' : 'zh-CN'; 12 | i18n.changeLanguage(newLang); 13 | }; 14 | 15 | return ( 16 | 17 | 30 | 31 | {i18n.language === 'zh-CN' ?
'EN' : '中'} 32 | 33 | 34 | 35 | ); 36 | } 37 | -------------------------------------------------------------------------------- /components/ModelSelect.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import React from 'react'; 4 | import { FormControl, Select, MenuItem, useTheme } from '@mui/material'; 5 | import { useTranslation } from 'react-i18next'; 6 | 7 | export default function ModelSelect({ models = [], selectedModel, onChange, size = 'small', minWidth = 180 }) { 8 | const theme = useTheme(); 9 | const { t } = useTranslation(); 10 | 11 | const handleModelChange = event => { 12 | if (!event || !event.target) return; 13 | const newModelId = event.target.value; 14 | 15 | // Find the selected model object by id 16 | const selectedModelObj = models.find(model => model.id === newModelId); 17 | 18 | if (selectedModelObj) { 19 | // Persist the full model info to localStorage 20 | localStorage.setItem('selectedModelInfo', JSON.stringify(selectedModelObj)); 21 | } else { 22 | // No matching model found: clear the stored model info 23 | localStorage.removeItem('selectedModelInfo'); 24 | } 25 | 26 | // Notify the parent component (if it supplied onChange) 27 | onChange?.(event); 28 | 29 | // Broadcast a 'model-selection-changed' event on window 30 | const modelChangeEvent = new CustomEvent('model-selection-changed'); 31 | window.dispatchEvent(modelChangeEvent); 32 | }; 33 | 34 | return ( 35 | 36 | 82 | 83 | ); 84 | } 85 | -------------------------------------------------------------------------------- /components/home/StatsCard.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { Paper, Grid, Box, Typography, useMediaQuery, Avatar } from '@mui/material'; 4 | import { styles } from '@/styles/home'; 5 | import { useTheme } from '@mui/material'; 6 | import { motion } from 'framer-motion'; 7 | import FolderOpenIcon from '@mui/icons-material/FolderOpen'; 8 | import QuestionAnswerIcon from '@mui/icons-material/QuestionAnswer'; 9 | import StorageIcon from '@mui/icons-material/Storage'; 10 | import MemoryIcon from
'@mui/icons-material/Memory'; 11 | 12 | // 默认模型列表 13 | const mockModels = [ 14 | { id: 'deepseek-r1', provider: 'Ollama', name: 'DeepSeek-R1' }, 15 | { id: 'gpt-3.5-turbo-openai', provider: 'OpenAI', name: 'gpt-3.5-turbo' }, 16 | { id: 'gpt-3.5-turbo-guiji', provider: 'Guiji', name: 'gpt-3.5-turbo' }, 17 | { id: 'glm-4-flash', provider: 'Zhipu AI', name: 'GLM-4-Flash' } 18 | ]; 19 | 20 | export default function StatsCard({ projects }) { 21 | const theme = useTheme(); 22 | const isMobile = useMediaQuery(theme.breakpoints.down('sm')); 23 | 24 | // 统计卡片数据 25 | const statsItems = [ 26 | { 27 | value: projects.length, 28 | label: t('stats.ongoingProjects'), 29 | color: 'primary', 30 | icon: 31 | }, 32 | { 33 | value: projects.reduce((sum, project) => sum + (project.questionsCount || 0), 0), 34 | label: t('stats.questionCount'), 35 | color: 'secondary', 36 | icon: 37 | }, 38 | { 39 | value: projects.reduce((sum, project) => sum + (project.datasetsCount || 0), 0), 40 | label: t('stats.generatedDatasets'), 41 | color: 'success', 42 | icon: 43 | }, 44 | { 45 | value: mockModels.length, 46 | label: t('stats.supportedModels'), 47 | color: 'warning', 48 | icon: 49 | } 50 | ]; 51 | 52 | return ( 53 | 61 | 62 | {statsItems.map((item, index) => ( 63 | 64 | 82 | 92 | {item.icon} 93 | 94 | 107 | {item.value} 108 | 109 | 110 | {item.label} 111 | 112 | 113 | 114 | ))} 115 | 116 | 117 | ); 118 | } 119 | -------------------------------------------------------------------------------- /components/playground/ChatArea.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import React, { useRef, useEffect } from 'react'; 4 | import { Box, Typography, Paper, Grid, CircularProgress } from '@mui/material'; 5 | import { useTheme } from '@mui/material/styles'; 6 | import ChatMessage from './ChatMessage'; 7 | import { playgroundStyles } from '@/styles/playground'; 8 | import { useTranslation } from 'react-i18next'; 9 | 10 | const ChatArea 
= ({ selectedModels, conversations, loading, getModelName }) => { 11 | const theme = useTheme(); 12 | const styles = playgroundStyles(theme); 13 | const { t } = useTranslation(); 14 | 15 | // 为每个模型创建独立的引用 16 | const chatContainerRefs = { 17 | model1: useRef(null), 18 | model2: useRef(null), 19 | model3: useRef(null) 20 | }; 21 | 22 | // 为每个模型的聊天容器自动滚动到底部 23 | useEffect(() => { 24 | Object.values(chatContainerRefs).forEach(ref => { 25 | if (ref.current) { 26 | ref.current.scrollTop = ref.current.scrollHeight; 27 | } 28 | }); 29 | }, [conversations]); 30 | 31 | if (selectedModels.length === 0) { 32 | return ( 33 | 34 | {t('playground.selectModelFirst')} 35 | 36 | ); 37 | } 38 | 39 | return ( 40 | 41 | {selectedModels.map((modelId, index) => { 42 | const modelConversation = conversations[modelId] || []; 43 | const isLoading = loading[modelId]; 44 | const refKey = `model${index + 1}`; 45 | 46 | return ( 47 | 1 ? 12 / selectedModels.length : 12} 51 | key={modelId} 52 | style={{ maxHeight: 'calc(100vh - 300px)' }} 53 | > 54 | 55 | 56 | {getModelName(modelId)} 57 | {isLoading && } 58 | 59 | 60 | 61 | {modelConversation.length === 0 ? 
( 62 | 63 | 64 | {t('playground.sendFirstMessage')} 65 | 66 | 67 | ) : ( 68 | modelConversation.map((message, msgIndex) => ( 69 | 70 | 71 | 72 | )) 73 | )} 74 | 75 | 76 | 77 | ); 78 | })} 79 | 80 | ); 81 | }; 82 | 83 | export default ChatArea; 84 | -------------------------------------------------------------------------------- /components/playground/MessageInput.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import React, { useState } from 'react'; 4 | import { Box, TextField, Button, IconButton, Badge, Tooltip } from '@mui/material'; 5 | import SendIcon from '@mui/icons-material/Send'; 6 | import ImageIcon from '@mui/icons-material/Image'; 7 | import CancelIcon from '@mui/icons-material/Cancel'; 8 | import { useTheme } from '@mui/material/styles'; 9 | import { playgroundStyles } from '@/styles/playground'; 10 | import { useTranslation } from 'react-i18next'; 11 | 12 | const MessageInput = ({ userInput, handleInputChange, handleSendMessage, loading, selectedModels, uploadedImage, handleImageUpload, handleRemoveImage, availableModels }) => { 13 | const theme = useTheme(); 14 | const styles = playgroundStyles(theme); 15 | const { t } = useTranslation(); 16 | 17 | const isDisabled = Object.values(loading).some(value => value) || selectedModels.length === 0; 18 | const isSendDisabled = isDisabled || (!userInput.trim() && !uploadedImage); 19 | 20 | // 检查是否有视觉模型被选中 21 | const hasVisionModel = selectedModels.some(modelId => { 22 | const model = availableModels.find(m => m.id === modelId); 23 | return model && model.type === 'vision'; 24 | }); 25 | 26 | return ( 27 | 28 | {uploadedImage && ( 29 | 30 | 37 | 38 | 39 | } 40 | sx={{ width: '100%' }} 41 | overlap="rectangular" 42 | anchorOrigin={{ vertical: 'top', horizontal: 'right' }} 43 | > 44 | 上传图片 49 | 50 | 51 | )} 52 | 53 | { 61 | if (e.key === 'Enter' && !e.shiftKey) { 62 | e.preventDefault(); 63 | handleSendMessage(); 64 | } 65 | }} 66 | multiline 67 | 
maxRows={4} 68 | /> 69 | {hasVisionModel && ( 70 | 71 | 72 | 78 | 84 | 85 | 86 | 87 | 88 | )} 89 | 99 | 100 | 101 | ); 102 | }; 103 | 104 | export default MessageInput; 105 | -------------------------------------------------------------------------------- /components/playground/ModelSelector.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { 3 | FormControl, 4 | InputLabel, 5 | Select, 6 | MenuItem, 7 | OutlinedInput, 8 | Box, 9 | Chip, 10 | Checkbox, 11 | ListItemText 12 | } from '@mui/material'; 13 | import { useTranslation } from 'react-i18next'; 14 | 15 | const ITEM_HEIGHT = 48; 16 | const ITEM_PADDING_TOP = 8; 17 | const MenuProps = { 18 | PaperProps: { 19 | style: { 20 | maxHeight: ITEM_HEIGHT * 4.5 + ITEM_PADDING_TOP, 21 | width: 250 22 | } 23 | } 24 | }; 25 | 26 | /** 27 | * 模型选择组件 28 | * @param {Object} props 29 | * @param {Array} props.models - 可用模型列表 30 | * @param {Array} props.selectedModels - 已选择的模型ID列表 31 | * @param {Function} props.onChange - 选择改变时的回调函数 32 | */ 33 | export default function ModelSelector({ models, selectedModels, onChange }) { 34 | // 获取模型名称 35 | const getModelName = modelId => { 36 | const model = models.find(m => m.id === modelId); 37 | return model ? 
`${model.provider}: ${model.name}` : modelId; 38 | }; 39 | const { t } = useTranslation(); 40 | 41 | return ( 42 | 43 | {t('playground.selectModelMax3')} 44 | 79 | 80 | ); 81 | } 82 | -------------------------------------------------------------------------------- /components/playground/PlaygroundHeader.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import React from 'react'; 4 | import { Grid, Button, Divider, FormControl, InputLabel, Select, MenuItem } from '@mui/material'; 5 | import DeleteIcon from '@mui/icons-material/Delete'; 6 | import { useTheme } from '@mui/material/styles'; 7 | import ModelSelector from './ModelSelector'; 8 | import { playgroundStyles } from '@/styles/playground'; 9 | import { useTranslation } from 'react-i18next'; 10 | 11 | const PlaygroundHeader = ({ 12 | availableModels, 13 | selectedModels, 14 | handleModelSelection, 15 | handleClearConversations, 16 | conversations, 17 | outputMode, 18 | handleOutputModeChange 19 | }) => { 20 | const theme = useTheme(); 21 | const styles = playgroundStyles(theme); 22 | const { t } = useTranslation(); 23 | 24 | const isClearDisabled = selectedModels.length === 0 || Object.values(conversations).every(conv => conv.length === 0); 25 | 26 | return ( 27 | <> 28 | 29 | 30 | 31 | 32 | 33 | 34 | {t('playground.outputMode')} 35 | 45 | 46 | 47 | 48 | 58 | 59 | 60 | 61 | 62 | 63 | ); 64 | }; 65 | 66 | export default PlaygroundHeader; 67 | -------------------------------------------------------------------------------- /components/text-split/ChunkDeleteDialog.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { Dialog, DialogTitle, DialogContent, DialogContentText, DialogActions, Button } from '@mui/material'; 4 | import { useTranslation } from 'react-i18next'; 5 | 6 | export default function ChunkDeleteDialog({ open, onClose, onConfirm }) { 7 | const { t } = useTranslation(); 8 | 
return ( 9 | 15 | {t('common.confirmDelete')}? 16 | 17 | {t('common.confirmDelete')}? 18 | 19 | 20 | 21 | 24 | 25 | 26 | ); 27 | } 28 | -------------------------------------------------------------------------------- /components/text-split/ChunkListHeader.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { Box, Typography, Checkbox, Button, Select, MenuItem } from '@mui/material'; 4 | import QuizIcon from '@mui/icons-material/Quiz'; 5 | import { useTranslation } from 'react-i18next'; 6 | 7 | export default function ChunkListHeader({ 8 | totalChunks, 9 | selectedChunks, 10 | onSelectAll, 11 | onBatchGenerateQuestions, 12 | questionFilter, 13 | onQuestionFilterChange 14 | }) { 15 | const { t } = useTranslation(); 16 | return ( 17 | 18 | 19 | 0 && selectedChunks.length < totalChunks} 22 | onChange={onSelectAll} 23 | /> 24 | 25 | {t('textSplit.selectedCount', { count: selectedChunks.length })} , 26 | {t('textSplit.totalCount', { count: totalChunks })} 27 | 28 | 29 | 30 | 31 | 36 | 37 | 46 | 47 | 48 | ); 49 | } 50 | -------------------------------------------------------------------------------- /components/text-split/ChunkViewDialog.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { Box, Button, Dialog, DialogTitle, DialogContent, DialogActions, CircularProgress } from '@mui/material'; 4 | import ReactMarkdown from 'react-markdown'; 5 | import { useTranslation } from 'react-i18next'; 6 | 7 | export default function ChunkViewDialog({ open, chunk, onClose }) { 8 | const { t } = useTranslation(); 9 | return ( 10 | 11 | {t('textSplit.chunkDetails', { chunkId: chunk?.id })} 12 | 13 | {chunk ? 
( 14 | 15 | {chunk.content} 16 | 17 | ) : ( 18 | 19 | 20 | 21 | )} 22 | 23 | 24 | 25 | 26 | 27 | ); 28 | } 29 | -------------------------------------------------------------------------------- /components/text-split/MarkdownViewDialog.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { Box, Button, Dialog, DialogTitle, DialogContent, DialogActions, CircularProgress } from '@mui/material'; 4 | import ReactMarkdown from 'react-markdown'; 5 | import { useTranslation } from 'react-i18next'; 6 | 7 | export default function MarkdownViewDialog({ open, text, onClose }) { 8 | const { t } = useTranslation(); 9 | return ( 10 | 11 | {text ? text.fileName : ""} 12 | 13 | {text ? ( 14 | 15 | {text.content} 16 | 17 | ) : ( 18 | 19 | 20 | 21 | )} 22 | 23 | 24 | 25 | 26 | 27 | ); 28 | } 29 | -------------------------------------------------------------------------------- /components/text-split/components/DeleteConfirmDialog.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { Dialog, DialogTitle, DialogContent, DialogContentText, DialogActions, Button } from '@mui/material'; 4 | import { useTranslation } from 'react-i18next'; 5 | 6 | export default function DeleteConfirmDialog({ open, fileName, onClose, onConfirm }) { 7 | const { t } = useTranslation(); 8 | return ( 9 | 15 | 16 | {t('common.confirmDelete')}「{fileName}」? 
17 | 18 | 19 | {t('common.confirmDeleteDescription')} 20 | 21 | 22 | 25 | 28 | 29 | 30 | ); 31 | } 32 | -------------------------------------------------------------------------------- /components/text-split/components/DirectoryView.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { Box, List, ListItem, ListItemIcon, ListItemText, Collapse, IconButton } from '@mui/material'; 4 | import FolderIcon from '@mui/icons-material/Folder'; 5 | import ArticleIcon from '@mui/icons-material/Article'; 6 | import ExpandLess from '@mui/icons-material/ExpandLess'; 7 | import ExpandMore from '@mui/icons-material/ExpandMore'; 8 | import { useTheme } from '@mui/material/styles'; 9 | 10 | /** 11 | * 目录结构组件 12 | * @param {Object} props 13 | * @param {Array} props.items - 目录项数组 14 | * @param {Object} props.expandedItems - 展开状态对象 15 | * @param {Function} props.onToggleItem - 展开/折叠回调 16 | * @param {number} props.level - 当前层级 17 | * @param {string} props.parentId - 父级ID 18 | */ 19 | export default function DirectoryView({ items, expandedItems, onToggleItem, level = 0, parentId = '' }) { 20 | const theme = useTheme(); 21 | 22 | if (!items || items.length === 0) return null; 23 | 24 | return ( 25 | 0 ? 2 : 0 }}> 26 | {items.map((item, index) => { 27 | const itemId = `${parentId}-${index}`; 28 | const hasChildren = item.children && item.children.length > 0; 29 | const isExpanded = expandedItems[itemId] || false; 30 | 31 | return ( 32 | 33 | 0 ? `1px solid ${theme.palette.divider}` : 'none', 37 | ml: level > 0 ? 1 : 0 38 | }} 39 | > 40 | 41 | {hasChildren ? : } 42 | 43 | 50 | {hasChildren && ( 51 | onToggleItem(itemId)}> 52 | {isExpanded ? 
: } 53 | 54 | )} 55 | 56 | 57 | {hasChildren && ( 58 | 59 | 66 | 67 | )} 68 | 69 | ); 70 | })} 71 | 72 | ); 73 | } 74 | -------------------------------------------------------------------------------- /components/text-split/components/DomainTreeView.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { Box } from '@mui/material'; 4 | import { TreeView, TreeItem } from '@mui/lab'; 5 | import ExpandMoreIcon from '@mui/icons-material/ExpandMore'; 6 | import ChevronRightIcon from '@mui/icons-material/ChevronRight'; 7 | 8 | /** 9 | * 领域知识树组件 10 | * @param {Object} props 11 | * @param {Array} props.nodes - 树节点数组 12 | */ 13 | export default function DomainTreeView({ nodes = [] }) { 14 | if (!nodes || nodes.length === 0) return null; 15 | 16 | const renderTreeItems = nodes => { 17 | return nodes.map((node, index) => ( 18 | 19 | {node.children && node.children.length > 0 && renderTreeItems(node.children)} 20 | 21 | )); 22 | }; 23 | 24 | return ( 25 | } 27 | defaultExpandIcon={} 28 | sx={{ flexGrow: 1, overflowY: 'auto' }} 29 | > 30 | {renderTreeItems(nodes)} 31 | 32 | ); 33 | } 34 | -------------------------------------------------------------------------------- /components/text-split/components/TabPanel.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { Box } from '@mui/material'; 4 | 5 | /** 6 | * 标签页面板组件 7 | * @param {Object} props 8 | * @param {number} props.value - 当前激活的标签索引 9 | * @param {number} props.index - 当前面板对应的索引 10 | * @param {ReactNode} props.children - 子组件 11 | */ 12 | export default function TabPanel({ value, index, children }) { 13 | return ( 14 | 23 | ); 24 | } 25 | -------------------------------------------------------------------------------- /components/text-split/components/UploadArea.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { Box, Button, 
Typography, List, ListItem, ListItemText, Divider, CircularProgress } from '@mui/material'; 4 | import UploadFileIcon from '@mui/icons-material/UploadFile'; 5 | import DeleteIcon from '@mui/icons-material/Delete'; 6 | import { alpha } from '@mui/material/styles'; 7 | import { useTranslation } from 'react-i18next'; 8 | 9 | export default function UploadArea({ theme, files, uploading, uploadedFiles, onFileSelect, onRemoveFile, onUpload }) { 10 | const { t } = useTranslation(); 11 | 12 | return ( 13 | 31 | 32 | {t('textSplit.uploadNewDocument')} 33 | 34 | 35 | 52 | 53 | 54 | {uploadedFiles.length > 0 ? t('textSplit.mutilFileMessage') : t('textSplit.supportedFormats')} 55 | 56 | 57 | {files.length > 0 && ( 58 | 59 | 60 | {t('textSplit.selectedFiles', { count: files.length })} 61 | 62 | 63 | 64 | {files.map((file, index) => ( 65 | 66 | } 72 | onClick={() => onRemoveFile(index)} 73 | disabled={uploading} 74 | > 75 | {t('common.delete')} 76 | 77 | } 78 | > 79 | 80 | 81 | {index < files.length - 1 && } 82 | 83 | ))} 84 | 85 | 86 | 87 | 90 | 91 | 92 | )} 93 | 94 | ); 95 | } 96 | -------------------------------------------------------------------------------- /constant/model.js: -------------------------------------------------------------------------------- 1 | export const MODEL_PROVIDERS = [ 2 | { 3 | id: 'ollama', 4 | name: 'Ollama', 5 | defaultEndpoint: 'http://127.0.0.1:11434/api', 6 | defaultModels: [] 7 | }, 8 | { 9 | id: 'openai', 10 | name: 'OpenAI', 11 | defaultEndpoint: 'https://api.openai.com/v1/', 12 | defaultModels: ['gpt-4o', 'gpt-4o-mini', 'o1-mini'] 13 | }, 14 | { 15 | id: 'siliconflow', 16 | name: '硅基流动', 17 | defaultEndpoint: 'https://api.siliconflow.cn/v1/', 18 | defaultModels: [ 19 | 'deepseek-ai/DeepSeek-R1', 20 | 'deepseek-ai/DeepSeek-V3', 21 | 'Qwen2.5-7B-Instruct', 22 | 'meta-llama/Llama-3.3-70B-Instruct' 23 | ] 24 | }, 25 | { 26 | id: 'deepseek', 27 | name: 'DeepSeek', 28 | defaultEndpoint: 'https://api.deepseek.com/v1/', 29 | defaultModels: 
['deepseek-chat', 'deepseek-reasoner'] 30 | }, 31 | { 32 | id: '302ai', 33 | name: '302.AI', 34 | defaultEndpoint: 'https://api.302.ai/v1/', 35 | defaultModels: ['Doubao-pro-128k', 'deepseek-r1', 'kimi-latest', 'qwen-max'] 36 | }, 37 | { 38 | id: 'zhipu', 39 | name: '智谱AI', 40 | defaultEndpoint: 'https://open.bigmodel.cn/api/paas/v4/', 41 | defaultModels: ['glm-4-flash', 'glm-4-flashx', 'glm-4-plus', 'glm-4-long'] 42 | }, 43 | { 44 | id: 'huoshan', 45 | name: '火山引擎', 46 | defaultEndpoint: 'https://ark.cn-beijing.volces.com/api/v3/', 47 | defaultModels: [] 48 | }, 49 | { 50 | id: 'groq', 51 | name: 'Groq', 52 | defaultEndpoint: 'https://api.groq.com/openai', 53 | defaultModels: ['Gemma 7B', 'LLaMA3 8B', 'LLaMA3 70B'] 54 | }, 55 | { 56 | id: 'grok', 57 | name: 'Grok', 58 | defaultEndpoint: 'https://api.x.ai', 59 | defaultModels: ['Grok Beta'] 60 | }, 61 | { 62 | id: 'OpenRouter', 63 | name: 'OpenRouter', 64 | defaultEndpoint: 'https://openrouter.ai/api/v1/', 65 | defaultModels: [ 66 | 'google/gemma-2-9b-it:free', 67 | 'meta-llama/llama-3-8b-instruct:free', 68 | 'microsoft/phi-3-mini-128k-instruct:free' 69 | ] 70 | }, 71 | { 72 | id: 'alibailian', 73 | name: '阿里云百炼', 74 | defaultEndpoint: 'https://dashscope.aliyuncs.com/compatible-mode/v1', 75 | defaultModels: ['qwen-max-latest', 'qwen-max-2025-01-25'] 76 | } 77 | ]; 78 | 79 | export const DEFAULT_MODEL_SETTINGS = { 80 | temperature: 0.7, 81 | maxTokens: 8192 82 | }; 83 | -------------------------------------------------------------------------------- /constant/setting.js: -------------------------------------------------------------------------------- 1 | // 默认项目任务配置 2 | export const DEFAULT_SETTINGS = { 3 | textSplitMinLength: 1500, 4 | textSplitMaxLength: 2000, 5 | questionGenerationLength: 240, 6 | huggingfaceToken: '', 7 | concurrencyLimit: 5, 8 | visionConcurrencyLimit: 5 9 | }; 10 | -------------------------------------------------------------------------------- /electron-builder.yml: 
-------------------------------------------------------------------------------- 1 | appId: com.easydataset.app 2 | productName: Easy Dataset 3 | copyright: Copyright 2025 Easy Dataset 4 | description: A application for creating large model fine-tuning datasets 5 | 6 | directories: 7 | output: dist 8 | buildResources: public 9 | 10 | files: 11 | - .next/**/* 12 | - '!.next/cache/**/*' 13 | - public/**/* 14 | - locales/**/* 15 | - package.json 16 | - electron/**/* 17 | - node_modules/**/* 18 | - '!node_modules/.cache/**/*' 19 | - '!node_modules/.bin/**/*' 20 | - '!node_modules/.vite/**/*' 21 | - '!**/*.{md,d.ts,map}' 22 | - '!**/node_modules/*/{CHANGELOG.md,README.md,README,readme.md,readme}' 23 | 24 | extraResources: 25 | - from: locales 26 | to: locales 27 | 28 | asar: false 29 | compression: maximum 30 | 31 | afterbuild: ['rm -rf node_modules/.cache', 'rm -rf node_modules/.vite'] 32 | 33 | mac: 34 | category: public.app-category.developer-tools 35 | target: 36 | - target: dmg 37 | arch: [arm64, x64] # 同时支持 M 芯片和 Intel 芯片 38 | icon: public/imgs/logo.icns 39 | hardenedRuntime: true 40 | gatekeeperAssess: false 41 | darkModeSupport: true 42 | electronLanguages: ['zh_CN', 'en'] 43 | identity: 'Developer ID Application: Conard LI (JRL9ZUXR95)' 44 | entitlements: 'electron/entitlements.mac.plist' 45 | entitlementsInherit: 'electron/entitlements.mac.plist' 46 | 47 | # 添加 Windows 配置 48 | win: 49 | icon: public/imgs/logo.ico 50 | target: 51 | - target: nsis 52 | arch: [x64] 53 | 54 | # 添加 Linux 配置 55 | linux: 56 | icon: public/imgs/logo.png 57 | target: [AppImage, deb] 58 | category: Development 59 | maintainer: '1009903985@qq.com' 60 | 61 | dmg: 62 | contents: 63 | - x: 130 64 | y: 220 65 | - x: 410 66 | y: 220 67 | type: link 68 | path: /Applications 69 | 70 | publish: 71 | provider: github 72 | owner: ConardLi 73 | repo: easy-dataset 74 | releaseType: release 75 | private: false 76 | publishAutoUpdate: true 77 | 
-------------------------------------------------------------------------------- /electron/entitlements.mac.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | com.apple.security.cs.allow-jit 6 | 7 | com.apple.security.cs.allow-unsigned-executable-memory 8 | 9 | com.apple.security.cs.allow-dyld-environment-variables 10 | 11 | com.apple.security.network.client 12 | 13 | com.apple.security.files.user-selected.read-write 14 | 15 | 16 | -------------------------------------------------------------------------------- /electron/loading.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Easy Dataset Loading... 7 | 90 | 91 | 92 |
93 | 99 |

Easy Dataset

100 |

101 | The first startup may take a bit longer to load. Please be patient. ... 102 |

103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 | 111 | 120 | 121 | 122 | -------------------------------------------------------------------------------- /electron/preload.js: -------------------------------------------------------------------------------- 1 | const { contextBridge, ipcRenderer } = require('electron'); 2 | 3 | // 在渲染进程中暴露安全的 API 4 | contextBridge.exposeInMainWorld('electron', { 5 | // 获取应用版本 6 | getAppVersion: () => ipcRenderer.invoke('get-app-version'), 7 | 8 | // 获取当前语言 9 | getLanguage: () => { 10 | // 尝试从本地存储获取语言设置 11 | const storedLang = localStorage.getItem('i18nextLng'); 12 | // 如果存在则返回,否则返回系统语言或默认为中文 13 | return storedLang || navigator.language.startsWith('zh') ? 'zh' : 'en'; 14 | }, 15 | 16 | // 获取用户数据目录 17 | getUserDataPath: () => { 18 | try { 19 | return ipcRenderer.sendSync('get-user-data-path'); 20 | } catch (error) { 21 | console.error('获取用户数据目录失败:', error); 22 | return null; 23 | } 24 | }, 25 | 26 | // 更新相关 API 27 | updater: { 28 | // 检查更新 29 | checkForUpdates: () => ipcRenderer.invoke('check-update'), 30 | 31 | // 下载更新 32 | downloadUpdate: () => ipcRenderer.invoke('download-update'), 33 | 34 | // 安装更新 35 | installUpdate: () => ipcRenderer.invoke('install-update'), 36 | 37 | // 监听更新事件 38 | onUpdateAvailable: (callback) => { 39 | const handler = (_, info) => callback(info); 40 | ipcRenderer.on('update-available', handler); 41 | return () => ipcRenderer.removeListener('update-available', handler); 42 | }, 43 | 44 | onUpdateNotAvailable: (callback) => { 45 | const handler = () => callback(); 46 | ipcRenderer.on('update-not-available', handler); 47 | return () => ipcRenderer.removeListener('update-not-available', handler); 48 | }, 49 | 50 | onUpdateError: (callback) => { 51 | const handler = (_, error) => callback(error); 52 | ipcRenderer.on('update-error', handler); 53 | return () => ipcRenderer.removeListener('update-error', handler); 54 | }, 55 | 56 | onDownloadProgress: (callback) => { 57 | const handler = (_, progress) => callback(progress); 58 | 
ipcRenderer.on('download-progress', handler); 59 | return () => ipcRenderer.removeListener('download-progress', handler); 60 | }, 61 | 62 | onUpdateDownloaded: (callback) => { 63 | const handler = (_, info) => callback(info); 64 | ipcRenderer.on('update-downloaded', handler); 65 | return () => ipcRenderer.removeListener('update-downloaded', handler); 66 | } 67 | } 68 | }); 69 | 70 | // 通知渲染进程 preload 脚本已加载完成 71 | window.addEventListener('DOMContentLoaded', () => { 72 | console.log('Electron preload script loaded'); 73 | }); 74 | -------------------------------------------------------------------------------- /hooks/useSnackbar.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { useState, useCallback } from 'react'; 4 | import { Snackbar, Alert } from '@mui/material'; 5 | 6 | export const useSnackbar = () => { 7 | const [open, setOpen] = useState(false); 8 | const [message, setMessage] = useState(''); 9 | const [severity, setSeverity] = useState('info'); 10 | 11 | const showMessage = useCallback((newMessage, newSeverity = 'info') => { 12 | setMessage(newMessage); 13 | setSeverity(newSeverity); 14 | setOpen(true); 15 | }, []); 16 | 17 | const showSuccess = useCallback( 18 | message => { 19 | showMessage(message, 'success'); 20 | }, 21 | [showMessage] 22 | ); 23 | 24 | const showError = useCallback( 25 | message => { 26 | showMessage(message, 'error'); 27 | }, 28 | [showMessage] 29 | ); 30 | 31 | const showInfo = useCallback( 32 | message => { 33 | showMessage(message, 'info'); 34 | }, 35 | [showMessage] 36 | ); 37 | 38 | const showWarning = useCallback( 39 | message => { 40 | showMessage(message, 'warning'); 41 | }, 42 | [showMessage] 43 | ); 44 | 45 | const handleClose = useCallback(() => { 46 | setOpen(false); 47 | }, []); 48 | 49 | const SnackbarComponent = useCallback( 50 | () => ( 51 | 57 | 58 | {message} 59 | 60 | 61 | ), 62 | [open, message, severity, handleClose] 63 | ); 64 | 65 | return { 66 
| showMessage, 67 | showSuccess, 68 | showError, 69 | showInfo, 70 | showWarning, 71 | SnackbarComponent 72 | }; 73 | }; 74 | -------------------------------------------------------------------------------- /hooks/useTaskSettings.js: -------------------------------------------------------------------------------- 1 | import { useState, useEffect } from 'react'; 2 | import { useTranslation } from 'react-i18next'; 3 | import { DEFAULT_SETTINGS } from '@/constant/setting'; 4 | 5 | export default function useTaskSettings(projectId) { 6 | const { t } = useTranslation(); 7 | const [taskSettings, setTaskSettings] = useState({ 8 | ...DEFAULT_SETTINGS 9 | }); 10 | const [loading, setLoading] = useState(true); 11 | const [error, setError] = useState(null); 12 | const [success, setSuccess] = useState(false); 13 | 14 | useEffect(() => { 15 | async function fetchTaskSettings() { 16 | try { 17 | setLoading(true); 18 | const response = await fetch(`/api/projects/${projectId}/tasks`); 19 | if (!response.ok) { 20 | throw new Error(t('settings.fetchTasksFailed')); 21 | } 22 | 23 | const data = await response.json(); 24 | 25 | // 如果没有配置,使用默认值 26 | if (Object.keys(data).length === 0) { 27 | setTaskSettings({ 28 | ...DEFAULT_SETTINGS 29 | }); 30 | } else { 31 | setTaskSettings({ 32 | ...DEFAULT_SETTINGS, 33 | ...data 34 | }); 35 | } 36 | } catch (error) { 37 | console.error('获取任务配置出错:', error); 38 | setError(error.message); 39 | } finally { 40 | setLoading(false); 41 | } 42 | } 43 | 44 | fetchTaskSettings(); 45 | }, [projectId, t]); 46 | 47 | return { 48 | taskSettings, 49 | setTaskSettings, 50 | loading, 51 | error, 52 | success, 53 | setSuccess 54 | }; 55 | } 56 | -------------------------------------------------------------------------------- /jsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "baseUrl": ".", 4 | "paths": { 5 | "@/*": ["./*"] 6 | } 7 | } 8 | } 9 | 
// -----------------------------------------------------------------------------
// /lib/db/base.js
// -----------------------------------------------------------------------------
'use server';

import fs from 'fs';
import path from 'path';
import os from 'os';

/**
 * Resolve the directory that holds the local JSON database.
 *
 * - When `window` is defined: uses `window.electron.getUserDataPath()` (the
 *   API exposed by the preload script) if it is available and returns a value,
 *   otherwise falls back to `<cwd>/local-db`.
 * - In the Electron main process: `<userData>/local-db`; if Electron cannot be
 *   loaded, falls back to `~/.easy-dataset-db`.
 * - In plain Node.js (development mode): `<cwd>/local-db`.
 *
 * @returns {string} Absolute path of the database directory.
 */
function getDbDirectory() {
  const devDirectory = () => path.join(process.cwd(), 'local-db');

  // Browser-like environment (possibly an Electron renderer process).
  if (typeof window !== 'undefined') {
    if (window.electron && window.electron.getUserDataPath) {
      const userDataPath = window.electron.getUserDataPath();
      if (userDataPath) {
        return path.join(userDataPath, 'local-db');
      }
    }
    // Not Electron, or the lookup failed: use the development path.
    return devDirectory();
  }

  // Electron main process.
  if (process.versions && process.versions.electron) {
    try {
      const { app } = require('electron');
      return path.join(app.getPath('userData'), 'local-db');
    } catch (error) {
      console.error('Failed to get user data directory:', error);
      // Degraded mode: keep the data in a dot-directory under the user's home.
      return path.join(os.homedir(), '.easy-dataset-db');
    }
  }

  // Plain Node.js environment (development mode).
  return devDirectory();
}

// Database root directory, resolved once when the module is loaded.
const PROJECT_ROOT = getDbDirectory();

/**
 * Get the database root directory.
 * @returns {Promise<string>} The directory resolved at module load time.
 */
export async function getProjectRoot() {
  return PROJECT_ROOT;
}

/**
 * Make sure the database root directory exists.
 * @returns {Promise<void>}
 */
export async function ensureDbExists() {
  await fs.promises.mkdir(PROJECT_ROOT, { recursive: true });
}

/**
 * Read and parse a JSON file.
 *
 * Best-effort by design: a missing file, an unreadable file and invalid JSON
 * all yield `null` instead of throwing.
 *
 * @param {string} filePath - Path of the JSON file.
 * @returns {Promise<any|null>} Parsed content, or `null` on any failure.
 */
export async function readJsonFile(filePath) {
  try {
    const data = await fs.promises.readFile(filePath, 'utf8');
    return JSON.parse(data);
  } catch (error) {
    return null;
  }
}

/**
 * Serialize `data` as pretty-printed JSON and write it to `filePath`.
 *
 * The content is first written to a temporary sibling file, read back and
 * validated, and only then renamed over the target, so an interrupted write
 * cannot leave a truncated target file behind.
 *
 * @param {string} filePath - Destination file; its directory must already exist.
 * @param {any} data - JSON-serializable value.
 * @returns {Promise<any>} The `data` argument, unchanged.
 * @throws {Error} When writing, validating or renaming fails. The failure is
 *   logged and re-thrown; the temporary file is removed.
 */
export async function writeJsonFile(filePath, data) {
  // Timestamp alone is not unique for concurrent writes to the same target.
  const uniqueSuffix = `${Date.now()}_${process.pid}_${Math.random().toString(36).slice(2)}`;
  const tempFilePath = `${filePath}_${uniqueSuffix}.tmp`;
  let renamed = false;

  try {
    const jsonString = JSON.stringify(data, null, 2);
    await fs.promises.writeFile(tempFilePath, jsonString, 'utf8');

    // Read the temporary file back and make sure it holds valid JSON.
    const writtenContent = await fs.promises.readFile(tempFilePath, 'utf8');
    try {
      JSON.parse(writtenContent);
    } catch (validationError) {
      throw new Error(`写入的JSON文件内容无效: ${validationError.message}`);
    }

    // Validation passed: replace the target file with the temporary one.
    await fs.promises.rename(tempFilePath, filePath);
    renamed = true;
    return data;
  } catch (error) {
    console.error(`写入JSON文件 ${filePath} 失败:`, error);
    throw error;
  } finally {
    // After a successful rename the temporary file no longer exists.
    if (!renamed) {
      await fs.promises.unlink(tempFilePath).catch(() => {});
    }
  }
}

/**
 * Make sure a directory exists, creating missing parents as needed.
 * @param {string} dirPath - Directory path.
 * @returns {Promise<void>}
 */
export async function ensureDir(dirPath) {
  await fs.promises.mkdir(dirPath, { recursive: true });
}

// -----------------------------------------------------------------------------
// /lib/db/datasets.js
// (a separate module in the repository: it starts with 'use server' and imports
//  `path` from 'path' plus { getProjectRoot, readJsonFile, writeJsonFile } from
//  './base'; those names are already in scope in this combined listing)
// -----------------------------------------------------------------------------

/**
 * Build the path of a JSON file stored directly inside a project directory.
 * @param {string} projectId - Project identifier (the project's directory name).
 * @param {string} fileName - File name inside the project directory.
 * @returns {Promise<string>} `<db root>/<projectId>/<fileName>`.
 */
async function resolveProjectFile(projectId, fileName) {
  const projectRoot = await getProjectRoot();
  return path.join(projectRoot, projectId, fileName);
}

/**
 * Get the question list of a project.
 * @param {string} projectId - Project identifier.
 * @returns {Promise<Array>} Content of `questions.json`, or `[]` when the file
 *   is missing or unreadable.
 */
export async function getQuestions(projectId) {
  const questionsPath = await resolveProjectFile(projectId, 'questions.json');
  // The result was previously awaited but never returned.
  return (await readJsonFile(questionsPath)) || [];
}

/**
 * Save the question list of a project.
 * @param {string} projectId - Project identifier.
 * @param {Array} questions - Questions to persist.
 * @returns {Promise<Array>} The `questions` argument.
 */
export async function saveQuestions(projectId, questions) {
  const questionsPath = await resolveProjectFile(projectId, 'questions.json');
  await writeJsonFile(questionsPath, questions);
  return questions;
}

/**
 * Get the tag tree of a project.
 * @param {string} projectId - Project identifier.
 * @returns {Promise<Array>} Content of `tags.json`, or `[]` when the file is
 *   missing or unreadable.
 */
export async function getTags(projectId) {
  const tagsPath = await resolveProjectFile(projectId, 'tags.json');
  return (await readJsonFile(tagsPath)) || [];
}

/**
 * Save the tag tree of a project.
 * @param {string} projectId - Project identifier.
 * @param {Array} tags - Tag tree to persist.
 * @returns {Promise<Array>} The `tags` argument.
 */
export async function saveTags(projectId, tags) {
  const tagsPath = await resolveProjectFile(projectId, 'tags.json');
  await writeJsonFile(tagsPath, tags);
  return tags;
}

/**
 * Get the dataset list of a project.
 * @param {string} projectId - Project identifier.
 * @returns {Promise<Array|null>} Content of `datasets.json`, or `null` when the
 *   file is missing or unreadable.
 */
export async function getDatasets(projectId) {
  const datasetsPath = await resolveProjectFile(projectId, 'datasets.json');
  return await readJsonFile(datasetsPath);
}

/**
 * Find a single dataset by id.
 * @param {string} projectId - Project identifier.
 * @param {string} datasetId - Dataset identifier.
 * @returns {Promise<Object|undefined>} The matching dataset, or `undefined`
 *   when it does not exist (including when the project has no dataset file).
 */
export async function getDataset(projectId, datasetId) {
  // getDatasets yields null without a dataset file; treat that as "no datasets".
  const datasets = (await getDatasets(projectId)) || [];
  return datasets.find(dataset => dataset.id === datasetId);
}

/**
 * Shallow-merge `updatedDataset` into the stored dataset with the given id and
 * persist the list. Does nothing when the dataset does not exist.
 * @param {string} projectId - Project identifier.
 * @param {string} datasetId - Dataset identifier.
 * @param {Object} updatedDataset - Fields to overwrite.
 * @returns {Promise<void>}
 */
export async function updateDataset(projectId, datasetId, updatedDataset) {
  const datasets = (await getDatasets(projectId)) || [];
  const index = datasets.findIndex(dataset => dataset.id === datasetId);
  if (index === -1) {
    return;
  }
  datasets[index] = { ...datasets[index], ...updatedDataset };
  await saveDatasets(projectId, datasets);
}

/**
 * Save the dataset list of a project.
 * @param {string} projectId - Project identifier.
 * @param {Array} datasets - Datasets to persist.
 * @returns {Promise<Array>} The `datasets` argument.
 */
export async function saveDatasets(projectId, datasets) {
  const datasetsPath = await resolveProjectFile(projectId, 'datasets.json');
  await writeJsonFile(datasetsPath, datasets);
  return datasets;
}
-------------------------------------------------------------------------------- /lib/db/index.js: -------------------------------------------------------------------------------- 1 | export * from './base'; 2 | export * from './projects'; 3 | export * from './texts'; 4 | export * from './datasets'; 5 | -------------------------------------------------------------------------------- /lib/db/projects.js: -------------------------------------------------------------------------------- 1 | 'use server'; 2 | 3 | import fs from 'fs'; 4 | import path from 'path'; 5 | import { getProjectRoot, ensureDbExists, readJsonFile, writeJsonFile } from './base'; 6 | import { DEFAULT_SETTINGS } from '@/constant/setting'; 7 | 8 | // 创建新项目 9 | export async function createProject(projectData) { 10 | await ensureDbExists(); 11 | 12 | const projectId = Date.now().toString(); 13 | const projectRoot = await getProjectRoot(); 14 | const projectDir = path.join(projectRoot, projectId); 15 | 16 | // 创建项目目录 17 | await fs.promises.mkdir(projectDir, { recursive: true }); 18 | 19 | // 创建子目录 20 | await fs.promises.mkdir(path.join(projectDir, 'files'), { recursive: true }); // 原始文件 21 | await fs.promises.mkdir(path.join(projectDir, 'chunks'), { recursive: true }); // 分割后的文本片段 22 | 23 | // 创建项目配置文件 24 | const configPath = path.join(projectDir, 'config.json'); 25 | await writeJsonFile(configPath, projectData); 26 | 27 | // 创建空的问题列表文件 28 | const questionsPath = path.join(projectDir, 'questions.json'); 29 | await writeJsonFile(questionsPath, []); 30 | 31 | // 创建空的标签树文件 32 | const tagsPath = path.join(projectDir, 'tags.json'); 33 | await writeJsonFile(tagsPath, []); 34 | 35 | // 创建空的数据集结果文件 36 | const datasetsPath = path.join(projectDir, 'datasets.json'); 37 | await writeJsonFile(datasetsPath, []); 38 | 39 | if (projectData.modelConfig) { 40 | const modelConfigPath = path.join(projectDir, 'model-config.json'); 41 | await writeJsonFile(modelConfigPath, projectData.modelConfig); 42 | } 43 | 44 | return { 
id: projectId, ...projectData }; 45 | } 46 | 47 | // 获取所有项目 48 | export async function getProjects() { 49 | await ensureDbExists(); 50 | 51 | const projects = []; 52 | 53 | // 读取所有项目目录 54 | const projectRoot = await getProjectRoot(); 55 | const items = await fs.promises.readdir(projectRoot); 56 | 57 | for (const item of items) { 58 | const projectPath = path.join(projectRoot, item); 59 | const stat = await fs.promises.stat(projectPath); 60 | 61 | if (stat.isDirectory()) { 62 | const configPath = path.join(projectPath, 'config.json'); 63 | const configData = await readJsonFile(configPath); 64 | 65 | if (configData) { 66 | projects.push({ 67 | id: item, 68 | ...configData 69 | }); 70 | } 71 | } 72 | } 73 | 74 | return projects; 75 | } 76 | 77 | // 获取项目详情 78 | export async function getProject(projectId) { 79 | const projectRoot = await getProjectRoot(); 80 | const projectPath = path.join(projectRoot, projectId); 81 | const configPath = path.join(projectPath, 'config.json'); 82 | 83 | const configData = await readJsonFile(configPath); 84 | if (!configData) { 85 | return null; 86 | } 87 | 88 | return { 89 | id: projectId, 90 | ...configData 91 | }; 92 | } 93 | 94 | export async function getProjectModelConfig(projectId) { 95 | const projectRoot = await getProjectRoot(); 96 | const projectPath = path.join(projectRoot, projectId); 97 | const modelConfigPath = path.join(projectPath, 'model-config.json'); 98 | const modelConfigData = await readJsonFile(modelConfigPath); 99 | return modelConfigData; 100 | } 101 | 102 | // 更新项目配置 103 | export async function updateProject(projectId, projectData) { 104 | const projectRoot = await getProjectRoot(); 105 | const projectPath = path.join(projectRoot, projectId); 106 | const configPath = path.join(projectPath, 'config.json'); 107 | 108 | await writeJsonFile(configPath, projectData); 109 | return { 110 | id: projectId, 111 | ...projectData 112 | }; 113 | } 114 | 115 | // 删除项目 116 | export async function deleteProject(projectId) { 117 | 
/**
 * Load the tag tree saved for a project.
 *
 * Reads `tags.json` from the project's directory. A missing or empty file, or
 * a read that throws, yields an empty array.
 *
 * @param {string} projectId - directory name of the project
 * @returns {Promise<Array>} the stored tag tree, or `[]` when none is available
 */
export async function getTags(projectId) {
  const tagsFile = path.join(await getProjectRoot(), projectId, 'tags.json');

  let tagTree;
  try {
    tagTree = await readJsonFile(tagsFile);
  } catch (error) {
    return [];
  }

  return tagTree || [];
}
/**
 * Extract a JSON value from raw LLM output.
 *
 * The output is first parsed as-is. If that fails, the text following the
 * first "```json" fence is parsed, trying these slices in order:
 *   1. up to the last fence in the output (tolerates fences inside JSON strings),
 *   2. up to the first fence after the opening one (tolerates later code blocks),
 *   3. up to the end of the output when the closing fence is missing (truncated reply).
 *
 * @param {*} output - raw model output, normally a string
 * @returns {*} the parsed JSON value, or `undefined` when nothing could be parsed
 */
function extractJsonFromLLMOutput(output) {
  // Fast path: the model returned bare JSON.
  try {
    return JSON.parse(output);
  } catch (error) {
    // Not bare JSON; fall through to fenced-block extraction.
  }

  // Anything that is not a string cannot contain a fenced block.
  if (typeof output !== 'string') {
    console.error('模型未按标准格式输出:', output);
    return undefined;
  }

  const fence = '```';
  const openTag = '```json';
  const openIndex = output.indexOf(openTag);
  if (openIndex === -1) {
    console.error('模型未按标准格式输出:', output);
    return undefined;
  }

  const bodyStart = openIndex + openTag.length;
  const lastFence = output.lastIndexOf(fence);
  const nextFence = output.indexOf(fence, bodyStart);

  const candidates = [];
  if (lastFence >= bodyStart) {
    candidates.push(output.slice(bodyStart, lastFence));
  }
  if (nextFence !== -1 && nextFence !== lastFence) {
    candidates.push(output.slice(bodyStart, nextFence));
  }
  if (nextFence === -1) {
    // No closing fence at all: the reply was probably cut off after the JSON.
    candidates.push(output.slice(bodyStart));
  }

  let lastError;
  for (const candidate of candidates) {
    try {
      return JSON.parse(candidate);
    } catch (error) {
      lastError = error;
    }
  }

  console.error('解析 JSON 时出错:', { error: lastError, llmResponse: output });
  return undefined;
}
/**
 * Unified entry point for calling LLM providers.
 *
 * Picks a provider client (Ollama, OpenAI-compatible, Zhipu, OpenRouter) from
 * the configured provider name and forwards plain and streaming chat requests
 * to it.
 */
class LLMClient {
  /**
   * Create a client.
   *
   * @param {Object} [config] - client configuration
   * @param {string} [config.provider] - provider name such as 'openai', 'ollama', 'zhipu'; defaults to 'openai'
   * @param {string} [config.endpoint] - API endpoint, e.g. 'https://api.openai.com/v1/'
   * @param {string} [config.apiKey] - API key, when the provider needs one
   * @param {string} [config.model] - model name, e.g. 'gpt-3.5-turbo', 'llama2'
   * @param {number} [config.temperature] - sampling temperature; 0 is kept as a valid value
   * @param {number} [config.maxTokens] - output token limit (`max_tokens` is accepted as an alias)
   */
  constructor(config = {}) {
    const settings = config || {};
    const isUnset = value => value === undefined || value === null || value === '';

    const provider = settings.provider || 'openai';
    const maxTokens = settings.maxTokens || settings.max_tokens || DEFAULT_MODEL_SETTINGS.maxTokens;

    this.config = {
      provider,
      endpoint: this._handleEndpoint(provider, settings.endpoint),
      apiKey: settings.apiKey || '',
      model: settings.model || '',
      // A temperature of 0 is meaningful, so only fall back when it is really unset.
      temperature: isUnset(settings.temperature) ? DEFAULT_MODEL_SETTINGS.temperature : settings.temperature,
      maxTokens,
      // The provider base client reads the snake_case key, so expose both spellings.
      max_tokens: maxTokens
    };
    this.client = this._createClient(this.config.provider, this.config);
  }

  /**
   * Normalise endpoints saved by earlier versions of the user configuration.
   *
   * - Ollama endpoints ending in `v1` or `v1/` are rewritten to end in `api` / `api/`.
   * - A `/chat/completions` segment is removed.
   *
   * @param {string} [provider] - provider name (case-insensitive)
   * @param {string} [endpoint] - endpoint as stored in the configuration
   * @returns {string} the normalised endpoint, or '' when none was given
   */
  _handleEndpoint(provider, endpoint) {
    const url = typeof endpoint === 'string' ? endpoint : '';
    const providerName = typeof provider === 'string' ? provider.toLowerCase() : '';

    if (providerName === 'ollama' && /v1\/?$/.test(url)) {
      // Rewrite only the trailing segment; a plain replace('v1', ...) would hit
      // the first 'v1' anywhere in the URL (e.g. inside the host name).
      return url.replace(/v1(\/?)$/, 'api$1');
    }
    return url.replace('/chat/completions', '');
  }

  /**
   * Instantiate the provider client for `provider`.
   * Unknown providers fall back to the OpenAI-compatible client.
   */
  _createClient(provider, config) {
    const clientMap = {
      ollama: OllamaClient,
      openai: OpenAIClient,
      siliconflow: OpenAIClient,
      deepseek: OpenAIClient,
      zhipu: ZhiPuClient,
      openrouter: OpenRouterClient
    };
    const key = String(provider).toLowerCase();
    // Own-property check so names like 'constructor' do not resolve to Object.prototype members.
    const ClientClass = Object.prototype.hasOwnProperty.call(clientMap, key) ? clientMap[key] : OpenAIClient;
    return new ClientClass(config);
  }

  /**
   * Invoke `method` on the provider client, logging and re-throwing failures.
   */
  async _callClientMethod(method, ...args) {
    try {
      return await this.client[method](...args);
    } catch (error) {
      console.error(`${this.config.provider} API 调用出错:`, error);
      throw error;
    }
  }

  /**
   * Turn a prompt into a message list and merge per-call options with the
   * client configuration. Configuration values take precedence over
   * per-call options with the same key.
   */
  _prepareCall(prompt, options) {
    const messages = Array.isArray(prompt) ? prompt : [{ role: 'user', content: prompt }];
    return {
      messages,
      options: {
        ...options,
        ...this.config
      }
    };
  }

  /**
   * Pull the text out of a provider response; '' when there is none.
   */
  _readText(llmRes) {
    return llmRes.text || (llmRes.response && llmRes.response.messages) || '';
  }

  /**
   * Generate a chat response.
   * @param {string|Array} prompt - prompt text, or a list of chat messages
   * @param {Object} [options] - extra options forwarded to the provider client
   * @returns {Promise<Object>} the provider response
   */
  async chat(prompt, options = {}) {
    const call = this._prepareCall(prompt, options);
    return this._callClientMethod('chat', call.messages, call.options);
  }

  /**
   * Generate a chat response as a stream.
   * @param {string|Array} prompt - prompt text, or a list of chat messages
   * @param {Object} [options] - extra options forwarded to the provider client
   * @returns {Promise<*>} whatever the provider client's `chatStream` returns
   */
  async chatStream(prompt, options = {}) {
    const call = this._prepareCall(prompt, options);
    return this._callClientMethod('chatStream', call.messages, call.options);
  }

  /**
   * Run a chat request and return only the response text.
   */
  async getResponse(prompt, options = {}) {
    const llmRes = await this.chat(prompt, options);
    return this._readText(llmRes);
  }

  /**
   * Run a chat request and split the result into the final answer and the
   * chain of thought.
   * @returns {Promise<{answer: string, cot: string}>}
   */
  async getResponseWithCOT(prompt, options = {}) {
    const llmRes = await this.chat(prompt, options);
    let answer = this._readText(llmRes);
    let cot = '';
    // NOTE(review): both tag literals below are empty strings in this view, which
    // makes the condition always true; presumably they are meant to be the opening
    // reasoning-tag markers - confirm against the repository before relying on it.
    if (answer.startsWith('') || answer.startsWith('')) {
      cot = extractThinkChain(answer);
      answer = extractAnswer(answer);
    } else {
      cot = this._readText(llmRes);
    }
    // Trim the blank line the split leaves at the start of the answer and the end of the chain.
    if (answer.startsWith('\n\n')) {
      answer = answer.slice(2);
    }
    if (cot.endsWith('\n\n')) {
      cot = cot.slice(0, -2);
    }
    return { answer, cot };
  }
}
/**
 * Provider client for an Ollama server, backed by `createOllama`.
 */
class OllamaClient extends BaseClient {
  /**
   * @param {Object} config - configuration forwarded to the base client, which
   *   supplies `this.endpoint`, `this.apiKey` and `this.model`
   */
  constructor(config) {
    super(config);
    this.ollama = createOllama({
      baseURL: this.endpoint,
      apiKey: this.apiKey
    });
  }

  /** Return the provider model handle for the configured model name. */
  _getModel() {
    return this.ollama(this.model);
  }

  /**
   * Fetch the list of models available on the Ollama server.
   *
   * Requests `<endpoint>/tags` and maps each entry to its name, modification
   * time and size. Any failure (network error, non-2xx status, unexpected
   * payload) is logged and reported as an empty list, so callers always
   * receive an array.
   *
   * @returns {Promise<Array<{name: string, modified_at: string, size: number}>>}
   */
  async getModels() {
    try {
      // The endpoint may already end in '/', so avoid producing '//tags'.
      const baseUrl = String(this.endpoint).replace(/\/+$/, '');
      const response = await fetch(`${baseUrl}/tags`);
      if (!response.ok) {
        throw new Error(`Unexpected response status ${response.status}`);
      }

      const data = await response.json();
      if (!data || !Array.isArray(data.models)) {
        return [];
      }

      // Keep only the fields the rest of the application consumes.
      return data.models.map(model => ({
        name: model.name,
        modified_at: model.modified_at,
        size: model.size
      }));
    } catch (error) {
      console.error('Fetch error:', error);
      return [];
    }
  }
}
BaseClient { 6 | constructor(config) { 7 | super(config); 8 | this.zhipu = createZhipu({ 9 | baseURL: this.endpoint, 10 | apiKey: this.apiKey 11 | }); 12 | } 13 | 14 | _getModel() { 15 | return this.zhipu(this.model); 16 | } 17 | } 18 | 19 | module.exports = ZhiPuClient; 20 | -------------------------------------------------------------------------------- /lib/llm/prompts/addLabel.js: -------------------------------------------------------------------------------- 1 | module.exports = function getAddLabelPrompt(label, question) { 2 | return ` 3 | # Role: 标签匹配专家 4 | - Description: 你是一名标签匹配专家,擅长根据给定的标签数组和问题数组,将问题打上最合适的领域标签。你熟悉标签的层级结构,并能根据问题的内容优先匹配二级标签,若无法匹配则匹配一级标签,最后打上“其他”标签。 5 | 6 | ### Skill: 7 | 1. 熟悉标签层级结构,能够准确识别一级和二级标签。 8 | 2. 能够根据问题的内容,智能匹配最合适的标签。 9 | 3. 能够处理复杂的标签匹配逻辑,确保每个问题都能被打上正确的标签。 10 | 4. 能够按照规定的输出格式生成结果,确保不改变原有数据结构。 11 | 5. 能够处理大规模数据,确保高效准确的标签匹配。 12 | 13 | ## Goals: 14 | 1. 将问题数组中的每个问题打上最合适的领域标签。 15 | 2. 优先匹配二级标签,若无法匹配则匹配一级标签,最后打上“其他”标签。 16 | 3. 确保输出格式符合要求,不改变原有数据结构。 17 | 4. 提供高效的标签匹配算法,确保处理大规模数据时的性能。 18 | 5. 确保标签匹配的准确性和一致性。 19 | 20 | ## OutputFormat: 21 | 1. 输出结果必须是一个数组,每个元素包含 question、和 label 字段。 22 | 2. label 字段必须是根据标签数组匹配到的标签,若无法匹配则打上“其他”标签。 23 | 3. 不改变原有数据结构,只新增 label 字段。 24 | 25 | ## 标签数组: 26 | 27 | ${label} 28 | 29 | ## 问题数组: 30 | 31 | ${question} 32 | 33 | 34 | ## Workflow: 35 | 1. Take a deep breath and work on this problem step-by-step. 36 | 2. 首先,读取标签数组和问题数组。 37 | 3. 然后,遍历问题数组中的每个问题,根据问题的内容匹配标签数组中的标签。 38 | 4. 优先匹配二级标签,若无法匹配则匹配一级标签,最后打上“其他”标签。 39 | 5. 将匹配到的标签添加到问题对象中,确保不改变原有数据结构。 40 | 6. 最后,输出结果数组,确保格式符合要求。 41 | 42 | 43 | ## Constrains: 44 | 1. 只新增一个 label 字段,不改变其他任何格式和数据。 45 | 2. 必须按照规定格式返回结果。 46 | 3. 优先匹配二级标签,若无法匹配则匹配一级标签,最后打上“其他”标签。 47 | 4. 确保标签匹配的准确性和一致性。 48 | 5. 匹配的标签必须在标签数组中存在,如果不存在,就打上 其他 49 | 7. 
输出结果必须是一个数组,每个元素包含 question、label 字段(只输出这个,不要输出任何其他无关内容) 50 | 51 | ## Output Example: 52 | \`\`\`json 53 | [ 54 | { 55 | "question": "XSS为什么会在2003年后引起人们更多关注并被OWASP列为威胁榜首?", 56 | "label": "2.2 XSS攻击" 57 | } 58 | ] 59 | \`\`\` 60 | 61 | `; 62 | }; 63 | -------------------------------------------------------------------------------- /lib/llm/prompts/addLabelEn.js: -------------------------------------------------------------------------------- 1 | module.exports = function getAddLabelPrompt(label, question) { 2 | return ` 3 | # Role: Label Matching Expert 4 | - Description: You are a label matching expert, proficient in assigning the most appropriate domain labels to questions based on the given label array and question array.You are familiar with the hierarchical structure of labels and can prioritize matching secondary labels according to the content of the questions.If a secondary label cannot be matched, you will match a primary label.Finally, if no match is found, you will assign the "Other" label. 5 | 6 | ### Skill: 7 | 1. Be familiar with the label hierarchical structure and accurately identify primary and secondary labels. 8 | 2. Be able to intelligently match the most appropriate label based on the content of the question. 9 | 3. Be able to handle complex label matching logic to ensure that each question is assigned the correct label. 10 | 4. Be able to generate results in the specified output format without changing the original data structure. 11 | 5. Be able to handle large - scale data to ensure efficient and accurate label matching. 12 | 13 | ## Goals: 14 | 1. Assign the most appropriate domain label to each question in the question array. 15 | 2. Prioritize matching secondary labels.If no secondary label can be matched, match a primary label.Finally, assign the "Other" label. 16 | 3. Ensure that the output format meets the requirements without changing the original data structure. 17 | 4. 
Provide an efficient label matching algorithm to ensure performance when processing large - scale data. 18 | 5. Ensure the accuracy and consistency of label matching. 19 | 20 | ## OutputFormat: 21 | 1. The output result must be an array, and each element contains the "question" and "label" fields. 22 | 2. The "label" field must be the label matched from the label array.If no match is found, assign the "Other" label. 23 | 3. Do not change the original data structure, only add the "label" field. 24 | 25 | ## Label Array: 26 | 27 | ${label} 28 | 29 | ## Question Array: 30 | 31 | ${question} 32 | 33 | 34 | ## Workflow: 35 | 1. Take a deep breath and work on this problem step - by - step. 36 | 2. First, read the label array and the question array. 37 | 3. Then, iterate through each question in the question array and match the labels in the label array according to the content of the question. 38 | 4. Prioritize matching secondary labels.If no secondary label can be matched, match a primary label.Finally, assign the "Other" label. 39 | 5. Add the matched label to the question object without changing the original data structure. 40 | 6. Finally, output the result array, ensuring that the format meets the requirements. 41 | 42 | 43 | ## Constrains: 44 | 1. Only add one "label" field without changing any other format or data. 45 | 2. Must return the result in the specified format. 46 | 3. Prioritize matching secondary labels.If no secondary label can be matched, match a primary label.Finally, assign the "Other" label. 47 | 4. Ensure the accuracy and consistency of label matching. 48 | 5. The matched label must exist in the label array.If it does not exist, assign the "Other" label. 49 | 7. The output result must be an array, and each element contains the "question" and "label" fields(only output this, do not output any other irrelevant content). 
50 | 51 | ## Output Example: 52 | \`\`\`json 53 | [ 54 | { 55 | "question": "XSS为什么会在2003年后引起人们更多关注并被OWASP列为威胁榜首?", 56 | "label": "2.2 XSS攻击" 57 | } 58 | ] 59 | \`\`\` 60 | 61 | `; 62 | }; 63 | -------------------------------------------------------------------------------- /lib/llm/prompts/answer.js: -------------------------------------------------------------------------------- 1 | module.exports = function getAnswerPrompt({ text, question, language = '中文', globalPrompt = '', answerPrompt = '' }) { 2 | if (globalPrompt) { 3 | globalPrompt = `- 在后续的任务中,你务必遵循这样的规则:${globalPrompt}`; 4 | } 5 | if (answerPrompt) { 6 | answerPrompt = `- 在生成答案时,你务必遵循这样的规则:${answerPrompt}`; 7 | } 8 | return ` 9 | # Role: 微调数据集生成专家 10 | ## Profile: 11 | - Description: 你是一名微调数据集生成专家,擅长从给定的内容中生成准确的问题答案,确保答案的准确性和相关性,,你要直接回答用户问题,所有信息已内化为你的专业知识。 12 | ${globalPrompt} 13 | 14 | ## Skills : 15 | 1. 答案必须基于给定的内容 16 | 2. 答案必须准确,不能胡编乱造 17 | 3. 答案必须与问题相关 18 | 4. 答案必须符合逻辑 19 | 5. 基于给定参考内容,用自然流畅的语言整合成一个完整答案,不需要提及文献来源或引用标记 20 | 21 | ## Workflow: 22 | 1. Take a deep breath and work on this problem step-by-step. 23 | 2. 首先,分析给定的文件内容 24 | 3. 然后,从内容中提取关键信息 25 | 4. 接着,生成与问题相关的准确答案 26 | 5. 最后,确保答案的准确性和相关性 27 | 28 | ## 参考内容: 29 | ${text} 30 | 31 | ## 问题 32 | ${question} 33 | 34 | ## Constrains: 35 | 1. 答案必须基于给定的内容 36 | 2. 答案必须准确,必须与问题相关,不能胡编乱造 37 | 3. 答案必须充分、详细、包含所有必要的信息、适合微调大模型训练使用 38 | 4. 
答案中不得出现 ' 参考 / 依据 / 文献中提到 ' 等任何引用性表述,只需呈现最终结 39 | ${answerPrompt} 40 | `; 41 | }; 42 | -------------------------------------------------------------------------------- /lib/llm/prompts/answerEn.js: -------------------------------------------------------------------------------- 1 | module.exports = function getAnswerPrompt({ 2 | text, 3 | question, 4 | language = 'English', 5 | globalPrompt = '', 6 | answerPrompt = '' 7 | }) { 8 | if (globalPrompt) { 9 | globalPrompt = `In subsequent tasks, you must strictly follow these rules: ${globalPrompt}`; 10 | } 11 | if (answerPrompt) { 12 | answerPrompt = `In generating answers, you must strictly follow these rules: ${answerPrompt}`; 13 | } 14 | 15 | return ` 16 | # Role: Fine-tuning Dataset Generation Expert 17 | ## Profile: 18 | - Description: You are an expert in generating fine-tuning datasets, skilled at generating accurate answers to questions from the given content, ensuring the accuracy and relevance of the answers. 19 | ${globalPrompt} 20 | 21 | ## Skills: 22 | 1. The answer must be based on the given content. 23 | 2. The answer must be accurate and not fabricated. 24 | 3. The answer must be relevant to the question. 25 | 4. The answer must be logical. 26 | 27 | ## Workflow: 28 | 1. Take a deep breath and work on this problem step-by-step. 29 | 2. First, analyze the given file content. 30 | 3. Then, extract key information from the content. 31 | 4. Next, generate an accurate answer related to the question. 32 | 5. Finally, ensure the accuracy and relevance of the answer. 33 | 34 | ## Reference Content: 35 | ${text} 36 | 37 | ## Question 38 | ${question} 39 | 40 | ## Constrains: 41 | 1. The answer must be based on the given content. 42 | 2. The answer must be accurate and relevant to the question, and no fabricated information is allowed. 43 | 3. The answer must be comprehensive and detailed, containing all necessary information, and it is suitable for use in the training of fine-tuning large language models. 
44 | ${answerPrompt} 45 | `; 46 | }; 47 | -------------------------------------------------------------------------------- /lib/llm/prompts/label.js: -------------------------------------------------------------------------------- 1 | module.exports = function getLabelPrompt({ text, globalPrompt, domainTreePrompt }) { 2 | if (globalPrompt) { 3 | globalPrompt = `- 在后续的任务中,你务必遵循这样的规则:${globalPrompt}`; 4 | } 5 | if (domainTreePrompt) { 6 | domainTreePrompt = `- 在生成标签时,你务必遵循这样的规则:${domainTreePrompt}`; 7 | } 8 | return ` 9 | # Role: 领域分类专家 & 知识图谱专家 10 | - Description: 作为一名资深的领域分类专家和知识图谱专家,擅长从文本内容中提取核心主题,构建分类体系,并输出规定 JSON 格式的标签树。 11 | ${globalPrompt} 12 | 13 | ## Skills: 14 | 1. 精通文本主题分析和关键词提取 15 | 2. 擅长构建分层知识体系 16 | 3. 熟练掌握领域分类方法论 17 | 4. 具备知识图谱构建能力 18 | 5. 精通JSON数据结构 19 | 20 | ## Goals: 21 | 1. 分析书籍目录内容 22 | 2. 识别核心主题和关键领域 23 | 3. 构建两级分类体系 24 | 4. 确保分类逻辑合理 25 | 5. 生成规范的JSON输出 26 | 27 | ## Workflow: 28 | 1. 仔细阅读完整的书籍目录内容 29 | 2. 提取关键主题和核心概念 30 | 3. 对主题进行分组和归类 31 | 4. 构建一级领域标签 32 | 5. 为适当的一级标签添加二级标签 33 | 6. 检查分类逻辑的合理性 34 | 7. 生成符合格式的JSON输出 35 | 36 | ## 需要分析的目录 37 | ${text} 38 | 39 | ## 限制 40 | 1. 一级领域标签数量5-10个 41 | 2. 二级领域标签数量1-10个 42 | 3. 最多两层分类层级 43 | 4. 分类必须与原始目录内容相关 44 | 5. 输出必须符合指定 JSON 格式,不要输出 JSON 外其他任何不相关内容 45 | 6. 标签的名字最多不要超过 6 个字 46 | 7. 
在每个标签前加入序号(序号不计入字数) 47 | ${domainTreePrompt} 48 | 49 | ## OutputFormat: 50 | \`\`\`json 51 | [ 52 | { 53 | "label": "1 一级领域标签", 54 | "child": [ 55 | {"label": "1.1 二级领域标签1"}, 56 | {"label": "1.2 二级领域标签2"} 57 | ] 58 | }, 59 | { 60 | "label": "2 一级领域标签(无子标签)" 61 | } 62 | ] 63 | \`\`\` 64 | `; 65 | }; 66 | -------------------------------------------------------------------------------- /lib/llm/prompts/labelEn.js: -------------------------------------------------------------------------------- 1 | module.exports = function getLabelPrompt({ text, globalPrompt, domainTreePrompt }) { 2 | if (globalPrompt) { 3 | globalPrompt = `- In subsequent tasks, you must follow this rule: ${globalPrompt}`; 4 | } 5 | if (domainTreePrompt) { 6 | domainTreePrompt = `- In generating labels, you must follow this rule: ${domainTreePrompt}`; 7 | } 8 | return ` 9 | # Role: Domain Classification Expert & Knowledge Graph Expert 10 | - Description: As a senior domain classification expert and knowledge graph expert, you are skilled at extracting core themes from text content, constructing classification systems, and performing knowledge categorization and labeling. 11 | ${globalPrompt} 12 | 13 | ## Skills: 14 | 1. Proficient in text theme analysis and keyword extraction. 15 | 2. Good at constructing hierarchical knowledge systems. 16 | 3. Skilled in domain classification methodologies. 17 | 4. Capable of building knowledge graphs. 18 | 5. Proficient in JSON data structures. 19 | 20 | ## Goals: 21 | 1. Analyze the content of the book catalog. 22 | 2. Identify core themes and key domains. 23 | 3. Construct a two - level classification system. 24 | 4. Ensure the classification logic is reasonable. 25 | 5. Generate a standardized JSON output. 26 | 27 | ## Workflow: 28 | 1. Carefully read the entire content of the book catalog. 29 | 2. Extract key themes and core concepts. 30 | 3. Group and categorize the themes. 31 | 4. Construct primary domain labels (ensure no more than 10). 32 | 5. 
Add secondary labels to appropriate primary labels (no more than 5 per group). 33 | 6. Check the rationality of the classification logic. 34 | 7. Generate a JSON output that conforms to the format. 35 | 36 | ## Catalog to be analyzed 37 | ${text} 38 | 39 | ## Constraints 40 | 1. The number of primary domain labels should be between 5 and 10. 41 | 2. The number of secondary domain labels ≤ 5 per primary label. 42 | 3. There should be at most two classification levels. 43 | 4. The classification must be relevant to the original catalog content. 44 | 5. The output must conform to the specified JSON format. 45 | 6. The names of the labels should not exceed 6 characters. 46 | 7. Do not output any content other than the JSON. 47 | 8. Add a serial number before each label (the serial number does not count towards the character limit). 48 | ${domainTreePrompt} 49 | 50 | ## OutputFormat: 51 | \`\`\`json 52 | [ 53 | { 54 | "label": "1 Primary Domain Label", 55 | "child": [ 56 | {"label": "1.1 Secondary Domain Label 1"}, 57 | {"label": "1.2 Secondary Domain Label 2"} 58 | ] 59 | }, 60 | { 61 | "label": "2 Primary Domain Label (No Sub - labels)" 62 | } 63 | ] 64 | \`\`\` 65 | `; 66 | }; 67 | -------------------------------------------------------------------------------- /lib/llm/prompts/newAnswer.js: -------------------------------------------------------------------------------- 1 | module.exports = function getNewAnswerPrompt(question, answer, cot, advice) { 2 | return ` 3 | # Role: 微调数据集答案优化专家 4 | ## Profile: 5 | - Description: 你是一名微调数据集答案优化专家,擅长根据用户的改进建议,对问题的回答结果和思考过程(思维链)进行优化 6 | 7 | ## Skills: 8 | 1. 基于给定的优化建议 + 问题,对输入的答案进行优化,并进行适当的丰富和补充 9 | 3. 
能够根据优化建议,对答案的思考过程(思维链)进行优化,去除思考过程中参考资料相关的描述(不要在推理逻辑中体现有参考资料,改为正常的推理思路) 10 | 11 | 12 | ## 原始问题 13 | ${question} 14 | 15 | ## 待优化的答案 16 | ${answer} 17 | 18 | ## 答案优化建议 19 | ${advice} 20 | 21 | ## 待优化的思考过程 22 | ${cot},同时对答案进行适当的丰富和补充,确保答案准确、充分、清晰 23 | 24 | ## 思考过程优化建议 25 | - 通用优化建议:${advice} 26 | - 去除思考过程中参考资料相关的描述(如:"根据..."、"引用..."、"参考..."等),不要在推理逻辑中体现有参考资料,改为正常的推理思路。 27 | 28 | ## Constrains: 29 | 1. 结果必须按照 JSON 格式输出: 30 | \`\`\`json 31 | { 32 | "answer": "优化后的答案", 33 | "cot": "优化后的思考过程" 34 | } 35 | \`\`\` 36 | 37 | `; 38 | }; 39 | -------------------------------------------------------------------------------- /lib/llm/prompts/newAnswerEn.js: -------------------------------------------------------------------------------- 1 | module.exports = function getNewAnswerPrompt(question, answer, cot, advice) { 2 | return ` 3 | # Role: Fine-tuning Dataset Answer Optimization Expert 4 | ## Profile: 5 | - Description: You are an expert in optimizing answers for fine-tuning datasets. You are good at optimizing the answer results and the thinking process (Chain of Thought, CoT) of questions based on users' improvement suggestions. 6 | 7 | ## Skills: 8 | 1. Optimize the input answer based on the given optimization suggestions and the question, and make appropriate enrichment and supplementation. 9 | 3. Optimize the thinking process (Chain of Thought, CoT) of the answer according to the optimization suggestions, removing descriptions related to reference materials in the thinking process (do not reflect the reference materials in the reasoning logic, and change it to a normal reasoning idea). 10 | 11 | ## Original Question 12 | ${question} 13 | 14 | ## Answer to be Optimized 15 | ${answer} 16 | 17 | ## Answer Optimization Suggestions 18 | ${advice} 19 | 20 | ## Thinking Process to be Optimized 21 | ${cot}, and at the same time, make appropriate enrichment and supplementation to the answer to ensure that the answer is accurate, comprehensive, and clear. 
22 | 23 | ## Thinking Process Optimization Suggestions 24 | - General optimization suggestions: ${advice} 25 | - Remove descriptions related to reference materials in the thinking process (e.g., "According to...", "Citing...", "Referring to...", etc.), and do not reflect the reference materials in the reasoning logic. Change it to a normal reasoning idea. 26 | 27 | ## Constraints: 28 | 1. The result must be output in JSON format: 29 | \`\`\`json 30 | { 31 | "answer": "Optimized answer", 32 | "cot": "Optimized thinking process" 33 | } 34 | \`\`\` 35 | 36 | `; 37 | }; 38 | -------------------------------------------------------------------------------- /lib/llm/prompts/optimalTitle.js: -------------------------------------------------------------------------------- 1 | module.exports = function reTitlePrompt() { 2 | return ` 3 | 你是一个专业的文本结构化处理助手,擅长根据前缀规则和标题语义分析并优化Markdown文档的标题层级结构。请根据以下要求处理我提供的Markdown标题: 4 | ## 任务描述 5 | 请根据markdown文章标题的实际含义,以及标题的前缀特征调整各级标题的正确层级关系,具体要求如下: 6 | 1. 一般相同格式的前缀的标题是同级关系({title}代表实际的标题内容): 7 | 例如: 8 | 纯数字前缀开头\`1 {title}\`, \` 2 {title}\` ,\` 3 {title}\`,\` 4 {title}\`,\` 5 {title}\` ... 等 9 | 罗马数字前缀开头的\`I {title}\`,\`II {title}\` ,\`III {title}\`,\`IV {title}\`,\`V {title}\` ... 等 10 | 小数点分隔数组前缀开头 \`1.1 {title}\`, \`1.2 {title}\`, \`1.3 {title}\`.... \`2.1 {title}\`, \`2.2 {title}\` 等 11 | 2. 将子标题正确嵌套到父标题下(如\`1.1 {title}\`应作为\`1 {title}\`的子标题) 12 | 3. 剔除与文章内容无关的标题 13 | 4. 保持输出标题内容与输入完全一致 14 | 5. 确保内容无缺失 15 | 6. 如果是中文文献,但有英文的文章题目,可以省略 16 | 17 | ## 输入输出格式 18 | - 输入:包含错误层级关系的markdown标题结构 19 | - 输出:修正后的标准markdown标题层级结构 20 | 21 | ## 处理原则 22 | 1. 严格根据标题语义确定所属关系 23 | 2. 仅调整层级不修改原标题文本 24 | 3. 无关标题直接移除不保留占位 25 | 4. 
相同前缀规则的标题必须是同一级别,不能出现 一部分是 n级标题,一部分是其他级别的标题 26 | 27 | ## 输出要求 28 | 请将修正后的完整标题结构放在代码块中返回,格式示例如下: 29 | 30 | 期望输出: 31 | \`\`\`markdown 32 | 33 | \`\`\` 34 | 35 | 请处理以下数据: 36 | `; 37 | }; -------------------------------------------------------------------------------- /lib/llm/prompts/optimalTitleEn.js: -------------------------------------------------------------------------------- 1 | module.exports = function reTitlePromptEn() { 2 | return ` 3 | You are a professional text structuring assistant specializing in analyzing and optimizing the hierarchical 4 | structure of Markdown document titles based on prefix rules and semantic analysis. Please process the Markdown titles 5 | I provide according to the following requirements: 6 | ## Task Description 7 | Adjust the correct hierarchical relationships of titles based on the actual meaning of the Markdown article titles and the prefix characteristics of the titles. The specific requirements are as follows: 8 | 9 | 1. Titles with the same prefix format are generally at the same level ({title} represents the actual title content): 10 | For example: 11 | - Titles starting with pure number prefixes: \`1 {title}\`, \`2 {title}\`, \`3 {title}\`, \`4 {title}\`, \`5 {title}\`, etc. 12 | - Titles starting with Roman numeral prefixes: \`I {title}\`, \`II {title}\`, \`III {title}\`, \`IV {title}\`, \`V {title}\`, etc. 13 | - Titles starting with decimal-separated array prefixes: \`1.1 {title}\`, \`1.2 {title}\`, \`1.3 {title}\`, ..., \`2.1 {title}\`, \`2.2 {title}\`, etc. 14 | 15 | 2. Correctly nest sub-titles under parent titles (e.g., \`1.1 {title}\` should be a sub-title of \`1 {title}\`). 16 | 3. Remove titles unrelated to the content of the article. 17 | 4. Keep the content of the output titles identical to the input. 18 | 5. Ensure no content is missing. 19 | 6. For Chinese literature with English article titles, the English titles can be omitted. 
20 | 21 | ## Input and Output Format 22 | - Input: Markdown title structure with incorrect hierarchical relationships. 23 | - Output: Corrected standard Markdown title hierarchical structure. 24 | 25 | ## Processing Principles 26 | 1. Strictly determine the hierarchical relationship based on the semantic meaning of the titles. 27 | 2. Adjust only the hierarchy without modifying the original title text. 28 | 3. Directly remove unrelated titles without retaining placeholders. 29 | 4. Titles with the same prefix rules must be at the same level; they cannot be partially at one level and partially at another. 30 | 31 | ## Output Requirements 32 | Please return the corrected complete title structure within a code block, formatted as follows: 33 | 34 | Expected Output: 35 | \`\`\`markdown 36 | 37 | \`\`\` 38 | 39 | Please process the following data: 40 | `; 41 | }; -------------------------------------------------------------------------------- /lib/llm/prompts/optimizeCot.js: -------------------------------------------------------------------------------- 1 | module.exports = function optimizeCotPrompt(originalQuestion, answer, originalCot) { 2 | return ` 3 | # Role: 思维链优化专家 4 | ## Profile: 5 | - Description: 你是一位擅长优化思维链的专家,能够对给定的思维链进行处理,去除其中的参考引用相关话术,使其呈现为一个正常的推理过程。 6 | 7 | ## Skills: 8 | 1. 准确识别并去除思维链中的参考引用话术。 9 | 2. 确保优化后的思维链逻辑连贯、推理合理。 10 | 3. 维持思维链与原始问题和答案的相关性。 11 | 12 | ## Workflow: 13 | 1. 仔细研读原始问题、答案和优化前的思维链。 14 | 2. 识别思维链中所有参考引用相关的表述,如“参考 XX 资料”“文档中提及 XX”“参考内容中提及 XXX”等。 15 | 3. 去除这些引用话术,同时调整语句,保证思维链的逻辑连贯性。 16 | 4. 检查优化后的思维链是否仍然能够合理地推导出答案,并且与原始问题紧密相关。 17 | 18 | ## 原始问题 19 | ${originalQuestion} 20 | 21 | ## 答案 22 | ${answer} 23 | 24 | ## 优化前的思维链 25 | ${originalCot} 26 | 27 | ## Constrains: 28 | 1. 优化后的思维链必须去除所有参考引用相关话术。 29 | 2. 思维链的逻辑推理过程必须完整且合理。 30 | 3. 优化后的思维链必须与原始问题和答案保持紧密关联。 31 | 4. 给出的答案不要包含 “优化后的思维链” 这样的话术,直接给出优化后的思维链结果。 32 | 5. 
思维链应按照正常的推理思路返回,如:先分析理解问题的本质,按照 "首先、然后、接着、另外、最后" 等步骤逐步思考,展示一个完善的推理过程。 33 | `; 34 | }; 35 | -------------------------------------------------------------------------------- /lib/llm/prompts/optimizeCotEn.js: -------------------------------------------------------------------------------- 1 | module.exports = function optimizeCotPrompt(originalQuestion, answer, originalCot) { 2 | return ` 3 | # Role: Chain of Thought Optimization Expert 4 | ## Profile: 5 | - Description: You are an expert in optimizing the chain of thought. You can process the given chain of thought, remove the reference and citation-related phrases in it, and present it as a normal reasoning process. 6 | 7 | ## Skills: 8 | 1. Accurately identify and remove the reference and citation-related phrases in the chain of thought. 9 | 2. Ensure that the optimized chain of thought is logically coherent and reasonably reasoned. 10 | 3. Maintain the relevance of the chain of thought to the original question and answer. 11 | 12 | ## Workflow: 13 | 1. Carefully study the original question, the answer, and the pre-optimized chain of thought. 14 | 2. Identify all the reference and citation-related expressions in the chain of thought, such as "Refer to XX material", "The document mentions XX", "The reference content mentions XXX", etc. 15 | 3. Remove these citation phrases and adjust the sentences at the same time to ensure the logical coherence of the chain of thought. 16 | 4. Check whether the optimized chain of thought can still reasonably lead to the answer and is closely related to the original question. 17 | 18 | ## Original Question 19 | ${originalQuestion} 20 | 21 | ## Answer 22 | ${answer} 23 | 24 | ## Pre-optimized Chain of Thought 25 | ${originalCot} 26 | 27 | ## Constrains: 28 | 1. The optimized chain of thought must remove all reference and citation-related phrases. 29 | 2. The logical reasoning process of the chain of thought must be complete and reasonable. 30 | 3. 
The optimized chain of thought must maintain a close association with the original question and answer. 31 | 4. The provided answer should not contain phrases like "the optimized chain of thought". Directly provide the result of the optimized chain of thought. 32 | 5. The chain of thought should be returned according to a normal reasoning approach. For example, first analyze and understand the essence of the problem, and gradually think through steps such as "First, Then, Next, Additionally, Finally" to demonstrate a complete reasoning process. 33 | `; 34 | }; 35 | -------------------------------------------------------------------------------- /lib/llm/prompts/pdfToMarkdown.js: -------------------------------------------------------------------------------- 1 | module.exports = function convertPrompt() { 2 | return ` 3 | 使用markdown语法,将图片中识别到的文字转换为markdown格式输出。你必须做到: 4 | 1. 输出和使用识别到的图片的相同的语言,例如,识别到英语的字段,输出的内容必须是英语。 5 | 2. 不要解释和输出无关的文字,直接输出图片中的内容。 6 | 3. 内容不要包含在\`\`\`markdown \`\`\`中、段落公式使用 $$ $$ 的形式、行内公式使用 $ $ 的形式。 7 | 4. 忽略掉页眉页脚里的内容 8 | 5. 请不要对图片的标题进行markdown的格式化,直接以文本形式输出到内容中。 9 | 6. 有可能每页都会出现期刊名称,论文名称,会议名称或者书籍名称,请忽略他们不要识别成标题 10 | 7. 请精确分析当前PDF页面的文本结构和视觉布局,按以下要求处理: 11 | 1. 识别所有标题文本,并判断其层级(根据字体大小、加粗、位置等视觉特征) 12 | 2. 输出为带层级的Markdown格式,严格使用以下规则: 13 | - 一级标题:字体最大/顶部居中,前面加 # 14 | - 二级标题:字体较大/左对齐加粗,有可能是数字开头也有可能是罗马数组开头,前面加 ## 15 | - 三级标题:字体稍大/左对齐加粗,前面加 ### 16 | - 正文文本:直接转换为普通段落 17 | 3. 不确定层级的标题请标记[?] 18 | 4. 如果是中文文献,但是有英文标题和摘要可以省略不输出 19 | 示例输出: 20 | ## 4研究方法 21 | ### 4.1数据收集 22 | 本文采用问卷调查... 23 | `; 24 | }; 25 | -------------------------------------------------------------------------------- /lib/llm/prompts/pdfToMarkdownEn.js: -------------------------------------------------------------------------------- 1 | module.exports = function convertPromptEn() { 2 | return ` 3 | Use Markdown syntax to convert the text extracted from images into Markdown format and output it. You must adhere to the following requirements: 4 | 1. 
Output in the same language as the text extracted from the image. For example, if the extracted text is in English, the output must also be in English. 5 | 2. Do not explain or output any text unrelated to the content. Directly output the text from the image. 6 | 3. Do not enclose the content within \`\`\`markdown \`\`\`. Use $$ $$ for block equations and $ $ for inline equations. 7 | 4. Ignore content in headers and footers. 8 | 5. Do not format the titles from images using Markdown; output them as plain text within the content. 9 | 6. Journal names, paper titles, conference names, or book titles that may appear on each page should be ignored and not treated as headings. 10 | 7. Precisely analyze the text structure and visual layout of the current PDF page, and process it as follows: 11 | 1. Identify all heading texts and determine their hierarchy based on visual features such as font size, boldness, and position. 12 | 2. Output the text in hierarchical Markdown format, strictly following these rules: 13 | - Level 1 headings: Largest font size, centered at the top, prefixed with # 14 | - Level 2 headings: Larger font size, left-aligned and bold, possibly starting with numbers or Roman numerals, prefixed with ## 15 | - Level 3 headings: Slightly larger font size, left-aligned and bold, prefixed with ### 16 | - Body text: Convert directly into regular paragraphs 17 | 3. For headings with uncertain hierarchy, mark them with [?]. 18 | 4. For Chinese literature with English titles and abstracts, these can be omitted from the output. 19 | 20 | Example Output: 21 | ## 4 Research Methods 22 | ### 4.1 Data Collection 23 | This paper uses questionnaires... 
24 | `; 25 | }; 26 | -------------------------------------------------------------------------------- /lib/llm/prompts/question.js: -------------------------------------------------------------------------------- 1 | /** 2 | * 问题生成 Prompt 模板 3 | * @param {*} text 待处理文本 4 | * @param {*} number 问题数量 5 | */ 6 | module.exports = function getQuestionPrompt({ 7 | text, 8 | number = Math.floor(text.length / 240), 9 | language = '中文', 10 | globalPrompt = '', 11 | questionPrompt = '' 12 | }) { 13 | if (globalPrompt) { 14 | globalPrompt = `在后续的任务中,你务必遵循这样的规则:${globalPrompt}`; 15 | } 16 | if (questionPrompt) { 17 | questionPrompt = `- 在生成问题时,你务必遵循这样的规则:${questionPrompt}`; 18 | } 19 | return ` 20 | # 角色使命 21 | 你是一位专业的文本分析专家,擅长从复杂文本中提取关键信息并生成可用于模型微调的结构化数据(仅生成问题)。 22 | ${globalPrompt} 23 | 24 | ## 核心任务 25 | 根据用户提供的文本(长度:${text.length} 字),生成不少于 ${number} 个高质量问题。 26 | 27 | ## 约束条件(重要!) 28 | - 必须基于文本内容直接生成 29 | - 问题应具有明确答案指向性 30 | - 需覆盖文本的不同方面 31 | - 禁止生成假设性、重复或相似问题 32 | 33 | ## 处理流程 34 | 1. 【文本解析】分段处理内容,识别关键实体和核心概念 35 | 2. 【问题生成】基于信息密度选择最佳提问点 36 | 3. 
【质量检查】确保: 37 | - 问题答案可在原文中找到依据 38 | - 标签与问题内容强相关 39 | - 无格式错误 40 | 41 | ## 输出格式 42 | - JSON 数组格式必须正确 43 | - 字段名使用英文双引号 44 | - 输出的 JSON 数组必须严格符合以下结构: 45 | \`\`\`json 46 | ["问题1", "问题2", "..."] 47 | \`\`\` 48 | 49 | ## 输出示例 50 | \`\`\`json 51 | [ "人工智能伦理框架应包含哪些核心要素?","民法典对个人数据保护有哪些新规定?"] 52 | \`\`\` 53 | 54 | ## 待处理文本 55 | ${text} 56 | 57 | ## 限制 58 | - 必须按照规定的 JSON 格式输出,不要输出任何其他不相关内容 59 | - 生成不少于${number}个高质量问题 60 | - 问题不要和材料本身相关,例如禁止出现作者、章节、目录等相关问题 61 | - 问题不得包含【报告、文章、文献、表格】中提到的这种话术,必须是一个自然的问题 62 | ${questionPrompt} 63 | `; 64 | }; 65 | -------------------------------------------------------------------------------- /lib/llm/prompts/questionEn.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Question generation prompt template 3 | * @param {*} text The text to be processed 4 | * @param {*} number The number of questions 5 | */ 6 | module.exports = function getQuestionPrompt({ 7 | text, 8 | number = Math.floor(text.length / 240), 9 | language = 'English', 10 | globalPrompt = '', 11 | questionPrompt = '' 12 | }) { 13 | if (globalPrompt) { 14 | globalPrompt = `In subsequent tasks, you must strictly follow these rules: ${globalPrompt}`; 15 | } 16 | if (questionPrompt) { 17 | questionPrompt = `- In generating questions, you must strictly follow these rules: ${questionPrompt}`; 18 | } 19 | return ` 20 | # Role Mission 21 | You are a professional text analysis expert, skilled at extracting key information from complex texts and generating structured data(only generate questions) that can be used for model fine - tuning. 22 | ${globalPrompt} 23 | 24 | ## Core Task 25 | Based on the text provided by the user(length: ${text.length} characters), generate no less than ${number} high - quality questions. 26 | 27 | ## Constraints(Important!) 28 | ✔️ Must be directly generated based on the text content. 29 | ✔️ Questions should have a clear answer orientation. 30 | ✔️ Should cover different aspects of the text. 
31 | ❌ It is prohibited to generate hypothetical, repetitive, or similar questions. 32 | 33 | ## Processing Flow 34 | 1. 【Text Parsing】Process the content in segments, identify key entities and core concepts. 35 | 2. 【Question Generation】Select the best questioning points based on the information density. 36 | 3. 【Quality Check】Ensure that: 37 | - The answers to the questions can be found in the original text. 38 | - The labels are strongly related to the question content. 39 | - There are no formatting errors. 40 | 41 | ## Output Format 42 | - The JSON array format must be correct. 43 | - Use English double - quotes for field names. 44 | - The output JSON array must strictly follow the following structure: 45 | \`\`\`json 46 | ["Question 1", "Question 2", "..."] 47 | \`\`\` 48 | 49 | ## Output Example 50 | \`\`\`json 51 | [ "What core elements should an AI ethics framework include?", "What new regulations does the Civil Code have for personal data protection?"] 52 | \`\`\` 53 | 54 | ## Text to be Processed 55 | ${text} 56 | 57 | ## Restrictions 58 | - Must output in the specified JSON format and do not output any other irrelevant content. 59 | - Generate no less than ${number} high - quality questions. 60 | - Questions should not be related to the material itself. For example, questions related to the author, chapters, table of contents, etc. are prohibited. 
61 | ${questionPrompt} 62 | `; 63 | }; 64 | -------------------------------------------------------------------------------- /lib/pdf-processing/core/index.js: -------------------------------------------------------------------------------- 1 | const strategies = require('../strategy/index'); 2 | 3 | class PdfProcessor { 4 | constructor(strategy = 'default') { 5 | if (!strategies[strategy]) { 6 | throw new Error(`Invalid strategy: ${strategy}`); 7 | } 8 | this.strategy = new strategies[strategy](); 9 | } 10 | 11 | async process(projectId, fileName, options = {}) { 12 | try { 13 | if (!fileName.endsWith('.pdf')) { 14 | throw new Error('Input must be a PDF file'); 15 | } 16 | 17 | const result = await this.strategy.process(projectId, fileName, options); 18 | return { 19 | success: true, 20 | data: result, 21 | timestamp: new Date().toISOString() 22 | }; 23 | } catch (error) { 24 | return { 25 | success: false, 26 | error: error.message, 27 | stack: process.env.NODE_ENV === 'development' ? 
/**
 * Default PDF conversion strategy: converts a project's uploaded PDF to
 * Markdown with `pdf2md` and stores the result next to the source file.
 */
class DefaultStrategy {
  /**
   * Convert `<projectRoot>/<projectId>/files/<fileName>` to Markdown.
   *
   * @param {string} projectId - Project directory name under the project root.
   * @param {string} fileName - Name of the PDF inside the project's `files` directory.
   * @returns {Promise<string>} Name (not the full path) of the generated `.md` file.
   * @throws Rejects when the PDF cannot be read, converted or written. The
   *   failure is logged and re-thrown so the caller never receives the name
   *   of a file that was not produced.
   */
  async process(projectId, fileName) {
    console.log('正在执行PDF默认转换策略......');

    // Resolve the project's "files" directory.
    const projectRoot = await getProjectRoot();
    const filesDir = path.join(projectRoot, projectId, 'files');

    // Load the source PDF.
    const pdfBuffer = fs.readFileSync(path.join(filesDir, fileName));

    // Output name: the input name with its last extension replaced by ".md".
    const convertName = `${fileName.replace(/\.([^.]*)$/, '')}.md`;
    const outputFile = path.join(filesDir, convertName);

    try {
      const text = await pdf2md(pdfBuffer);
      console.log(`Writing to ${outputFile}...`);
      fs.writeFileSync(path.resolve(outputFile), text);
      console.log('Done.');
    } catch (err) {
      // Keep the original diagnostic output, but let the failure propagate
      // instead of reporting success for a file that does not exist.
      console.error(err);
      throw err;
    }

    console.log('PDF转换完成!');
    // Only the converted file name is returned; callers do not need the full path.
    return convertName;
  }
}
/**
 * Extract the heading outline of a Markdown document.
 *
 * Recognises ATX headings (`#` to `######`) that are followed by inline
 * whitespace and a non-empty title on the SAME line. An optional trailing
 * `{#anchor-id}` is stripped from the title.
 *
 * @param {string} text - Markdown text.
 * @returns {Array<{level: number, title: string, position: number}>} One entry
 *   per heading, in document order; `position` is the index of the heading's
 *   first `#` in `text`.
 */
function extractOutline(text) {
  // `[^\S\r\n\u2028\u2029]` is "any whitespace except a line terminator".
  // Plain `\s` would also match newlines, letting a bare `#` line capture the
  // following line (possibly a real heading) as its title.
  const outlineRegex =
    /^(#{1,6})[^\S\r\n\u2028\u2029]+(.+?)(?:[^\S\r\n\u2028\u2029]*\{#[\w-]+\})?[^\S\r\n\u2028\u2029]*$/gm;
  const outline = [];
  let match;

  while ((match = outlineRegex.exec(text)) !== null) {
    outline.push({
      level: match[1].length,
      title: match[2].trim(),
      position: match.index
    });
  }

  return outline;
}

/**
 * Split a document into sections, one per heading of the outline.
 *
 * @param {string} text - Markdown text.
 * @param {Array<{level: number, title: string, position: number}>} outline -
 *   Outline as produced by `extractOutline` for the same `text`.
 * @returns {Array<{heading: (string|null), level: number, content: string, position: number}>}
 *   Sections in document order. Text before the first heading becomes a
 *   section with `heading: null` and `level: 0` (omitted when blank). With an
 *   empty outline the whole, untrimmed text is returned as a single section.
 */
function splitByHeadings(text, outline) {
  if (outline.length === 0) {
    return [
      {
        heading: null,
        level: 0,
        content: text,
        position: 0
      }
    ];
  }

  const sections = [];

  // Content that precedes the first heading, if any.
  if (outline[0].position > 0) {
    const frontMatter = text.substring(0, outline[0].position).trim();
    if (frontMatter.length > 0) {
      sections.push({
        heading: null,
        level: 0,
        content: frontMatter,
        position: 0
      });
    }
  }

  outline.forEach((current, i) => {
    // The section body starts right after the heading line. Locating the
    // line end with indexOf avoids copying and splitting the rest of the
    // document for every heading.
    const lineEnd = text.indexOf('\n', current.position);
    const startPos = lineEnd === -1 ? text.length : lineEnd + 1;
    const endPos = i < outline.length - 1 ? outline[i + 1].position : text.length;

    sections.push({
      heading: current.title,
      level: current.level,
      content: text.substring(startPos, endPos).trim(),
      position: current.position
    });
  });

  return sections;
}
/**
 * Save every split part to its own Markdown file.
 *
 * Files are written one after another into `<dir>/<name>_parts/`, where
 * `<dir>` and `<name>` come from `baseFilename` (extension removed), and are
 * named `<name>_part001.md`, `<name>_part002.md`, ...
 *
 * @param {Array<{summary: string, content: string}>} splitResult - Split parts.
 * @param {string} baseFilename - Path of the source document; only its
 *   directory and base name are used.
 * @param {Function} callback - Error-first callback. Called as
 *   `callback(null, outputDir, fileCount)` once all parts are written, or as
 *   `callback(err)` on the first failure, including a failure to create the
 *   output directory.
 */
function saveToSeparateFiles(splitResult, baseFilename, callback) {
  // Base directory and file name without extension.
  const basePath = path.dirname(baseFilename);
  const filenameWithoutExt = path.basename(baseFilename).replace(/\.[^/.]+$/, '');

  // Directory that receives the individual part files.
  const outputDir = path.join(basePath, `${filenameWithoutExt}_parts`);

  // Directory creation is synchronous and may throw; report that through the
  // callback like every other failure instead of throwing at the caller.
  try {
    ensureDirectoryExists(outputDir);
  } catch (err) {
    callback(err);
    return;
  }

  // Write part `index`, then continue with the next one from the write callback.
  function saveFile(index) {
    if (index >= splitResult.length) {
      // Every part has been written.
      callback(null, outputDir, splitResult.length);
      return;
    }

    const part = splitResult[index];
    // Zero-padded, 1-based index keeps the files in order when sorted by name.
    const paddedIndex = String(index + 1).padStart(3, '0');
    const outputFile = path.join(outputDir, `${filenameWithoutExt}_part${paddedIndex}.md`);

    // Summary block, separator, then the part's content.
    const content = `> **📑 Summarization:** *${part.summary}*\n\n---\n\n${part.content}`;

    fs.writeFile(outputFile, content, 'utf8', err => {
      if (err) {
        callback(err);
        return;
      }

      saveFile(index + 1);
    });
  }

  saveFile(0);
}
| 5 | /** 6 | * 将分割后的文本重新组合成Markdown文档 7 | * @param {Array} splitResult - 分割结果数组 8 | * @returns {string} - 组合后的Markdown文档 9 | */ 10 | function combineMarkdown(splitResult) { 11 | let result = ''; 12 | 13 | for (let i = 0; i < splitResult.length; i++) { 14 | const part = splitResult[i]; 15 | 16 | // 添加分隔线和摘要 17 | if (i > 0) { 18 | result += '\n\n---\n\n'; 19 | } 20 | 21 | result += `> **📑 Summarization:** *${part.summary}*\n\n---\n\n${part.content}`; 22 | } 23 | 24 | return result; 25 | } 26 | 27 | module.exports = { 28 | combineMarkdown 29 | }; 30 | -------------------------------------------------------------------------------- /lib/split-mardown/utils/common.js: -------------------------------------------------------------------------------- 1 | /** 2 | * 通用工具函数模块 3 | */ 4 | 5 | const fs = require('fs'); 6 | const path = require('path'); 7 | 8 | /** 9 | * 检查并创建目录 10 | * @param {string} directory - 目录路径 11 | */ 12 | function ensureDirectoryExists(directory) { 13 | if (!fs.existsSync(directory)) { 14 | fs.mkdirSync(directory, { recursive: true }); 15 | } 16 | } 17 | 18 | /** 19 | * 从文件路径获取不带扩展名的文件名 20 | * @param {string} filePath - 文件路径 21 | * @returns {string} - 不带扩展名的文件名 22 | */ 23 | function getFilenameWithoutExt(filePath) { 24 | return path.basename(filePath).replace(/\.[^/.]+$/, ''); 25 | } 26 | 27 | module.exports = { 28 | ensureDirectoryExists, 29 | getFilenameWithoutExt 30 | }; 31 | -------------------------------------------------------------------------------- /lib/util/async.js: -------------------------------------------------------------------------------- 1 | // 并行处理数组的辅助函数,限制并发数 2 | export const processInParallel = async (items, processFunction, concurrencyLimit, onProgress) => { 3 | const results = []; 4 | const inProgress = new Set(); 5 | const queue = [...items]; 6 | let completedCount = 0; 7 | 8 | while (queue.length > 0 || inProgress.size > 0) { 9 | // 如果有空闲槽位且队列中还有任务,启动新任务 10 | while (inProgress.size < concurrencyLimit && queue.length > 0) { 11 
| const item = queue.shift(); 12 | const promise = processFunction(item).then(result => { 13 | inProgress.delete(promise); 14 | onProgress && onProgress(++completedCount, items.length); 15 | return result; 16 | }); 17 | inProgress.add(promise); 18 | results.push(promise); 19 | } 20 | 21 | // 等待其中一个任务完成 22 | if (inProgress.size > 0) { 23 | await Promise.race(inProgress); 24 | } 25 | } 26 | 27 | return Promise.all(results); 28 | }; 29 | -------------------------------------------------------------------------------- /lib/util/logger.js: -------------------------------------------------------------------------------- 1 | // lib/utils/logger.js 2 | const isElectron = typeof process !== 'undefined' && process.versions && process.versions.electron; 3 | 4 | function log(level, ...args) { 5 | const message = args.map(arg => (typeof arg === 'object' ? JSON.stringify(arg) : arg)).join(' '); 6 | 7 | if (isElectron) { 8 | // 在 Electron 环境下,将日志写入文件 9 | const { ipcRenderer } = require('electron'); 10 | ipcRenderer.send('log', { level, message }); 11 | } else { 12 | // 在非 Electron 环境下,只输出到控制台 13 | console[level](...args); 14 | } 15 | } 16 | 17 | export default { 18 | info: (...args) => log('info', ...args), 19 | error: (...args) => log('error', ...args), 20 | warn: (...args) => log('warn', ...args), 21 | debug: (...args) => log('debug', ...args) 22 | }; 23 | -------------------------------------------------------------------------------- /lib/util/request.js: -------------------------------------------------------------------------------- 1 | /** 2 | * 封装的通用重试函数,用于在操作失败后自动重试 3 | * @param {Function} asyncOperation - 需要执行的异步操作函数 4 | * @param {Object} options - 配置选项 5 | * @param {number} options.retries - 重试次数,默认为1 6 | * @param {number} options.delay - 重试前的延迟时间(毫秒),默认为0 7 | * @param {Function} options.onRetry - 重试前的回调函数,接收错误和当前重试次数作为参数 8 | * @returns {Promise} - 返回异步操作的结果 9 | */ 10 | export const withRetry = async (asyncOperation, options = {}) => { 11 | const { retries = 1, delay = 
/**
 * `fetch` with automatic retries.
 *
 * The request is wrapped in a thunk and handed to `withRetry`, so it is
 * re-issued according to `retryOptions` whenever the `fetch` promise rejects.
 *
 * @param {string} url - Request URL.
 * @param {Object} [options={}] - Options forwarded unchanged to `fetch`.
 * @param {Object} [retryOptions={}] - Options forwarded unchanged to `withRetry`.
 * @returns {Promise} Resolves with the `fetch` response.
 */
export const fetchWithRetry = async (url, options = {}, retryOptions = {}) => {
  const sendRequest = () => fetch(url, options);
  return withRetry(sendRequest, retryOptions);
};
"https://github.com/ConardLi/easy-dataset", 11 | "scripts": { 12 | "dev": "next dev -p 1717", 13 | "build": "next build", 14 | "start": "next start -p 1717", 15 | "lint": "next lint", 16 | "electron": "electron .", 17 | "electron-dev": "concurrently \"pnpm dev\" \"wait-on http://localhost:1717 && electron .\"", 18 | "electron-pack": "electron-builder --dir", 19 | "electron-dist": "electron-builder", 20 | "clean-dist": "rm -rf dist", 21 | "electron-build": "pnpm clean-dist && next build && electron-builder -mwl", 22 | "electron-build-mac": "pnpm clean-dist && next build && electron-builder --mac", 23 | "electron-build-win": "pnpm clean-dist && next build && electron-builder --win", 24 | "electron-build-linux": "pnpm clean-dist && next build && electron-builder --linux", 25 | "docker": "docker build -t easy-dataset ." 26 | }, 27 | "bin": "desktop/server.js", 28 | "pkg": { 29 | "assets": [ 30 | ".next/**/*", 31 | "public/**/*", 32 | "locales/**/*", 33 | "package.json", 34 | "node_modules/next/**/*" 35 | ], 36 | "targets": [ 37 | "node18-macos-arm64", 38 | "node18-macos-x64", 39 | "node18-win-x64", 40 | "node18-linux-x64" 41 | ], 42 | "outputPath": "dist" 43 | }, 44 | "dependencies": { 45 | "@ai-sdk/openai": "^1.3.9", 46 | "@emotion/react": "^11.11.3", 47 | "@emotion/styled": "^11.11.0", 48 | "@fontsource/inter": "^5.0.16", 49 | "@fontsource/jetbrains-mono": "^5.0.18", 50 | "@mui/icons-material": "5.16.14", 51 | "@mui/lab": "5.0.0-alpha.175", 52 | "@mui/material": "5.16.14", 53 | "@opendocsg/pdf2md": "^0.2.1", 54 | "@openrouter/ai-sdk-provider": "^0.4.5", 55 | "adm-zip": "^0.5.16", 56 | "ai": "^4.3.4", 57 | "electron-updater": "^6.3.9", 58 | "formidable": "^3.5.2", 59 | "framer-motion": "^12.4.10", 60 | "i18next": "^24.2.2", 61 | "i18next-browser-languagedetector": "^8.0.4", 62 | "mammoth": "^1.9.0", 63 | "next": "14.1.0", 64 | "next-themes": "^0.2.1", 65 | "ollama-ai-provider": "^1.2.0", 66 | "opener": "^1.5.2", 67 | "pdf2md-js": "^1.0.1", 68 | "pdfjs-dist": 
"^3.11.174", 69 | "react": "^18.2.0", 70 | "react-dom": "^18.2.0", 71 | "react-i18next": "^15.4.1", 72 | "react-markdown": "^10.0.1", 73 | "sharp": "^0.33.1", 74 | "uuid": "^11.1.0", 75 | "zhipu-ai-provider": "^0.1.1" 76 | }, 77 | "license": "Apache-2.0", 78 | "devDependencies": { 79 | "@commitlint/cli": "^19.8.0", 80 | "@commitlint/config-conventional": "^19.8.0", 81 | "concurrently": "^8.2.2", 82 | "electron": "^29.4.6", 83 | "electron-builder": "^24.13.3", 84 | "husky": "^9.1.7", 85 | "pkg": "^5.8.1", 86 | "wait-on": "^7.2.0" 87 | }, 88 | "main": "electron/main.js", 89 | "description": "一个用于创建大模型微调数据集的应用程序", 90 | "build": { 91 | "appId": "com.easydataset.app", 92 | "productName": "Easy Dataset", 93 | "files": [ 94 | ".next/**/*", 95 | "!.next/cache/**/*", 96 | "public/**/*", 97 | "locales/**/*", 98 | "package.json", 99 | "electron/**/*", 100 | "node_modules/**/*", 101 | "!node_modules/.cache/**/*", 102 | "!node_modules/.bin/**/*", 103 | "!node_modules/.vite/**/*", 104 | "!**/*.{md,d.ts,map}", 105 | "!**/node_modules/*/{CHANGELOG.md,README.md,README,readme.md,readme}" 106 | ], 107 | "directories": { 108 | "buildResources": "public", 109 | "output": "dist" 110 | }, 111 | "asar": true, 112 | "compression": "maximum", 113 | "mac": { 114 | "icon": "public/imgs/logo.icns", 115 | "category": "public.app-category.developer-tools", 116 | "target": [ 117 | { 118 | "target": "dmg", 119 | "arch": [ 120 | "arm64", 121 | "x64" 122 | ] 123 | } 124 | ], 125 | "electronLanguages": [ 126 | "zh_CN", 127 | "en" 128 | ] 129 | }, 130 | "win": { 131 | "icon": "public/imgs/logo.ico", 132 | "target": [ 133 | { 134 | "target": "nsis", 135 | "arch": [ 136 | "x64" 137 | ] 138 | } 139 | ] 140 | }, 141 | "linux": { 142 | "icon": "public/imgs/logo.png", 143 | "target": [ 144 | "AppImage" 145 | ], 146 | "category": "Development" 147 | } 148 | } 149 | } -------------------------------------------------------------------------------- /public/imgs/1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/1758189c4a0f47c18e3b6b82b2a4b9aac8d95454/public/imgs/1.png -------------------------------------------------------------------------------- /public/imgs/10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/1758189c4a0f47c18e3b6b82b2a4b9aac8d95454/public/imgs/10.png -------------------------------------------------------------------------------- /public/imgs/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/1758189c4a0f47c18e3b6b82b2a4b9aac8d95454/public/imgs/2.png -------------------------------------------------------------------------------- /public/imgs/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/1758189c4a0f47c18e3b6b82b2a4b9aac8d95454/public/imgs/3.png -------------------------------------------------------------------------------- /public/imgs/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/1758189c4a0f47c18e3b6b82b2a4b9aac8d95454/public/imgs/4.png -------------------------------------------------------------------------------- /public/imgs/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/1758189c4a0f47c18e3b6b82b2a4b9aac8d95454/public/imgs/5.png -------------------------------------------------------------------------------- /public/imgs/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/1758189c4a0f47c18e3b6b82b2a4b9aac8d95454/public/imgs/6.png 
-------------------------------------------------------------------------------- /public/imgs/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/1758189c4a0f47c18e3b6b82b2a4b9aac8d95454/public/imgs/7.png -------------------------------------------------------------------------------- /public/imgs/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/1758189c4a0f47c18e3b6b82b2a4b9aac8d95454/public/imgs/8.png -------------------------------------------------------------------------------- /public/imgs/9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/1758189c4a0f47c18e3b6b82b2a4b9aac8d95454/public/imgs/9.png -------------------------------------------------------------------------------- /public/imgs/aw.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/1758189c4a0f47c18e3b6b82b2a4b9aac8d95454/public/imgs/aw.jpg -------------------------------------------------------------------------------- /public/imgs/aws.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/1758189c4a0f47c18e3b6b82b2a4b9aac8d95454/public/imgs/aws.png -------------------------------------------------------------------------------- /public/imgs/bg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/1758189c4a0f47c18e3b6b82b2a4b9aac8d95454/public/imgs/bg.png -------------------------------------------------------------------------------- /public/imgs/bg2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ConardLi/easy-dataset/1758189c4a0f47c18e3b6b82b2a4b9aac8d95454/public/imgs/bg2.png -------------------------------------------------------------------------------- /public/imgs/cn-arc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/1758189c4a0f47c18e3b6b82b2a4b9aac8d95454/public/imgs/cn-arc.png -------------------------------------------------------------------------------- /public/imgs/default-dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/1758189c4a0f47c18e3b6b82b2a4b9aac8d95454/public/imgs/default-dataset.png -------------------------------------------------------------------------------- /public/imgs/en-arc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/1758189c4a0f47c18e3b6b82b2a4b9aac8d95454/public/imgs/en-arc.png -------------------------------------------------------------------------------- /public/imgs/garden.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/1758189c4a0f47c18e3b6b82b2a4b9aac8d95454/public/imgs/garden.jpg -------------------------------------------------------------------------------- /public/imgs/github.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/1758189c4a0f47c18e3b6b82b2a4b9aac8d95454/public/imgs/github.png -------------------------------------------------------------------------------- /public/imgs/google.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/1758189c4a0f47c18e3b6b82b2a4b9aac8d95454/public/imgs/google.png 
-------------------------------------------------------------------------------- /public/imgs/huggingface.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/1758189c4a0f47c18e3b6b82b2a4b9aac8d95454/public/imgs/huggingface.png -------------------------------------------------------------------------------- /public/imgs/kaggle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/1758189c4a0f47c18e3b6b82b2a4b9aac8d95454/public/imgs/kaggle.png -------------------------------------------------------------------------------- /public/imgs/linux.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/1758189c4a0f47c18e3b6b82b2a4b9aac8d95454/public/imgs/linux.png -------------------------------------------------------------------------------- /public/imgs/lluga.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/1758189c4a0f47c18e3b6b82b2a4b9aac8d95454/public/imgs/lluga.png -------------------------------------------------------------------------------- /public/imgs/logo.icns: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/1758189c4a0f47c18e3b6b82b2a4b9aac8d95454/public/imgs/logo.icns -------------------------------------------------------------------------------- /public/imgs/logo.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/1758189c4a0f47c18e3b6b82b2a4b9aac8d95454/public/imgs/logo.ico -------------------------------------------------------------------------------- /public/imgs/logo.png: 
/* Blinking-cursor animation used while output is being streamed. */
@keyframes blink {
  /* Fully visible at the start and end of each cycle, hidden at the midpoint. */
  0%,
  100% {
    opacity: 1;
  }

  50% {
    opacity: 0;
  }
}

/* Element that runs the blink animation once per second, indefinitely. */
.blinking-cursor {
  display: inline-block;
  color: #666;
  font-weight: bold;
  animation: blink 1s infinite;
}
// styles/home.js
// Style definitions for the home page. Each entry of `styles` is either a
// plain style object or a function that takes the theme and returns one.

/** Returns `whenDark` if `theme.palette.mode` is 'dark', otherwise `whenLight`. */
const pickByMode = (theme, whenDark, whenLight) =>
  theme.palette.mode === 'dark' ? whenDark : whenLight;

/** Keyframe stops that start at scale(1), reach `peakTransform` at 50% and return to scale(1). */
const pulseFrames = (peakTransform) => ({
  '0%': { transform: 'scale(1)' },
  '50%': { transform: peakTransform },
  '100%': { transform: 'scale(1)' }
});

// Outer hero wrapper: responsive vertical padding, clips its overflowing children.
const heroSection = {
  pt: { xs: 6, md: 10 },
  pb: { xs: 6, md: 8 },
  position: 'relative',
  overflow: 'hidden',
  transition: 'all 0.3s ease-in-out'
};

/**
 * Hero background: a diagonal gradient plus a full-size `::before` overlay.
 * Both the gradient strength and the overlay opacity depend on the theme mode.
 */
function heroBackground(theme) {
  const gradient = pickByMode(
    theme,
    'linear-gradient(135deg, rgba(42, 92, 170, 0.25) 0%, rgba(139, 92, 246, 0.25) 100%)',
    'linear-gradient(135deg, rgba(42, 92, 170, 0.08) 0%, rgba(139, 92, 246, 0.08) 100%)'
  );
  const overlayOpacity = pickByMode(theme, 0.05, 0.03);

  return {
    background: gradient,
    '&::before': {
      content: '""',
      position: 'absolute',
      top: 0,
      left: 0,
      right: 0,
      bottom: 0,
      // NOTE(review): no grid-pattern.png is visible among the public/imgs
      // assets — confirm the file exists, otherwise this overlay renders nothing.
      background: 'url("/imgs/grid-pattern.png") repeat',
      opacity: overlayOpacity,
      zIndex: 0
    }
  };
}

// Large pulsing circle anchored beyond the top-right corner.
const decorativeCircle = {
  position: 'absolute',
  width: '800px',
  height: '800px',
  borderRadius: '50%',
  background: 'radial-gradient(circle, rgba(139, 92, 246, 0.15) 0%, rgba(42, 92, 170, 0) 70%)',
  top: '-300px',
  right: '-200px',
  zIndex: 0,
  animation: 'pulse 15s infinite ease-in-out',
  '@keyframes pulse': pulseFrames('scale(1.05)')
};

// Smaller pulsing circle anchored beyond the bottom-left corner.
const decorativeCircleSecond = {
  position: 'absolute',
  width: '500px',
  height: '500px',
  borderRadius: '50%',
  background: 'radial-gradient(circle, rgba(42, 92, 170, 0.1) 0%, rgba(139, 92, 246, 0) 70%)',
  bottom: '-200px',
  left: '-100px',
  zIndex: 0,
  animation: 'pulse2 20s infinite ease-in-out',
  '@keyframes pulse2': pulseFrames('scale(1.08)')
};

/** Title whose text is filled with `theme.palette.gradient.primary` via background clipping. */
function gradientTitle(theme) {
  return {
    mb: 2,
    background: theme.palette.gradient.primary,
    WebkitBackgroundClip: 'text',
    WebkitTextFillColor: 'transparent',
    backgroundClip: 'text',
    textFillColor: 'transparent'
  };
}

/** "Create" button painted with `theme.palette.gradient.primary`; gains a shadow on hover. */
function createButton(theme) {
  return {
    mt: 3,
    px: 4,
    py: 1.2,
    borderRadius: '12px',
    fontSize: '1rem',
    background: theme.palette.gradient.primary,
    '&:hover': {
      boxShadow: '0 8px 16px rgba(0, 0, 0, 0.1)'
    }
  };
}

/** Blurred, semi-transparent statistics card; shadow and fill depend on the theme mode. */
function statsCard(theme) {
  const shadow = pickByMode(
    theme,
    '0 8px 24px rgba(0, 0, 0, 0.2)',
    '0 8px 24px rgba(0, 0, 0, 0.05)'
  );
  const fill = pickByMode(theme, 'rgba(30, 30, 30, 0.6)', 'rgba(255, 255, 255, 0.8)');

  return {
    mt: 6,
    p: { xs: 2, md: 4 },
    borderRadius: '16px',
    boxShadow: shadow,
    background: fill,
    backdropFilter: 'blur(8px)'
  };
}

// Project card: overflow stays visible so the avatar can sit above the top edge.
const projectCard = {
  height: '100%',
  display: 'flex',
  flexDirection: 'column',
  overflow: 'visible',
  position: 'relative'
};

// Avatar positioned 16px above the top edge of its positioned ancestor.
const projectAvatar = {
  position: 'absolute',
  top: -16,
  left: 24,
  zIndex: 1
};

// Description clamped to two lines inside a fixed 40px-high box.
const projectDescription = {
  mb: 2,
  display: '-webkit-box',
  WebkitBoxOrient: 'vertical',
  WebkitLineClamp: 2,
  overflow: 'hidden',
  textOverflow: 'ellipsis',
  height: '40px'
};

export const styles = {
  heroSection,
  heroBackground,
  decorativeCircle,
  decorativeCircleSecond,
  gradientTitle,
  createButton,
  statsCard,
  projectCard,
  projectAvatar,
  projectDescription
};
/**
 * Builds the style map for the model playground (model testing) page.
 *
 * @param {object} theme - Theme object. Only `theme.palette.mode` and
 *   `theme.palette.divider` are read.
 * @returns {object} Map from style-slot name (`container`, `mainPaper`, …)
 *   to a style object.
 */
export const playgroundStyles = (theme) => {
  // Evaluate the mode once instead of repeating the comparison per property.
  const isDark = theme.palette.mode === 'dark';
  // Faint surface tint shared by the empty-state box and the chat scroll area.
  const subtleSurface = isDark ? 'rgba(255,255,255,0.03)' : 'rgba(0,0,0,0.02)';

  return {
    // Page wrapper; 64px is subtracted from the viewport height
    // (presumably the top navbar height — confirm against the layout).
    container: {
      p: 3,
      height: 'calc(100vh - 64px)',
      display: 'flex',
      flexDirection: 'column'
    },
    mainPaper: {
      p: 3,
      flex: 1,
      display: 'flex',
      flexDirection: 'column',
      mb: 2,
      borderRadius: 2
    },
    controlsContainer: {
      mb: 2
    },
    clearButton: {
      height: '56px'
    },
    divider: {
      mb: 2
    },
    emptyStateBox: {
      flex: 1,
      display: 'flex',
      justifyContent: 'center',
      alignItems: 'center',
      mb: 2,
      p: 2,
      bgcolor: subtleSurface,
      borderRadius: 1
    },
    chatContainer: {
      flex: 1,
      mb: 2
    },
    modelPaper: {
      height: '100%',
      display: 'flex',
      flexDirection: 'column',
      border: `1px solid ${theme.palette.divider}`,
      borderRadius: 1,
      overflow: 'hidden'
    },
    modelHeader: {
      p: 1,
      bgcolor: isDark ? 'rgba(255,255,255,0.05)' : 'primary.light',
      // Header text is white in both light and dark mode.
      color: 'white',
      fontWeight: 'medium',
      textAlign: 'center',
      display: 'flex',
      alignItems: 'center',
      justifyContent: 'center'
    },
    modelChatBox: {
      flex: 1,
      overflowY: 'auto',
      p: 2,
      bgcolor: subtleSurface
    },
    emptyChatBox: {
      display: 'flex',
      justifyContent: 'center',
      alignItems: 'center',
      height: '100%'
    },
    inputContainer: {
      display: 'flex',
      gap: 1,
      mt: 2
    },
    sendButton: {
      minWidth: '120px',
      height: '56px',
      marginLeft: '20px'
    }
  };
};